Coverage Report

Created: 2026-02-26 07:17

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/htslib/hfile_s3.c
Line
Count
Source
1
/*  hfile_s3.c -- Amazon S3 backend for low-level file streams.
2
3
    Copyright (C) 2015-2017, 2019-2025 Genome Research Ltd.
4
5
    Author: John Marshall <jm18@sanger.ac.uk>
6
7
Permission is hereby granted, free of charge, to any person obtaining a copy
8
of this software and associated documentation files (the "Software"), to deal
9
in the Software without restriction, including without limitation the rights
10
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
copies of the Software, and to permit persons to whom the Software is
12
furnished to do so, subject to the following conditions:
13
14
The above copyright notice and this permission notice shall be included in
15
all copies or substantial portions of the Software.
16
17
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
DEALINGS IN THE SOFTWARE.  */
24
25
#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
26
#include <config.h>
27
28
#include <stdarg.h>
29
#include <stdio.h>
30
#include <stdlib.h>
31
#include <string.h>
32
#include <strings.h>
33
#include <time.h>
34
35
#include <errno.h>
36
#include <pthread.h>
37
38
#include "hfile_internal.h"
39
#ifdef ENABLE_PLUGINS
40
#include "version.h"
41
#endif
42
#include "htslib/hts.h"  // for hts_version() and hts_verbose
43
#include "htslib/kstring.h"
44
#include "hts_time_funcs.h"
45
46
#include <curl/curl.h>
47
48
typedef struct s3_auth_data {
49
    kstring_t id;
50
    kstring_t token;
51
    kstring_t secret;
52
    kstring_t region;
53
    kstring_t canonical_query_string;
54
    kstring_t user_query_string;
55
    kstring_t host;
56
    kstring_t profile;
57
    enum {s3_auto, s3_virtual, s3_path} url_style;
58
    time_t creds_expiry_time;
59
    char *bucket;
60
    time_t auth_time;
61
    char date[40];
62
    char date_long[17];
63
    char date_short[9];
64
    kstring_t date_html;
65
    char mode;
66
    int is_v4;
67
} s3_auth_data;
68
69
typedef struct {
70
    hFILE base;
71
    CURL *curl;
72
    CURLcode ret;
73
    s3_auth_data *au;
74
    kstring_t buffer;
75
    kstring_t url;
76
    long verbose;
77
    int write;
78
    int part_size; // size for reading or writing
79
80
    kstring_t content_hash;
81
    kstring_t authorisation;
82
    kstring_t content;
83
    kstring_t date;
84
    kstring_t token;
85
    kstring_t range;
86
87
    // write variables
88
    kstring_t upload_id;
89
    kstring_t completion_message;
90
    int part_no;
91
    int aborted;
92
    size_t index;
93
    int expand;
94
95
    // read variables
96
    size_t last_read;               // last read position (remote)
97
    size_t last_read_buffer;        // last read (local buffer)
98
    int64_t file_size;              // size of the file being read
99
    int keep_going;
100
101
} hFILE_s3;
102
103
170
#define AUTH_LIFETIME 60  // Regenerate auth headers if older than this
104
0
#define CREDENTIAL_LIFETIME 60 // Seconds before expiry to reread credentials
105
106
#if defined HAVE_COMMONCRYPTO
107
108
#include <CommonCrypto/CommonHMAC.h>
109
110
#define DIGEST_BUFSIZ CC_SHA1_DIGEST_LENGTH
#define SHA256_DIGEST_BUFSIZE CC_SHA256_DIGEST_LENGTH
// Two hex characters per digest byte plus the terminating NUL.  The whole
// expansion is parenthesised so it is safe inside larger expressions.
#define HASH_LENGTH_SHA256 ((SHA256_DIGEST_BUFSIZE * 2) + 1)
113
114
static size_t
115
s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message)
116
{
117
    CCHmac(kCCHmacAlgSHA1, key->s, key->l, message->s, message->l, digest);
118
    return CC_SHA1_DIGEST_LENGTH;
119
}
120
121
122
/* SHA-256 of the 'length' bytes at 'in', written to 'out' (CommonCrypto). */
static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out)
{
    (void) CC_SHA256(in, length, out);
}
125
126
127
static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) {
128
    CCHmac(kCCHmacAlgSHA256, key, key_len, d, n, md);
129
    *md_len = CC_SHA256_DIGEST_LENGTH;
130
}
131
132
133
#elif defined HAVE_HMAC
134
135
#include <openssl/hmac.h>
136
#include <openssl/sha.h>
137
138
#define DIGEST_BUFSIZ EVP_MAX_MD_SIZE
#define SHA256_DIGEST_BUFSIZE SHA256_DIGEST_LENGTH
// Two hex characters per digest byte plus the terminating NUL.  The whole
// expansion is parenthesised so it is safe inside larger expressions.
#define HASH_LENGTH_SHA256 ((SHA256_DIGEST_BUFSIZE * 2) + 1)
141
142
static size_t
143
s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message)
144
0
{
145
0
    unsigned int len;
146
0
    HMAC(EVP_sha1(), key->s, key->l,
147
0
         (unsigned char *) message->s, message->l, digest, &len);
148
0
    return len;
149
0
}
150
151
152
269
/* SHA-256 of the 'length' bytes at 'in', written to 'out' (OpenSSL build). */
static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out)
{
    (void) SHA256(in, length, out);
}
155
156
157
495
/*
 * HMAC-SHA256 of the 'n' bytes at 'd' keyed with 'key' (OpenSSL build).
 *
 * The MAC goes to 'md' and its length to '*md_len'.  The length is zeroed
 * first so that it has a defined value (0) should HMAC() fail; callers chain
 * it in as the key length of the next signing step.
 */
static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) {
    if (md_len)
        *md_len = 0;

    HMAC(EVP_sha256(), key, key_len, d, n, md, md_len);
}
160
161
#else
162
#error No HMAC() routine found by configure
163
#endif
164
165
static void
166
urldecode_kput(const char *s, int len, kstring_t *str)
167
269
{
168
269
    char buf[3];
169
269
    int i = 0;
170
171
507k
    while (i < len)
172
506k
        if (s[i] == '%' && i+2 < len) {
173
196
            buf[0] = s[i+1], buf[1] = s[i+2], buf[2] = '\0';
174
196
            kputc(strtol(buf, NULL, 16), str);
175
196
            i += 3;
176
196
        }
177
506k
        else kputc(s[i++], str);
178
269
}
179
180
181
/*
 * Append the base64 encoding of the 'len' bytes at 'data' to 'str',
 * padded with '=' to a multiple of four characters.
 */
static void base64_kput(const unsigned char *data, size_t len, kstring_t *str)
{
    static const char alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    unsigned acc = 0;       // bit accumulator, newest byte in the low bits
    int avail = 0;          // number of not yet emitted bits held in acc
    int npad = 0;           // zero bytes fed in after the input ran out
    size_t pos = 0;

    for (;;) {
        if (avail == 0 && pos >= len)
            break;

        if (avail < 6) {
            // Top up with the next input byte, or a zero byte of padding
            acc <<= 8;
            avail += 8;
            if (pos < len)
                acc |= data[pos++];
            else
                npad++;
        }

        avail -= 6;
        kputc(alphabet[(acc >> avail) & 63], str);
    }

    // The characters produced from padding bytes are replaced by '='
    str->l -= npad;
    kputsn("==", npad, str);
}
204
205
206
/*
 * Decide whether the bucket name in [s0, slim) can be used as a DNS label
 * for virtual-hosted style addressing.
 *
 * Accepts lower-case letters, digits, '-' (not first or last) and, for
 * non-https URLs only, '.' placed between two alphanumeric characters.
 * The name must be 3 to 63 characters long and contain at least one
 * letter or '-'.  Returns 1 if all of that holds, otherwise 0.
 */
static int is_dns_compliant(const char *s0, const char *slim, int is_https)
{
    const char *p;
    int seen_nondigit = 0;
    int count = 0;

    for (p = s0; p < slim; p++, count++) {
        const char ch = *p;

        if (islower_c(ch)) {
            seen_nondigit = 1;
            continue;
        }

        if (isdigit_c(ch))
            continue;

        if (ch == '-') {
            seen_nondigit = 1;
            if (p == s0 || p+1 == slim) return 0;
            continue;
        }

        // Anything left must be a well placed '.' in a non-https URL
        if (ch != '.') return 0;
        if (is_https) return 0;
        if (p == s0 || ! isalnum_c(p[-1])) return 0;
        if (p+1 == slim || ! isalnum_c(p[1])) return 0;
    }

    return seen_nondigit && count >= 3 && count <= 63;
}
229
230
231
static FILE *expand_tilde_open(const char *fname, const char *mode)
232
207
{
233
207
    FILE *fp;
234
235
207
    if (strncmp(fname, "~/", 2) == 0) {
236
207
        kstring_t full_fname = { 0, 0, NULL };
237
207
        const char *home = getenv("HOME");
238
207
        if (! home) return NULL;
239
240
207
        kputs(home, &full_fname);
241
207
        kputs(&fname[1], &full_fname);
242
243
207
        fp = fopen(full_fname.s, mode);
244
207
        free(full_fname.s);
245
207
    }
246
0
    else
247
0
        fp = fopen(fname, mode);
248
249
207
    return fp;
250
207
}
251
252
253
static void parse_ini(const char *fname, const char *section, ...)
254
138
{
255
138
    kstring_t line = { 0, 0, NULL };
256
138
    int active = 1;  // Start active, so global properties are accepted
257
138
    char *s;
258
259
138
    FILE *fp = expand_tilde_open(fname, "r");
260
138
    if (fp == NULL) return;
261
262
0
    while (line.l = 0, kgetline(&line, (kgets_func *) fgets, fp) >= 0)
263
0
        if (line.s[0] == '[' && (s = strchr(line.s, ']')) != NULL) {
264
0
            *s = '\0';
265
0
            active = (strcmp(&line.s[1], section) == 0);
266
0
        }
267
0
        else if (active && (s = strpbrk(line.s, ":=")) != NULL) {
268
0
            const char *key = line.s, *value = &s[1], *akey;
269
0
            va_list args;
270
271
0
            while (isspace_c(*key)) key++;
272
0
            while (s > key && isspace_c(s[-1])) s--;
273
0
            *s = '\0';
274
275
0
            while (isspace_c(*value)) value++;
276
0
            while (line.l > 0 && isspace_c(line.s[line.l-1]))
277
0
                line.s[--line.l] = '\0';
278
279
0
            va_start(args, section);
280
0
            while ((akey = va_arg(args, const char *)) != NULL) {
281
0
                kstring_t *avar = va_arg(args, kstring_t *);
282
0
                if (strcmp(key, akey) == 0) {
283
0
                    avar->l = 0;
284
0
                    kputs(value, avar);
285
0
                    break; }
286
0
            }
287
0
            va_end(args);
288
0
        }
289
290
0
    fclose(fp);
291
0
    free(line.s);
292
0
}
293
294
295
/*
 * Read an access key id and secret from a plain text file holding the two
 * values separated by white space (possibly on separate lines).
 *
 * The values are appended to 'id' and 'secret'.  Nothing is changed if the
 * file cannot be opened or no text could be read from it.
 */
static void parse_simple(const char *fname, kstring_t *id, kstring_t *secret)
{
    kstring_t text = { 0, 0, NULL };
    char *s;
    size_t len;

    FILE *fp = expand_tilde_open(fname, "r");
    if (fp == NULL) return;

    // Join all the lines, separated by single spaces
    while (kgetline(&text, (kgets_func *) fgets, fp) >= 0)
        kputc(' ', &text);
    fclose(fp);

    // With nothing read there is no terminated string to scan; text.s may
    // even be NULL, so stop before dereferencing it.
    if (text.l == 0) {
        free(text.s);
        return;
    }

    s = text.s;
    while (isspace_c(*s)) s++;
    kputsn(s, len = strcspn(s, " \t"), id);

    s += len;
    while (isspace_c(*s)) s++;
    kputsn(s, strcspn(s, " \t"), secret);

    free(text.s);
}
318
319
320
170
/* Release 'ad' along with every string buffer it owns. */
static void free_auth_data(s3_auth_data *ad) {
    char *owned[] = {
        ad->profile.s,
        ad->id.s,
        ad->token.s,
        ad->secret.s,
        ad->region.s,
        ad->canonical_query_string.s,
        ad->user_query_string.s,
        ad->host.s,
        ad->bucket,
        ad->date_html.s
    };
    size_t k;

    for (k = 0; k < sizeof(owned) / sizeof(owned[0]); k++)
        free(owned[k]);

    free(ad);
}
333
334
/*
 * Convert an RFC 3339 style timestamp ("YYYY-MM-DDTHH:MM:SS" followed by
 * "Z" or a "+HH:MM" / "-HH:MM" offset) to a time_t.
 *
 * Returns 0 if the string is missing, does not match that layout, uses an
 * unknown zone designator, or hts_time_gm() returns a negative value.
 * An offset that cannot be parsed or is out of range is treated as zero.
 */
static time_t parse_rfc3339_date(kstring_t *datetime)
{
    int offset = 0;
    time_t when;
    int num;
    char should_be_t = '\0', timezone[10] = { '\0' };
    unsigned int year, mon, day, hour, min, sec;

    if (!datetime->s)
        return 0;

    // It should be possible to do this with strptime(), but it seems
    // to not get on with our feature definitions.
    num = sscanf(datetime->s, "%4u-%2u-%2u%c%2u:%2u:%2u%9s",
                 &year, &mon, &day, &should_be_t, &hour, &min, &sec, timezone);
    if (num < 8)
        return 0;
    if (should_be_t != 'T' && should_be_t != 't' && should_be_t != ' ')
        return 0;

    // Designated initialisers, as the member order of struct tm is not fixed
    struct tm parsed = {
        .tm_sec  = sec,
        .tm_min  = min,
        .tm_hour = hour,
        .tm_mday = day,
        .tm_mon  = mon - 1,
        .tm_year = year - 1900
    };

    switch (timezone[0]) {
      case 'Z':
      case 'z':
      case '\0':
          break;
      case '+':
      case '-': {
          // Start from zero so that an offset given as hours only does not
          // leave the minutes undefined; sscanf() may also return EOF here,
          // which must not be mistaken for success.
          unsigned hr_off = 0, min_off = 0;
          if (sscanf(timezone + 1, "%2u:%2u", &hr_off, &min_off) >= 1) {
              if (hr_off < 24 && min_off <= 60) {
                  offset = ((hr_off * 60 + min_off)
                            * (timezone[0] == '+' ? -60 : 60));
              }
          }
          break;
      }
      default:
          return 0;
    }

    when = hts_time_gm(&parsed);
    return when >= 0 ? when + offset : 0;
}
378
379
0
/*
 * Re-read the credentials (id, secret, session token and expiry time) for
 * ad->profile from the shared credentials file, which is named by
 * AWS_SHARED_CREDENTIALS_FILE or defaults to ~/.aws/credentials.
 */
static void refresh_auth_data(s3_auth_data *ad) {
    // Basically a copy of the AWS_SHARED_CREDENTIALS_FILE part of
    // setup_auth_data(), but this only reads the authorisation parts.
    const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE");
    kstring_t expiry_time = KS_INITIALIZE;
    // The key/destination list must end with a NULL key, which is how
    // parse_ini() finds the end of its variable arguments.
    parse_ini(v? v : "~/.aws/credentials", ad->profile.s,
              "aws_access_key_id", &ad->id,
              "aws_secret_access_key", &ad->secret,
              "aws_session_token", &ad->token,
              "expiry_time", &expiry_time,
              NULL);
    if (expiry_time.l) {
        ad->creds_expiry_time = parse_rfc3339_date(&expiry_time);
    }
    ks_free(&expiry_time);
}
394
395
396
/*
 * Percent-encode a query string.
 *
 * Works like escape_path(), except that '=' and '&' are also left untouched
 * so the key=value&key=value structure survives.  Bytes are handled as
 * unsigned values, so characters above 0x7F come out as their real %XX
 * code.  Returns a newly allocated string that the caller must free(), or
 * NULL if memory could not be allocated.
 */
static char *escape_query(const char *qs) {
    static const char hex[] = "0123456789ABCDEF";
    size_t i, j = 0, length, alloced;
    char *escaped;

    length = strlen(qs);
    alloced = length * 3 + 1;   // worst case: every byte becomes "%XX"
    if ((escaped = malloc(alloced)) == NULL) {
        return NULL;
    }

    for (i = 0; i < length; i++) {
        unsigned char c = (unsigned char) qs[i];

        if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
             c == '_' || c == '-' || c == '~' || c == '.' || c == '/' || c == '=' || c == '&') {
            escaped[j++] = (char) c;
        } else {
            escaped[j++] = '%';
            escaped[j++] = hex[c >> 4];
            escaped[j++] = hex[c & 0x0F];
        }
    }

    escaped[j] = '\0';

    return escaped;
}
423
424
425
71
/*
 * Percent-encode the path part of a URL.
 *
 * Letters, digits and "_-~./" are copied as they are; every other byte is
 * written as %XX.  Encoding stops at the first '?', and the '?' together
 * with everything after it is copied unchanged.  Bytes are handled as
 * unsigned values, so characters above 0x7F come out as their real %XX
 * code.  Returns a newly allocated string that the caller must free(), or
 * NULL if memory could not be allocated.
 */
static char *escape_path(const char *path) {
    static const char hex[] = "0123456789ABCDEF";
    size_t i, j = 0, length, alloced;
    char *escaped;

    length = strlen(path);
    alloced = length * 3 + 1;   // worst case: every byte becomes "%XX"

    if ((escaped = malloc(alloced)) == NULL) {
        return NULL;
    }

    for (i = 0; i < length; i++) {
        unsigned char c = (unsigned char) path[i];

        if (c == '?') break; // don't escape ? or beyond

        if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
             c == '_' || c == '-' || c == '~' || c == '.' || c == '/') {
            escaped[j++] = (char) c;
        } else {
            escaped[j++] = '%';
            escaped[j++] = hex[c >> 4];
            escaped[j++] = hex[c & 0x0F];
        }
    }

    if (i != length) {
        // in the case of a '?' copy the rest of the path across unchanged
        strcpy(escaped + j, path + i);
    } else {
        escaped[j] = '\0';
    }

    return escaped;
}
459
460
461
170
/*
 * Guess whether 'str' has already been percent-encoded.
 *
 * Returns non-zero if the string needs no escaping at all (only letters,
 * digits and "_-~./"), or if the escape flag is set once the scan ends.
 * The flag is set by every well-formed "%XX" and cleared by a '%' that is
 * followed by two non-hex characters, so a later well-formed escape sets
 * it again.
 */
static int is_escaped(const char *str) {
    const char *p;
    int saw_escape = 0;
    int saw_unsafe = 0;

    for (p = str; *p != '\0'; p++) {
        int unreserved;

        if (*p == '%' && p[1] != '\0' && p[2] != '\0') {
            if (isxdigit_c(p[1]) && isxdigit_c(p[2])) {
                saw_escape = 1;
                p += 2;     // the loop increment steps over the third byte
                continue;
            }
            // only escaped if all % signs are escaped
            saw_escape = 0;
        }

        unreserved = (*p >= '0' && *p <= '9')
                  || (*p >= 'A' && *p <= 'Z')
                  || (*p >= 'a' && *p <= 'z')
                  || *p == '_' || *p == '-' || *p == '~'
                  || *p == '.' || *p == '/';

        if (!unreserved)
            saw_unsafe = 1;
    }

    return saw_escape || !saw_unsafe;
}
487
488
489
0
/*
 * Point the request at the region named in an "x-amz-bucket-region"
 * response header.
 *
 * Only acts when the current host is an amazonaws.com one.  On success
 * ad->region, ad->host and fp->url are rewritten (the user's query string,
 * if any, is put back on the URL) and 0 is returned.  Returns -1 if the
 * header is absent, the host is not an AWS one, or a string could not be
 * built; in that case fp->url is not given a query string it already has.
 *
 * Note that the header text is modified: the region value is terminated
 * in place.
 */
static int redirect_endpoint(hFILE_s3 *fp, kstring_t *header) {
    static const char region_hdr[] = "x-amz-bucket-region: ";
    s3_auth_data *ad = fp->au;
    kstring_t *url = &fp->url;
    char *new_region;
    char *end;
    int ret = -1;

    // get the new region from the reply header
    if (header->s != NULL
        && (new_region = strstr(header->s, region_hdr)) != NULL) {

        new_region += sizeof(region_hdr) - 1;
        end = new_region;

        while (isalnum_c(*end) || ispunct_c(*end)) end++;

        *end = 0;

        if (strstr(ad->host.s, "amazonaws.com")) {
            int e = 0;

            ad->region.l = 0;
            e |= kputs(new_region, &ad->region) < 0;

            if (ad->url_style == s3_path) {
                // Path style https://s3.{region-code}.amazonaws.com/{bucket-name}/{key-name}
                ad->host.l = 0;
                e |= ksprintf(&ad->host, "s3.%s.amazonaws.com", new_region) < 0;
            } else {
                // Virtual https://{bucket-name}.s3.{region-code}.amazonaws.com/{key-name}
                // Extract the {bucket-name} from {ad->host} to include in subdomain
                kstring_t url_prefix = KS_INITIALIZE;

                // Copy the prefix out before the host buffer is reused, and
                // never pass a NULL prefix to "%s"
                if (kputsn(ad->host.s, strcspn(ad->host.s, "."), &url_prefix) < 0) {
                    e = 1;
                } else {
                    ad->host.l = 0;
                    e |= ksprintf(&ad->host, "%s.s3.%s.amazonaws.com",
                                  url_prefix.s, new_region) < 0;
                }
                free(url_prefix.s);
            }

            if (!e && ad->region.l && ad->host.l) {
                url->l = 0;
                e |= kputs("https://", url) < 0;
                e |= kputs(ad->host.s, url) < 0;
                e |= kputsn(ad->bucket, strlen(ad->bucket), url) < 0;

                if (ad->user_query_string.l) {
                    e |= kputc('?', url) < 0;
                    e |= kputsn(ad->user_query_string.s, ad->user_query_string.l, url) < 0;
                }

                if (!e)
                    ret = 0;
            }
        }
    }

    if (hts_verbose >= HTS_LOG_INFO) fprintf(stderr, "hfile_s3: redirect_endpoint: return %d\n", ret);

    return ret;
}
545
546
/*
 * Build the authentication state and the real request URL for an S3 URL.
 *
 * s3url   URL of the form s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH;
 *         a single name before the '@' is taken as a profile name.
 * mode    Open mode; anything containing 'r' is treated as reading.
 * sigver  Signature version requested by the caller (4 selects v4 handling).
 * url     Receives the http(s) URL to request.
 *
 * Credentials come from the URL if present, otherwise from the environment,
 * then the AWS shared credentials file, then an s3cmd configuration file and
 * finally ~/.awssecret.  Returns a new s3_auth_data, to be released with
 * free_auth_data(), or NULL on failure.
 */
static s3_auth_data * setup_auth_data(const char *s3url, const char *mode,
                                      int sigver, kstring_t *url)
{
    s3_auth_data *ad = calloc(1, sizeof(*ad));
    const char *bucket, *path;
    char *escaped = NULL;
    size_t url_path_pos;
    ptrdiff_t bucket_len;
    int is_https = 1, dns_compliant;
    char *query_start;

    if (!ad)
        return NULL;
    ad->mode = strchr(mode, 'r') ? 'r' : 'w';
    ad->url_style = s3_auto;

    // Our S3 URL format is s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH

    if (s3url[2] == '+') {
        // Test the strchr() result itself; adding 1 first would turn a
        // failed search into a non-NULL pointer
        const char *scheme_end = strchr(s3url, ':');
        if (scheme_end == NULL) {
            free(ad);
            return NULL;
        }
        bucket = scheme_end + 1;
        kputsn(&s3url[3], bucket - &s3url[3], url);
        is_https = strncmp(url->s, "https:", 6) == 0;
    }
    else {
        kputs("https:", url);
        bucket = &s3url[3];
    }
    while (*bucket == '/') kputc(*bucket++, url);

    path = bucket + strcspn(bucket, "/?#@");

    if (*path == '@') {
        const char *colon = strpbrk(bucket, ":@");
        if (*colon != ':') {
            urldecode_kput(bucket, colon - bucket, &ad->profile);
        }
        else {
            const char *colon2 = strpbrk(&colon[1], ":@");
            urldecode_kput(bucket, colon - bucket, &ad->id);
            urldecode_kput(&colon[1], colon2 - &colon[1], &ad->secret);
            if (*colon2 == ':')
                urldecode_kput(&colon2[1], path - &colon2[1], &ad->token);
        }

        bucket = &path[1];
        path = bucket + strcspn(bucket, "/?#");
    }
    else {
        // If the URL has no ID[:SECRET]@, consider environment variables.
        const char *v;
        if ((v = getenv("AWS_ACCESS_KEY_ID")) != NULL) kputs(v, &ad->id);
        if ((v = getenv("AWS_SECRET_ACCESS_KEY")) != NULL) kputs(v, &ad->secret);
        if ((v = getenv("AWS_SESSION_TOKEN")) != NULL) kputs(v, &ad->token);
        if ((v = getenv("AWS_DEFAULT_REGION")) != NULL) kputs(v, &ad->region);
        if ((v = getenv("HTS_S3_HOST")) != NULL) kputs(v, &ad->host);

        if ((v = getenv("AWS_DEFAULT_PROFILE")) != NULL) kputs(v, &ad->profile);
        else if ((v = getenv("AWS_PROFILE")) != NULL) kputs(v, &ad->profile);
        else kputs("default", &ad->profile);

        if ((v = getenv("HTS_S3_ADDRESS_STYLE")) != NULL) {
            if (strcasecmp(v, "virtual") == 0) {
                ad->url_style = s3_virtual;
            } else if (strcasecmp(v, "path") == 0) {
                ad->url_style = s3_path;
            }
        }
    }

    // The profile is used as a section name when reading configuration
    // files, so it must be a real string.  It is still empty here when the
    // URL gave credentials or an empty profile name.
    if (ad->profile.l == 0 && kputs("default", &ad->profile) < 0)
        goto error;

    if (ad->id.l == 0) {
        kstring_t url_style = KS_INITIALIZE;
        kstring_t expiry_time = KS_INITIALIZE;
        const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE");
        parse_ini(v? v : "~/.aws/credentials", ad->profile.s,
                  "aws_access_key_id", &ad->id,
                  "aws_secret_access_key", &ad->secret,
                  "aws_session_token", &ad->token,
                  "region", &ad->region,
                  "addressing_style", &url_style,
                  "expiry_time", &expiry_time,
                  NULL);

        if (url_style.l) {
            if (strcmp(url_style.s, "virtual") == 0) {
                ad->url_style = s3_virtual;
            } else if (strcmp(url_style.s, "path") == 0) {
                ad->url_style = s3_path;
            } else {
                ad->url_style = s3_auto;
            }
        }
        if (expiry_time.l) {
            // Not a real part of the AWS configuration file, but it allows
            // support for short-term credentials like those for the IAM
            // service.  The botocore library uses the key "expiry_time"
            // internally for this purpose.
            // See https://github.com/boto/botocore/blob/develop/botocore/credentials.py
            ad->creds_expiry_time = parse_rfc3339_date(&expiry_time);
        }

        ks_free(&url_style);
        ks_free(&expiry_time);
    }

    if (ad->id.l == 0) {
        kstring_t url_style = KS_INITIALIZE;
        const char *v = getenv("HTS_S3_S3CFG");
        parse_ini(v? v : "~/.s3cfg", ad->profile.s, "access_key", &ad->id,
                  "secret_key", &ad->secret, "access_token", &ad->token,
                  "host_base", &ad->host,
                  "bucket_location", &ad->region,
                  "host_bucket", &url_style,
                  NULL);

        if (url_style.l) {
            // Conforming to s3cmd's GitHub PR#416, host_bucket without the "%(bucket)s" string
            // indicates use of path style addressing.
            if (strstr(url_style.s, "%(bucket)s") == NULL) {
                ad->url_style = s3_path;
            } else {
                ad->url_style = s3_auto;
            }
        }

        ks_free(&url_style);
    }

    if (ad->id.l == 0)
        parse_simple("~/.awssecret", &ad->id, &ad->secret);


    // if address_style is set, force the dns_compliant setting
    if (ad->url_style == s3_virtual) {
        dns_compliant = 1;
    } else if (ad->url_style == s3_path) {
        dns_compliant = 0;
    } else {
        dns_compliant = is_dns_compliant(bucket, path, is_https);
    }

    if (ad->host.l == 0)
        kputs("s3.amazonaws.com", &ad->host);

    if (!dns_compliant && ad->region.l > 0
        && strcmp(ad->host.s, "s3.amazonaws.com") == 0) {
        // Can avoid a redirection by including the region in the host name
        // (assuming the right one has been specified)
        ad->host.l = 0;
        ksprintf(&ad->host, "s3.%s.amazonaws.com", ad->region.s);
    }

    if (ad->region.l == 0)
        kputs("us-east-1", &ad->region);

    if (!is_escaped(path)) {
        escaped = escape_path(path);
        if (escaped == NULL) {
            goto error;
        }
    }

    bucket_len = path - bucket;

    // Use virtual hosted-style access if possible, otherwise path-style.
    if (dns_compliant) {
        size_t url_host_pos = url->l;
        // Append "bucket.host" to url
        kputsn_(bucket, bucket_len, url);
        kputc('.', url);
        kputsn(ad->host.s, ad->host.l, url);
        url_path_pos = url->l;

        if (sigver == 4) {
            // Copy back to ad->host to use when making the signature
            ad->host.l = 0;
            kputsn(url->s + url_host_pos, url->l - url_host_pos, &ad->host);
        }
    }
    else {
        // Append "host/bucket" to url
        kputsn(ad->host.s, ad->host.l, url);
        url_path_pos = url->l;
        kputc('/', url);
        kputsn(bucket, bucket_len, url);
    }

    kputs(escaped == NULL ? path : escaped, url);

    if (sigver == 4 || !dns_compliant) {
        ad->bucket = malloc(url->l - url_path_pos + 1);
        if (ad->bucket == NULL) {
            goto error;
        }
        memcpy(ad->bucket, url->s + url_path_pos, url->l - url_path_pos + 1);
        ad->is_v4 = 1;
    }
    else {
        ad->bucket = malloc(url->l - url_path_pos + bucket_len + 2);
        if (ad->bucket == NULL) {
            goto error;
        }
        ad->bucket[0] = '/';
        memcpy(ad->bucket + 1, bucket, bucket_len);
        memcpy(ad->bucket + bucket_len + 1,
               url->s + url_path_pos, url->l - url_path_pos + 1);
        ad->is_v4 = 0;
    }

    // write any query strings to its own place to use later
    if ((query_start = strchr(ad->bucket, '?'))) {
        kputs(query_start + 1, &ad->user_query_string);
        *query_start = 0;
    }

    free(escaped);

    return ad;

 error:
    free(escaped);
    free_auth_data(ad);
    return NULL;
}
773
774
775
0
/*
 * Produce the headers for an AWS signature version 2 request.
 *
 * fp       Stream whose 'date' and 'authorisation' strings are appended to.
 * request  HTTP verb placed at the start of the string to sign.
 *
 * Credentials that are within CREDENTIAL_LIFETIME seconds of expiring are
 * re-read first.  Without an id and secret only the Date header is made.
 * Returns 0 on success and -1 if the time could not be converted or a
 * string could not be built.
 */
static int v2_authorisation(hFILE_s3 *fp, char *request) {
    s3_auth_data *ad = fp->au;
    time_t now = time(NULL);

#ifdef HAVE_GMTIME_R
    struct tm tm_buffer;
    struct tm *tm = gmtime_r(&now, &tm_buffer);
#else
    struct tm *tm = gmtime(&now);
#endif

    kstring_t message = KS_INITIALIZE;
    unsigned char digest[DIGEST_BUFSIZ];
    size_t digest_len;

    // strftime() must not be handed a NULL time structure
    if (tm == NULL)
        return -1;

    if (ad->creds_expiry_time > 0
        && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) {
        refresh_auth_data(ad);
    }

    // date format between v2 and v4 is different.

    strftime(ad->date, sizeof(ad->date), "Date: %a, %d %b %Y %H:%M:%S GMT", tm);

    if (kputs(ad->date, &fp->date) < 0)
        return -1;

    if (!ad->id.l || !ad->secret.l) {
        ad->auth_time = now;
        return 0;
    }

    // ad->date + 6 skips the "Date: " prefix
    if (ksprintf(&message, "%s\n\n\n%s\n%s%s%s%s",
                 request, ad->date + 6,
                 ad->token.l ? "x-amz-security-token:" : "",
                 ad->token.l ? ad->token.s : "",
                 ad->token.l ? "\n" : "",
                 ad->bucket) < 0) {
        goto fail;  // message may hold a partial allocation
    }

    digest_len = s3_sign(digest, &ad->secret, &message);

    if (ksprintf(&fp->authorisation, "Authorization: AWS %s:", ad->id.s) < 0)
        goto fail;

    base64_kput(digest, digest_len, &fp->authorisation);

    free(message.s);
    ad->auth_time = now;
    return 0;

 fail:
    free(message.s);
    return -1;
}
830
831
/***************************************************************
832
833
AWS S3 sig version 4 writing code
834
835
****************************************************************/
836
837
269
static void hash_string(char *in, size_t length, char *out, size_t out_len) {
838
269
    unsigned char hashed[SHA256_DIGEST_BUFSIZE];
839
269
    int i, j;
840
841
269
    s3_sha256((const unsigned char *)in, length, hashed);
842
843
8.87k
    for (i = 0, j = 0; i < SHA256_DIGEST_BUFSIZE; i++, j+= 2) {
844
8.60k
        snprintf(out + j, out_len - j, "%02x", hashed[i]);
845
8.60k
    }
846
269
}
847
848
849
99
/*
 * Compute the AWS signature version 4 signature of 'string_to_sign'.
 *
 * The signing key is derived by chaining HMAC-SHA256 over the short date,
 * the region, the service name "s3" and "aws4_request", starting from
 * "AWS4" followed by the secret key.  The signature is written to
 * 'signature_string' (of size 'sig_string_len') as NUL terminated
 * lower-case hexadecimal; HASH_LENGTH_SHA256 bytes hold all of it.
 *
 * Returns 0 on success, or -1 if the key string could not be built, the
 * output buffer has no room, or no signature bytes were produced.
 */
static int make_signature(s3_auth_data *ad, kstring_t *string_to_sign, char *signature_string, size_t sig_string_len) {
    static const char hex[] = "0123456789abcdef";
    unsigned char date_key[SHA256_DIGEST_BUFSIZE];
    unsigned char date_region_key[SHA256_DIGEST_BUFSIZE];
    unsigned char date_region_service_key[SHA256_DIGEST_BUFSIZE];
    unsigned char signing_key[SHA256_DIGEST_BUFSIZE];
    unsigned char signature[SHA256_DIGEST_BUFSIZE];

    const unsigned char service[] = "s3";
    const unsigned char request[] = "aws4_request";

    kstring_t secret_access_key = KS_INITIALIZE;
    unsigned int len = 0;
    size_t i, j = 0;

    if (sig_string_len == 0)
        return -1;

    ksprintf(&secret_access_key, "AWS4%s", ad->secret.s);

    if (secret_access_key.l == 0) {
        ks_free(&secret_access_key);
        return -1;
    }

    s3_sign_sha256(secret_access_key.s, secret_access_key.l, (const unsigned char *)ad->date_short, strlen(ad->date_short), date_key, &len);
    s3_sign_sha256(date_key, len, (const unsigned char *)ad->region.s, ad->region.l, date_region_key, &len);
    s3_sign_sha256(date_region_key, len, service, 2, date_region_service_key, &len);
    s3_sign_sha256(date_region_service_key, len, request, 12, signing_key, &len);
    s3_sign_sha256(signing_key, len, (const unsigned char *)string_to_sign->s, string_to_sign->l, signature, &len);

    // Stay inside both the digest and the caller's buffer, leaving room
    // for the terminator
    for (i = 0; i < len && i < sizeof(signature) && j + 2 < sig_string_len; i++) {
        signature_string[j++] = hex[signature[i] >> 4];
        signature_string[j++] = hex[signature[i] & 0x0f];
    }
    signature_string[j] = '\0';

    ks_free(&secret_access_key);

    return len > 0 ? 0 : -1;
}
883
884
885
170
/* Build the "Authorization: AWS4-HMAC-SHA256 ..." header line into auth.

   http_request is the HTTP verb and content is the hex payload hash; both go
   into the canonical request together with ad->bucket (used as the canonical
   URI) and ad->canonical_query_string.  When a session token is present the
   x-amz-security-token header is included in the signed headers.

   Returns 0 on success.  Also returns 0, leaving auth untouched, when there
   is no id or no secret to sign with.  Returns -1 if any step fails.
*/
static int make_authorisation(s3_auth_data *ad, char *http_request, char *content, kstring_t *auth) {
    kstring_t signed_headers = KS_INITIALIZE;
    kstring_t canonical_headers = KS_INITIALIZE;
    kstring_t canonical_request = KS_INITIALIZE;
    kstring_t scope = KS_INITIALIZE;
    kstring_t string_to_sign = KS_INITIALIZE;
    char cr_hash[HASH_LENGTH_SHA256];
    char signature_string[HASH_LENGTH_SHA256];
    int with_token;
    int ret = -1;

    // No credentials: nothing to sign, which is not an error.
    if (ad->id.l == 0 || ad->secret.l == 0)
        return 0;

    with_token = (ad->token.l != 0);

    // Names of the headers that take part in the signature.
    if (with_token)
        kputs("host;x-amz-content-sha256;x-amz-date;x-amz-security-token", &signed_headers);
    else
        kputs("host;x-amz-content-sha256;x-amz-date", &signed_headers);

    if (signed_headers.l == 0)
        return -1;

    // The same headers again, this time with their values.
    if (with_token) {
        ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\nx-amz-security-token:%s\n",
                 ad->host.s, content, ad->date_long, ad->token.s);
    } else {
        ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\n",
                 ad->host.s, content, ad->date_long);
    }

    if (canonical_headers.l == 0)
        goto cleanup;

    // bucket == canonical_uri
    ksprintf(&canonical_request, "%s\n%s\n%s\n%s\n%s\n%s",
             http_request, ad->bucket, ad->canonical_query_string.s,
             canonical_headers.s, signed_headers.s, content);

    if (canonical_request.l == 0)
        goto cleanup;

    hash_string(canonical_request.s, canonical_request.l, cr_hash, sizeof(cr_hash));

    ksprintf(&scope, "%s/%s/s3/aws4_request", ad->date_short, ad->region.s);

    if (scope.l == 0)
        goto cleanup;

    ksprintf(&string_to_sign, "AWS4-HMAC-SHA256\n%s\n%s\n%s", ad->date_long, scope.s, cr_hash);

    if (string_to_sign.l == 0)
        goto cleanup;

    if (make_signature(ad, &string_to_sign, signature_string, sizeof(signature_string)) != 0)
        goto cleanup;

    ksprintf(auth, "Authorization: AWS4-HMAC-SHA256 Credential=%s/%s/%s/s3/aws4_request,SignedHeaders=%s,Signature=%s",
             ad->id.s, ad->date_short, ad->region.s, signed_headers.s, signature_string);

    if (auth->l != 0)
        ret = 0;

 cleanup:
    ks_free(&signed_headers);
    ks_free(&canonical_headers);
    ks_free(&canonical_request);
    ks_free(&scope);
    ks_free(&string_to_sign);

    return ret;
}
967
968
969
170
/* Refresh the cached request timestamps when they are older than
   AUTH_LIFETIME.

   On a refresh, ad->date_long ("YYYYMMDDTHHMMSSZ"), ad->date_short
   ("YYYYMMDD") and ad->date_html ("x-amz-date: ...") are regenerated from
   now, in UTC.  ad->auth_time is only advanced once all three have been
   written, so a failed refresh is retried on the next call rather than
   leaving half-updated dates marked as current.

   Returns 0 if ad->date_html holds a value, -1 on failure.
*/
static int update_time(s3_auth_data *ad, time_t now) {
    if (now - ad->auth_time > AUTH_LIFETIME) {
#ifdef HAVE_GMTIME_R
        struct tm tm_buffer;
        struct tm *tm = gmtime_r(&now, &tm_buffer);
#else
        struct tm *tm = gmtime(&now);
#endif

        // gmtime can fail (returns NULL); strftime must not see that.
        if (tm == NULL) {
            return -1;
        }

        if (strftime(ad->date_long, 17, "%Y%m%dT%H%M%SZ", tm) != 16) {
            return -1;
        }

        if (strftime(ad->date_short, 9, "%Y%m%d", tm) != 8) {
            return -1;
        }

        ad->date_html.l = 0;

        if (ksprintf(&ad->date_html, "x-amz-date: %s", ad->date_long) < 0) {
            return -1;
        }

        // update timestamp, now that everything derived from it is in place
        ad->auth_time = now;
    }

    return ad->date_html.l ? 0 : -1;
}
998
999
1000
0
/* qsort() comparison function for an array of C string pointers: orders the
   pointed-to strings with strcmp(). */
static int query_cmp(const void *p1, const void *p2) {
    const char *lhs = *(char * const *) p1;
    const char *rhs = *(char * const *) p2;

    return strcmp(lhs, rhs);
}
1006
1007
1008
/* Query strings must be in alphabetical order for authorisation */
1009
1010
0
/* Sort the '&' separated parts of qs into strcmp() order, pass the rejoined
   string through escape_query() and store the result back in qs.

   ksplit() splits qs in place and the parts are then used as separate C
   strings, so on failure the original contents of qs are not preserved.

   Returns 0 on success, -1 on failure (including when there is nothing to
   order).
*/
static int order_query_string(kstring_t *qs) {
    int *query_offset = NULL;
    int num_queries = 0, i;
    char **queries = NULL;
    kstring_t ordered = KS_INITIALIZE;
    char *escaped = NULL;
    int ret = -1;

    if ((query_offset = ksplit(qs, '&', &num_queries)) == NULL) {
        return -1;
    }

    // With no parts ordered.s would stay NULL and malloc(0) is
    // implementation defined, so treat this as a failure up front.
    if (num_queries <= 0)
        goto err;

    if ((queries = malloc(num_queries * sizeof(*queries))) == NULL)
        goto err;

    for (i = 0; i < num_queries; i++) {
        queries[i] = qs->s + query_offset[i];
    }

    qsort(queries, num_queries, sizeof(char *), query_cmp);

    for (i = 0; i < num_queries; i++) {
        if (i && kputs("&", &ordered) < 0)
            goto err;

        if (kputs(queries[i], &ordered) < 0)
            goto err;
    }

    if ((escaped = escape_query(ordered.s)) == NULL)
        goto err;

    qs->l = 0;

    if (kputs(escaped, qs) < 0)
        goto err;

    ret = 0;
 err:
    free(ordered.s);
    free(queries);
    free(query_offset);
    free(escaped);

    return ret;
}
1054
1055
1056
170
/* Prepare the per-request header values for a signed (v4) request.

   request is the HTTP verb.  content is the payload to hash, or NULL for an
   empty payload.  cqs is the canonical query string, or NULL for none.  If
   uqs is non-zero the user supplied query string is appended to cqs and the
   combined string is re-ordered.

   On success fp->authorisation, fp->date, fp->content_hash and (when a token
   is in use) fp->token have been appended to.  Returns 0 on success, -1 on
   failure.
*/
static int v4_authorisation(hFILE_s3 *fp, char *request, kstring_t *content, char *cqs, int uqs) {
    s3_auth_data *ad = fp->au;
    char content_hash[HASH_LENGTH_SHA256];
    time_t now = time(NULL);

    if (update_time(ad, now) != 0)
        return -1;

    // Credentials with an expiry time get refreshed when close to expiring.
    if (ad->creds_expiry_time > 0
        && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) {
        refresh_auth_data(ad);
    }

    if (content != NULL)
        hash_string(content->s, content->l, content_hash, sizeof(content_hash));
    else
        hash_string("", 0, content_hash, sizeof(content_hash)); // empty hash

    ad->canonical_query_string.l = 0;

    if (cqs != NULL) {
        kputs(cqs, &ad->canonical_query_string);

        /* add a user provided query string, normally only useful on upload initiation */
        if (uqs) {
            kputs("&", &ad->canonical_query_string);
            kputs(ad->user_query_string.s, &ad->canonical_query_string);

            if (order_query_string(&ad->canonical_query_string) != 0)
                return -1;
        }
    }

    if (make_authorisation(ad, request, content_hash, &fp->authorisation) != 0)
        return -1;

    kputs(ad->date_html.s, &fp->date);
    kputsn(content_hash, HASH_LENGTH_SHA256, &fp->content_hash);

    if (fp->date.l == 0 || fp->content_hash.l == 0)
        return -1;

    if (ad->token.l != 0)
        ksprintf(&fp->token, "x-amz-security-token: %s", ad->token.s);

    return 0;
}
1112
1113
0
/* Replace the stored region with a copy of *region.
   Returns 0 on success, 1 if the copy failed. */
static int set_region(s3_auth_data *ad, kstring_t *region) {
    int copied;

    ad->region.l = 0;
    copied = kputsn(region->s, region->l, &ad->region);

    return copied < 0;
}
1117
1118
//
1119
// Writing and reading handling
1120
//
1121
1122
// Some common code
1123
1124
0
#define S3_MOVED_PERMANENTLY 301
1125
0
#define S3_TEMPORARY_REDIRECT 307
1126
0
#define S3_BAD_REQUEST 400
1127
1128
/* File-wide libcurl related state. */
static struct {
1129
    kstring_t useragent;        // handed to each request via CURLOPT_USERAGENT
1130
    CURLSH *share;              // NOTE(review): set up outside this section; presumably the libcurl share handle - confirm
1131
    pthread_mutex_t share_lock; // taken by share_lock(), released by share_unlock()
1132
} curl = { { 0, 0, NULL }, NULL, PTHREAD_MUTEX_INITIALIZER };
1133
1134
static void share_lock(CURL *handle, curl_lock_data data,
1135
1
                       curl_lock_access access, void *userptr) {
1136
1
    pthread_mutex_lock(&curl.share_lock);
1137
1
}
1138
1139
1
/* Unlock callback matching share_lock(): releases curl.share_lock and
   ignores all of its arguments. */
static void share_unlock(CURL *handle, curl_lock_data data, void *userptr) {
    (void) handle;
    (void) data;
    (void) userptr;

    pthread_mutex_unlock(&curl.share_lock);
}
1142
1143
1144
170
/* Put the six per-request header strings held in fp into their initial
   state with ks_initialize(). */
static void initialise_authorisation_values(hFILE_s3 *fp) {
    kstring_t *fields[] = {
        &fp->content_hash, &fp->authorisation, &fp->content,
        &fp->date, &fp->token, &fp->range
    };
    size_t i;

    for (i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
        ks_initialize(fields[i]);
    }
}
1152
1153
1154
170
/* Reset the six per-request header strings held in fp with ks_clear(), ready
   for the next request to fill them in. */
static void clear_authorisation_values(hFILE_s3 *fp) {
    kstring_t *fields[] = {
        &fp->content_hash, &fp->authorisation, &fp->content,
        &fp->date, &fp->token, &fp->range
    };
    size_t i;

    for (i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
        ks_clear(fields[i]);
    }
}
1162
1163
1164
340
/* Release the six per-request header strings held in fp with ks_free(). */
static void free_authorisation_values(hFILE_s3 *fp) {
    kstring_t *fields[] = {
        &fp->content_hash, &fp->authorisation, &fp->content,
        &fp->date, &fp->token, &fp->range
    };
    size_t i;

    for (i = 0; i < sizeof(fields) / sizeof(fields[0]); i++) {
        ks_free(fields[i]);
    }
}
1172
1173
/* As the response text is case insensitive we need a version of strstr that
1174
   is also case insensitive.  The response is small so no need to get too
1175
   complicated on the string search.
1176
*/
1177
0
/* Case insensitive substring search.  Returns a pointer to the first place
   in haystack where needle matches when both are compared through
   toupper_c(), or NULL if there is no match.  An empty haystack never
   matches. */
static char *stristr(char *haystack, char *needle) {
    char *start;

    for (start = haystack; *start; start++) {
        char *h = start;
        char *n = needle;

        // Walk both strings while they agree, stopping at the end of either.
        while (toupper_c(*h) == toupper_c(*n)) {
            h++;
            n++;

            if (*h == '\0' || *n == '\0')
                break;
        }

        // The whole needle was consumed, so it matched at this position.
        if (*n == '\0')
            return start;
    }

    return NULL;
}
1197
1198
1199
0
/* Append to out the text of in found between start_tag and the next end_tag
   after it.  Tags are matched case insensitively using stristr().

   Returns the result of kputsn() on the extracted text, or EOF if in is NULL
   or either tag cannot be found.
*/
static int get_entry(char *in, char *start_tag, char *end_tag, kstring_t *out) {
    char *from;
    char *to;

    if (in == NULL)
        return EOF;

    if ((from = stristr(in, start_tag)) == NULL)
        return EOF;

    from += strlen(start_tag);  // skip over the opening tag itself

    if ((to = stristr(from, end_tag)) == NULL)
        return EOF;

    return kputsn(from, to - from, out);
}
1217
1218
1219
0
/* Print the <Code> and, if not empty, the <Message> from an S3 error
   response body to stderr, along with the HTTP response code.

   Returns 0 if both entries were found, -1 otherwise.  The temporary string
   is released on every path.
*/
static int report_s3_error(kstring_t *body, long resp_code) {
    kstring_t entry = KS_INITIALIZE;
    int ret = -1;

    if (get_entry(body->s, "<Code>", "</Code>", &entry) == EOF) {
        goto out;
    }

    fprintf(stderr, "hfile_s3: S3 error %ld: %s\n", resp_code, entry.s);

    ks_clear(&entry);

    // entry still owns the buffer from the <Code> lookup here, so a failure
    // must go through the common exit to free it.
    if (get_entry(body->s, "<Message>", "</Message>", &entry) == EOF) {
        goto out;
    }

    if (entry.l)
        fprintf(stderr, "%s\n", entry.s);

    ret = 0;

 out:
    ks_free(&entry);

    return ret;
}
1241
1242
1243
/* Map an HTTP status code to an errno value.

   Codes below 300 give 0.  Specific 4xx/5xx codes have their own mapping.
   Any other 5xx gives EIO, any other 4xx gives EINVAL and 3xx gives EIO.
*/
static int http_status_errno(int status)
{
    if (status < 300)
        return 0;

    switch (status) {
    case 401:
    case 407: return EPERM;
    case 403: return EACCES;
    case 404:
    case 410: return ENOENT;
    case 405: return EROFS;
    case 408:
    case 504: return ETIMEDOUT;
    case 501: return ENOSYS;
    case 503: return EBUSY;
    default:  break;
    }

    // No dedicated mapping: fall back on the class of the status code.
    if (status >= 400 && status < 500)
        return EINVAL;

    return EIO;
}
1267
1268
1269
170
/* Put the strings owned directly by fp (data buffer, URL, upload id and
   completion message) into their initial state.  The last two are only used
   when writing. */
static void initialise_local(hFILE_s3 *fp) {
    kstring_t *owned[] = {
        &fp->buffer,
        &fp->url,
        &fp->upload_id,           // write only
        &fp->completion_message   // write only
    };
    size_t i;

    for (i = 0; i < sizeof(owned) / sizeof(owned[0]); i++) {
        ks_initialize(owned[i]);
    }
}
1275
1276
1277
170
/* Release everything fp owns apart from its authorisation data: the strings
   set up by initialise_local(), the curl easy handle and the per-request
   header strings. */
static void cleanup_local(hFILE_s3 *fp) {
    kstring_t *owned[] = {
        &fp->buffer, &fp->url, &fp->upload_id, &fp->completion_message
    };
    size_t i;

    for (i = 0; i < sizeof(owned) / sizeof(owned[0]); i++) {
        ks_free(owned[i]);
    }

    curl_easy_cleanup(fp->curl);
    free_authorisation_values(fp);
}
1285
1286
1287
0
/* Full teardown of fp: free its authorisation data and then everything
   handled by cleanup_local(). */
static void cleanup(hFILE_s3 *fp) {
    s3_auth_data *auth = fp->au;

    free_auth_data(auth);
    cleanup_local(fp);
}
1292
1293
0
static size_t response_callback(void *contents, size_t size, size_t nmemb, void *userp) {
1294
0
    size_t realsize = size * nmemb;
1295
0
    kstring_t *resp = (kstring_t *)userp;
1296
1297
0
    if (kputsn((const char *)contents, realsize, resp) == EOF) {
1298
0
        return 0;
1299
0
    }
1300
1301
0
    return realsize;
1302
0
}
1303
1304
1305
1.01k
/* Append value to the header list *head.  On success *head is updated to the
   list returned by curl_slist_append() and 0 is returned.  On failure *head
   is left unchanged and 1 is returned. */
static int add_header(struct curl_slist **head, char *value) {
    struct curl_slist *extended = curl_slist_append(*head, value);

    if (extended == NULL)
        return 1;

    *head = extended;
    return 0;
}
1317
1318
1319
/* Build the request header list and attach it to fp->curl.

   The list always starts with empty "Content-Type:" and "Expect:" entries
   followed by, in order: auth (if not empty), date, content (if not empty),
   range (if the pointer is not NULL) and token (if not empty).

   Returns the list, which the caller frees with curl_slist_free_all(), or
   NULL if any entry could not be added.
*/
static struct curl_slist *set_html_headers(hFILE_s3 *fp, kstring_t *auth, kstring_t *date,
                 kstring_t *content, kstring_t *token, kstring_t *range) {
    struct curl_slist *list = NULL;
    int failed;

    /* The next two lines have the effect of preventing curl from
       adding these headers.  If they exist it can lead to conflicts
       in the signature calculations (not present in all S3 systems).
    */
    failed = add_header(&list, "Content-Type:");
    failed |= add_header(&list, "Expect:");

    if (failed)
        goto fail;

    if (auth->l && add_header(&list, auth->s))
        goto fail;

    if (add_header(&list, date->s))
        goto fail;

    if (content->l && add_header(&list, content->s))
        goto fail;

    if (range && add_header(&list, range->s))
        goto fail;

    if (token->l && add_header(&list, token->s))
        goto fail;

    curl_easy_setopt(fp->curl, CURLOPT_HTTPHEADER, list);

    return list;

fail:
    curl_slist_free_all(list);

    return NULL;
}
1363
1364
1365
/*
1366
1367
S3 Multipart Upload
1368
-------------------
1369
1370
There are several steps in the Multipart upload.
1371
1372
1373
1) Initiate Upload
1374
------------------
1375
1376
Initiate the upload and get an upload ID.  This ID is used in all other steps.
1377
1378
1379
2) Upload Part
1380
--------------
1381
1382
Upload a part of the data.  5Mb minimum part size (except for the last part).
1383
Each part is numbered and a successful upload returns an Etag header value that
1384
needs to be used for the completion step.
1385
1386
Step repeated till all data is uploaded.
1387
1388
1389
3) Completion
1390
-------------
1391
1392
Complete the upload by sending all the part numbers along with their associated
1393
Etag values.
1394
1395
1396
Optional - Abort
1397
----------------
1398
1399
If something goes wrong this instructs the server to delete all the partial
1400
uploads and abandon the upload process.
1401
*/
1402
1403
/*
1404
   This is the writing code.
1405
*/
1406
1407
0
#define MINIMUM_S3_WRITE_SIZE 5242880
1408
1409
// Lets the part memory size grow to about 1Gb giving a 2.5Tb max file size.
1410
// Max. parts allowed by AWS is 10000, so use ceil(10000.0/9.0)
1411
0
#define EXPAND_ON 1112
1412
1413
1414
1415
/*
1416
    The partially uploaded file will hang around unless the delete command is sent.
1417
*/
1418
0
/* Send a DELETE for the current upload id, then mark fp as aborted and tear
   it down with cleanup().

   errno is saved on entry and restored before returning so that the caller
   still sees the error that led to the abort.  Returns 0 if the request was
   performed, -1 otherwise.  fp is cleaned up in either case.
*/
static int abort_upload(hFILE_s3 *fp) {
    char http_request[] = "DELETE";
    kstring_t query = KS_INITIALIZE;
    kstring_t target = KS_INITIALIZE;
    struct curl_slist *headers = NULL;
    CURLcode err;
    int ret = -1;
    const int save_errno = errno; // keep the errno that caused the need to abort

    clear_authorisation_values(fp);

    // Each step depends on the one before; stop at the first failure.
    if (ksprintf(&query, "uploadId=%s", fp->upload_id.s) < 0
        || v4_authorisation(fp,  http_request, NULL, query.s, 0) != 0
        || ksprintf(&target, "%s?%s", fp->url.s, query.s) < 0
        || ksprintf(&fp->content, "x-amz-content-sha256: %s", fp->content_hash.s) < 0) {
        goto out;
    }

    curl_easy_reset(fp->curl);

    err = curl_easy_setopt(fp->curl, CURLOPT_CUSTOMREQUEST, http_request);
    err |= curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s);
    err |= curl_easy_setopt(fp->curl, CURLOPT_URL, target.s);
    err |= curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose);

    if (err != CURLE_OK)
        goto out;

    headers = set_html_headers(fp, &fp->authorisation, &fp->date, &fp->content, &fp->token, NULL);

    if (headers == NULL)
        goto out;

    fp->ret = curl_easy_perform(fp->curl);

    if (fp->ret == CURLE_OK)
        ret = 0;

 out:
    ks_free(&target);
    ks_free(&query);
    curl_slist_free_all(headers);

    fp->aborted = 1;
    cleanup(fp);

    errno = save_errno;
    return ret;
}
1478
1479
1480
0
/* Finish a multipart upload: close the completion message with
   </CompleteMultipartUpload> and POST it for the current upload id.

   The response body is appended to resp.  Returns 0 if the request was
   performed, -1 otherwise.  The HTTP status and the response body are left
   for the caller to check.  All exits go through the common cleanup.
*/
static int complete_upload(hFILE_s3 *fp, kstring_t *resp) {
    kstring_t url = KS_INITIALIZE;
    kstring_t canonical_query_string = KS_INITIALIZE;
    int ret = -1;
    struct curl_slist *headers = NULL;
    char http_request[] = "POST";
    CURLcode err;

    clear_authorisation_values(fp);

    if (ksprintf(&canonical_query_string, "uploadId=%s", fp->upload_id.s) < 0) {
        goto out;
    }

    // finish off the completion reply
    if (kputs("</CompleteMultipartUpload>\n", &fp->completion_message) < 0) {
        goto out;
    }

    if (v4_authorisation(fp,  http_request, &fp->completion_message, canonical_query_string.s, 0) != 0) {
        goto out;
    }

    if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) {
        goto out;
    }

    if (ksprintf(&fp->content, "x-amz-content-sha256: %s", fp->content_hash.s) < 0) {
        goto out;
    }

    curl_easy_reset(fp->curl);

    err = curl_easy_setopt(fp->curl, CURLOPT_POST, 1L);

    err |= curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDS, fp->completion_message.s);
    err |= curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDSIZE, (long) fp->completion_message.l);
    err |= curl_easy_setopt(fp->curl, CURLOPT_WRITEFUNCTION, response_callback);
    err |= curl_easy_setopt(fp->curl, CURLOPT_WRITEDATA, (void *)resp);
    err |= curl_easy_setopt(fp->curl, CURLOPT_URL, url.s);
    err |= curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s);
    err |= curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose);

    if (err != CURLE_OK)
        goto out;

    headers = set_html_headers(fp, &fp->authorisation, &fp->date, &fp->content, &fp->token, NULL);

    if (!headers)
        goto out;

    fp->ret = curl_easy_perform(fp->curl);

    if (fp->ret == CURLE_OK) {
        ret = 0;
    }

 out:
    ks_free(&url);
    ks_free(&canonical_query_string);
    curl_slist_free_all(headers);

    return ret;
}
1544
1545
1546
0
static size_t upload_callback(void *ptr, size_t size, size_t nmemb, void *stream) {
1547
0
    size_t realsize = size * nmemb;
1548
0
    hFILE_s3 *fp = (hFILE_s3 *)stream;
1549
0
    size_t read_length;
1550
1551
0
    if (realsize > (fp->buffer.l - fp->index)) {
1552
0
        read_length = fp->buffer.l - fp->index;
1553
0
    } else {
1554
0
        read_length = realsize;
1555
0
    }
1556
1557
0
    memcpy(ptr, fp->buffer.s + fp->index, read_length);
1558
0
    fp->index += read_length;
1559
1560
0
    return read_length;
1561
0
}
1562
1563
1564
0
/* PUT the contents of fp->buffer as part number fp->part_no of the current
   upload.

   The response headers are appended to resp so that the caller can extract
   the ETag.  Returns 0 if the request was performed, -1 otherwise.  The HTTP
   status is left for the caller to check.  All exits go through the common
   cleanup.
*/
static int upload_part(hFILE_s3 *fp, kstring_t *resp) {
    kstring_t url = KS_INITIALIZE;
    kstring_t canonical_query_string = KS_INITIALIZE;
    int ret = -1;
    struct curl_slist *headers = NULL;
    char http_request[] = "PUT";
    CURLcode err;

    clear_authorisation_values(fp);

    if (ksprintf(&canonical_query_string, "partNumber=%d&uploadId=%s", fp->part_no, fp->upload_id.s) < 0) {
        goto out;
    }

    if (v4_authorisation(fp, http_request, &fp->buffer, canonical_query_string.s, 0) != 0) {
        goto out;
    }

    if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) {
        goto out;
    }

    fp->index = 0; // upload_callback() reads from the start of the buffer
    if (ksprintf(&fp->content, "x-amz-content-sha256: %s", fp->content_hash.s) < 0) {
        goto out;
    }

    curl_easy_reset(fp->curl);

    err = curl_easy_setopt(fp->curl, CURLOPT_UPLOAD, 1L);
    err |= curl_easy_setopt(fp->curl, CURLOPT_READFUNCTION, upload_callback);
    err |= curl_easy_setopt(fp->curl, CURLOPT_READDATA, fp);
    err |= curl_easy_setopt(fp->curl, CURLOPT_INFILESIZE_LARGE, (curl_off_t)fp->buffer.l);
    err |= curl_easy_setopt(fp->curl, CURLOPT_HEADERFUNCTION, response_callback);
    err |= curl_easy_setopt(fp->curl, CURLOPT_HEADERDATA, (void *)resp);
    err |= curl_easy_setopt(fp->curl, CURLOPT_URL, url.s);
    err |= curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s);
    err |= curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose);

    if (err != CURLE_OK)
        goto out;

    headers = set_html_headers(fp, &fp->authorisation, &fp->date, &fp->content, &fp->token, NULL);

    if (!headers)
        goto out;

    fp->ret = curl_easy_perform(fp->curl);

    if (fp->ret == CURLE_OK) {
        ret = 0;
    }

 out:
    ks_free(&url);
    ks_free(&canonical_query_string);
    curl_slist_free_all(headers);

    return ret;
}
1624
1625
1626
0
/* hFILE write backend: append nbytes from bufferv to fp->buffer and, once
   the buffer has grown beyond fp->part_size, upload it as the next part.

   After a successful part upload the part's ETag is recorded in
   fp->completion_message, the part number is advanced and the buffer is
   emptied.  If fp->expand is set the part size doubles every EXPAND_ON parts.
   Any failure while uploading aborts the whole upload.

   Returns nbytes on success, -1 on failure.
*/
static ssize_t s3_write(hFILE *fpv, const void *bufferv, size_t nbytes) {
    hFILE_s3 *fp = (hFILE_s3 *)fpv;
    const char *buffer  = (const char *)bufferv;
    CURLcode cret;

    // abort_upload() has already released the authorisation data and the
    // curl handle, so nothing more can be sent on this file.
    if (fp->aborted) {
        errno = EIO;
        return -1;
    }

    if (kputsn(buffer, nbytes, &fp->buffer) == EOF) {
        return -1;
    }

    if (fp->buffer.l > fp->part_size) {
        // time to write out our data
        kstring_t response = {0, 0, NULL};
        int ret;

        ret = upload_part(fp, &response);

        if (!ret) {
            long response_code = 0;

            cret = curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);

            if (cret != CURLE_OK) {
                // no status available, so there is nothing to map to an errno
                errno = EIO;
                ret = -1;
            } else if (response_code > 200) {
                errno = http_status_errno(response_code);
                ret = -1;
            } else {
                kstring_t etag = {0, 0, NULL};

                if (get_entry(response.s, "Etag: \"", "\"", &etag) == EOF) {
                    fprintf(stderr, "hfile_s3: Failed to read Etag\n");
                    ret = -1;
                } else if (ksprintf(&fp->completion_message, "\t<Part>\n\t\t<PartNumber>%d</PartNumber>\n\t\t<ETag>%s</ETag>\n\t</Part>\n",
                        fp->part_no, etag.s) < 0) {
                    // a part missing from the completion message would
                    // produce a broken upload, so treat this as fatal
                    ret = -1;
                }

                ks_free(&etag);
            }
        }

        ks_free(&response);

        if (ret) {
            abort_upload(fp);
            return -1;
        }

        fp->part_no++;
        fp->buffer.l = 0;

        if (fp->expand && (fp->part_no % EXPAND_ON == 0)) {
            fp->part_size *= 2;
        }
    }

    return nbytes;
}
1681
1682
1683
0
/* hFILE close backend for writing.

   Unless the upload has already been aborted: upload whatever is left in
   fp->buffer as the last part, then send the completion request and check
   that the reply contains "CompleteMultipartUploadResult".  If no part was
   ever uploaded, or anything fails, the upload is aborted.  Otherwise fp is
   cleaned up.

   Returns 0 on success (and also when the upload had already been aborted),
   non-zero on failure.
*/
static int s3_write_close(hFILE *fpv) {
    hFILE_s3 *fp = (hFILE_s3 *)fpv;
    kstring_t response = {0, 0, NULL};
    int ret = 0;
    CURLcode cret;
    long response_code = 0;

    if (!fp->aborted) {

        if (fp->buffer.l) {
            // write the last part

            ret = upload_part(fp, &response);

            if (!ret) {
                cret = curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);

                if (cret != CURLE_OK) {
                    // no status available, so there is nothing to map to an errno
                    errno = EIO;
                    ret = -1;
                } else if (response_code > 200) {
                    errno = http_status_errno(response_code);
                    ret = -1;
                } else {
                    kstring_t etag = {0, 0, NULL};

                    if (get_entry(response.s, "ETag: \"", "\"", &etag) == EOF) {
                        ret = -1;
                    } else if (ksprintf(&fp->completion_message, "\t<Part>\n\t\t<PartNumber>%d</PartNumber>\n\t\t<ETag>%s</ETag>\n\t</Part>\n",
                            fp->part_no, etag.s) < 0) {
                        // the completion message must list every part
                        ret = -1;
                    }

                    ks_free(&etag);
                }
            }

            ks_free(&response);

            if (ret) {
                abort_upload(fp);
                return -1;
            }

            fp->part_no++;
        }

        if (fp->part_no > 1) {
            ret = complete_upload(fp, &response);

            if (!ret) {
                // response.s stays NULL if the server sent no body at all
                if (response.s == NULL
                    || strstr(response.s, "CompleteMultipartUploadResult") == NULL) {
                    ret = -1;
                    response_code = 0;
                    cret = curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);

                    if (cret == CURLE_OK) {
                        if (hts_verbose >= HTS_LOG_INFO) {
                            if (report_s3_error(&response, response_code)) {
                                fprintf(stderr, "hfile_s3: warning, unable to report full S3 error status.\n");
                            }
                        }

                        errno = http_status_errno(response_code);
                    }
                }
            }
        } else {
            // nothing was ever uploaded
            ret = -1;
        }

        if (ret) {
            abort_upload(fp);
        } else {
            cleanup(fp);
        }
    }

    ks_free(&response);

    return ret;
}
1761
1762
1763
0
/* Pull the <Region> entry out of an S3 response body and store it as the
   region to use in fp's authorisation data.

   Returns -1 if the response has no <Region> entry, otherwise the result of
   set_region() (0 on success).
*/
static int handle_bad_request(hFILE_s3 *fp, kstring_t *resp) {
    kstring_t new_region = {0, 0, NULL};
    int ret;

    if (get_entry(resp->s, "<Region>", "</Region>", &new_region) == EOF)
        return -1;

    ret = set_region(fp->au, &new_region);
    ks_free(&new_region);

    if (hts_verbose >= HTS_LOG_INFO) {
        fprintf(stderr, "hfile_s3: handle_bad_request: return %d\n", ret);
    }

    return ret;
}
1779
1780
0
/* Start a multipart upload by POSTing to the "uploads" sub-resource of
   fp->url.

   Response headers are appended to head and the response body to resp.
   user_query is non-zero when a user supplied query string is in use: it is
   passed on to v4_authorisation() and makes "uploads" get joined to the URL
   with '&' instead of '?'.

   Returns 0 if the request was performed, -1 otherwise.
*/
static int initialise_upload(hFILE_s3 *fp, kstring_t *head, kstring_t *resp, int user_query) {
    char http_request[] = "POST";
    const char delimiter = user_query ? '&' : '?';
    kstring_t target = KS_INITIALIZE;
    struct curl_slist *headers = NULL;
    CURLcode err;
    int ret = -1;

    clear_authorisation_values(fp);

    // Each step depends on the one before; stop at the first failure.
    if (v4_authorisation(fp, http_request, NULL, "uploads=", user_query) != 0
        || ksprintf(&target, "%s%cuploads", fp->url.s, delimiter) < 0
        || ksprintf(&fp->content, "x-amz-content-sha256: %s", fp->content_hash.s) < 0) {
        goto out;
    }

    err = curl_easy_setopt(fp->curl, CURLOPT_URL, target.s);
    err |= curl_easy_setopt(fp->curl, CURLOPT_POST, 1L);
    err |= curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDS, "");  // send no data
    err |= curl_easy_setopt(fp->curl, CURLOPT_WRITEFUNCTION, response_callback);
    err |= curl_easy_setopt(fp->curl, CURLOPT_WRITEDATA, (void *)resp);
    err |= curl_easy_setopt(fp->curl, CURLOPT_HEADERFUNCTION, response_callback);
    err |= curl_easy_setopt(fp->curl, CURLOPT_HEADERDATA, (void *)head);
    err |= curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s);
    err |= curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose);

    if (err != CURLE_OK)
        goto out;

    headers = set_html_headers(fp, &fp->authorisation, &fp->date, &fp->content, &fp->token, NULL);

    if (headers == NULL)
        goto out;

    fp->ret = curl_easy_perform(fp->curl);

    if (fp->ret == CURLE_OK)
        ret = 0;

 out:
    curl_slist_free_all(headers);
    ks_free(&target);

    return ret;
}
1836
1837
1838
0
/* Append the <UploadId> entry of an S3 response body to fp->upload_id.
   Returns 0 on success, -1 if it could not be extracted. */
static int get_upload_id(hFILE_s3 *fp, kstring_t *resp) {
    if (get_entry(resp->s, "<UploadId>", "</UploadId>", &fp->upload_id) == EOF)
        return -1;

    return 0;
}
1847
1848
1849
/*
1850
    Now for the reading code
1851
*/
1852
1853
170
#define READ_PART_SIZE 1048576
1854
1855
0
static size_t recv_callback(char *ptr, size_t size, size_t nmemb, void *fpv) {
1856
0
    hFILE_s3 *fp = (hFILE_s3 *) fpv;
1857
0
    size_t n = size * nmemb;
1858
1859
0
    if (n) {
1860
0
        if (kputsn(ptr, n, &fp->buffer) == EOF) {
1861
0
            fprintf(stderr, "hfile_s3: error: unable to allocate memory to read data.\n");
1862
0
            return 0;
1863
0
        }
1864
0
    }
1865
1866
0
    return n;
1867
0
}
1868
1869
1870
0
/* hFILE close backend for reading: release everything held by the file.
   Always returns 0. */
static int s3_read_close(hFILE *fpv) {
    cleanup((hFILE_s3 *) fpv);

    return 0;
}
1877
1878
1879
170
/*
    Perform one ranged GET on fp->url, asking for fp->part_size bytes
    starting at offset fp->last_read.

    fp->buffer is emptied first and then filled with the body by
    recv_callback().  When resp is non-NULL the response headers are handed
    to response_callback() with resp as its data pointer.

    Returns 0 when curl_easy_perform() succeeds and -1 otherwise.  The HTTP
    status is not examined here; the libcurl result is left in fp->ret.
*/
static int get_part(hFILE_s3 *fp, kstring_t *resp) {
    char http_request[] = "GET";
    struct curl_slist *headers = NULL;
    CURLcode err;
    int ret = -1;

    // Start from an empty data buffer and fresh authorisation strings.
    ks_clear(&fp->buffer);
    clear_authorisation_values(fp);

    if (!fp->au->is_v4) {
        if (v2_authorisation(fp, http_request) != 0)
            goto out;

        if (hts_verbose >= HTS_LOG_INFO) fprintf(stderr, "hfile_s3: get_part v2 auth done\n");
    } else {
        if (v4_authorisation(fp, http_request, NULL, "", 0) != 0)
            goto out;

        if (hts_verbose >= HTS_LOG_INFO) fprintf(stderr, "hfile_s3: get_part: v4 auth done\n");

        // v4 requests also carry the content hash as a header.
        if (ksprintf(&fp->content, "x-amz-content-sha256: %s", fp->content_hash.s) < 0)
            goto out;
    }

    // Byte range is inclusive at both ends, hence the "- 1".
    if (ksprintf(&fp->range, "Range: bytes=%zu-%zu", fp->last_read, fp->last_read + fp->part_size - 1) < 0)
        goto out;

    if (hts_verbose >= HTS_LOG_INFO) {
        fprintf(stderr, "hfile_s3: get_part: range set %s\n", fp->range.s);
        fprintf(stderr, "hfile_s3: url %s\n", fp->url.s);
    }

    // Reuse the handle, but with every option set afresh.
    curl_easy_reset(fp->curl);

    err  = curl_easy_setopt(fp->curl, CURLOPT_URL, fp->url.s);
    err |= curl_easy_setopt(fp->curl, CURLOPT_WRITEFUNCTION, recv_callback);
    err |= curl_easy_setopt(fp->curl, CURLOPT_WRITEDATA, (void *)fp);
    err |= curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s);
    err |= curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose);

    if (resp != NULL) {
        // Caller wants to inspect the response headers.
        err |= curl_easy_setopt(fp->curl, CURLOPT_HEADERFUNCTION, response_callback);
        err |= curl_easy_setopt(fp->curl, CURLOPT_HEADERDATA, (void *)resp);
    }

    if (err != CURLE_OK)
        goto out;

    headers = set_html_headers(fp, &fp->authorisation, &fp->date, &fp->content, &fp->token, &fp->range);

    if (headers == NULL)
        goto out;

    fp->ret = curl_easy_perform(fp->curl);

    if (fp->ret == CURLE_OK)
        ret = 0;

out:
    if (hts_verbose >= HTS_LOG_INFO) fprintf(stderr, "hfile_s3: get_part: ret %d\n", ret);
    curl_slist_free_all(headers);

    return ret;
}
1948
1949
1950
0
/*
    Backend read: copy up to nbytes into bufferv.

    Data is served from fp->buffer; whenever that is exhausted another part
    is fetched from S3 with get_part().  A part shorter than fp->part_size
    (or an empty one) is taken as the end of the object and clears
    fp->keep_going.

    Returns the number of bytes copied (0 at end of data), or a negative
    value if a part could not be fetched.  When the failure is an HTTP
    status of 300 or above, errno is set from that status; when the status
    cannot be obtained from libcurl, errno is set to EIO.
*/
static ssize_t s3_read(hFILE *fpv, void *bufferv, size_t nbytes) {
    hFILE_s3 *fp = (hFILE_s3 *)fpv;
    char *buffer = (char *)bufferv;
    size_t got = 0;

    /* Transfer data from the fp->buffer to the calling buffer.
       If there is no data left in the fp->buffer, grab another chunk of
       data from s3.
    */
    while (fp->keep_going && got < nbytes) {

        if (fp->buffer.l && fp->last_read_buffer < fp->buffer.l) {
            // copy data across
            size_t remaining = fp->buffer.l - fp->last_read_buffer;
            size_t bytes_left = nbytes - got;
            size_t to_copy = (bytes_left < remaining) ? bytes_left : remaining;

            if (hts_verbose >  HTS_LOG_INFO) fprintf(stderr, "hfile_s3: read - remaining %zu read %zu bytes_left %zu, nbytes %zu\n", remaining, got, bytes_left, nbytes);

            memcpy(buffer + got, fp->buffer.s + fp->last_read_buffer, to_copy);
            got += to_copy;
            fp->last_read_buffer += to_copy;

            // A short part that has been fully consumed means end of data.
            if ((fp->buffer.l < fp->part_size) && (fp->last_read_buffer == fp->buffer.l)) {
                fp->keep_going = 0;
            }
        } else {
            int ret;

            ret = get_part(fp, NULL);

            if (!ret) {
                // Initialised so that it is never read with an indeterminate
                // value when curl_easy_getinfo() fails.
                long response_code = 0;
                CURLcode cret = curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);

                if (cret != CURLE_OK) {
                    // No status available to translate.
                    errno = EIO;
                    ret = -1;
                } else if (response_code >= 300) {
                    // Same threshold as the open functions use.
                    errno = http_status_errno(response_code);
                    ret = -1;
                }
            }

            if (hts_verbose >= HTS_LOG_INFO) fprintf(stderr, "hfile_s3: read - read error %d\n", ret);

            if (ret < 0)
                return ret;

            if (fp->buffer.l == 0) {
                // Nothing came back: treat as end of data.
                fp->keep_going = 0;
                break;
            }

            // New part in place: restart the buffer cursor and advance the
            // remote offset past what has just been downloaded.
            fp->last_read_buffer = 0;
            fp->last_read = fp->last_read + fp->buffer.l;
        }
    }

    return got;
}
2014
2015
2016
0
/*
    Backend seek for handles opened for reading.

    whence may be SEEK_SET, or SEEK_END when the file size is known.
    SEEK_CUR is rejected with ENOSYS (hseek() is expected to have converted
    it), and seeking on a handle opened for writing fails with ESPIPE.

    If the target lies within the data currently held in fp->buffer only the
    buffer cursor is moved; otherwise the buffer is emptied and the next
    read fetches from the new offset.

    Returns the new absolute position, or -1 with errno set.
*/
static off_t s3_seek(hFILE *fpv, off_t offset, int whence) {
    hFILE_s3 *fp = (hFILE_s3 *)fpv;
    off_t origin;

    if (fp->write) {
        // lets not try and seek while writing
        errno = ESPIPE;
        return -1;
    }

    // I am not sure we handle any seek other than one from the beginning
    switch (whence) {
        case SEEK_SET:
            origin = 0;
            break;
        case SEEK_CUR:
            // hseek() should convert this to SEEK_SET
            errno = ENOSYS;
            return -1;
        case SEEK_END:
            if (fp->file_size < 0) {
                errno = ESPIPE;
                return -1;
            }

            origin = fp->file_size;
            break;
        default:
            errno = EINVAL;
            return -1;
    }

    // Check 0 <= origin+offset <= fp->file_size carefully, avoiding overflow
    if ((offset < 0)? origin + offset < 0
                : (fp->file_size >= 0 && offset > fp->file_size - origin)) {
        errno = EINVAL;
        return -1;
    }

    fp->keep_going = 1;

    off_t target = origin + offset; // origin is really only useful if we can make the other modes work
    size_t pos = (size_t)target;

    // fp->buffer holds the bytes [buffer_start, fp->last_read) of the object.
    size_t buffer_start = fp->last_read - fp->buffer.l;

    if (pos >= buffer_start && pos <= fp->last_read) {
        // within the current local buffer (its first byte included, so that
        // seeking back to the start of the buffered data needs no download)
        fp->last_read_buffer = pos - buffer_start;
    } else {
        fp->last_read = pos;
        ks_clear(&fp->buffer); // resetting fp->buffer triggers a new remote read
    }

    // Report the position actually seeked to.  fp->last_read is the end of
    // the buffered data, which differs from it in the in-buffer case.
    return target;
}
2069
2070
2071
/*
2072
    Unlike upload, download does not really need an initialisation.  Here we use it to
2073
    get the size of the wanted files and as a test for redirects.
2074
*/
2075
170
static int initialise_download(hFILE_s3 *fp, kstring_t *resp) {
    // Discard any headers left over from a previous attempt.
    ks_clear(resp);

    // The first part always starts at the beginning of the object.
    fp->last_read = 0;

    // Returns get_part()'s result: 0 on success, -1 on failure.
    return get_part(fp, resp);
}
}
2082
2083
2084
0
/*
    Backend close: dispatches to the write or read close routine according
    to how the handle was opened, and returns that routine's result.
*/
static int s3_close(hFILE *fpv) {
    const hFILE_s3 *s3 = (const hFILE_s3 *)fpv;

    if (s3->write)
        return s3_write_close(fpv);

    return s3_read_close(fpv);
}
2096
2097
2098
/*
    Callback table for S3 handles; the open functions below install it as
    fp->base.backend.  The initialiser is positional: read, write and seek
    callbacks, then a slot left NULL (presumably the flush callback --
    confirm against struct hFILE_backend in hfile_internal.h), then close.
*/
static const struct hFILE_backend s3_backend = {
    s3_read, s3_write, s3_seek, NULL, s3_close
};
2101
2102
/* The read and write open functions are defined here because they need to come after the s3_backend definition. */
2103
0
/*
    Open url for writing as an S3 multipart upload.

    Creates the handle, starts the upload with initialise_upload() (retrying
    once after a redirect or a bad-request response that the helpers were
    able to act on), extracts the upload id from the reply and begins the
    completion message.

    The HTS_S3_PART_SIZE environment variable gives the part size in MiB.
    Values that do not exceed the built-in minimum leave it unchanged, and
    whenever the variable is set, fp->expand is switched off.

    auth is stored in the handle but is not freed here on failure.

    Returns the new hFILE, or NULL on failure.
*/
static hFILE *s3_write_open(const char *url, s3_auth_data *auth) {
    hFILE_s3 *fp;
    kstring_t response = {0, 0, NULL};
    kstring_t header   = {0, 0, NULL};
    int has_user_query;
    char *query_start;
    const char *env;
    CURLcode cret;
    long response_code = 0;

    fp = (hFILE_s3 *)hfile_init(sizeof(hFILE_s3), "w", 0);

    if (fp == NULL) {
        return NULL;
    }

    if ((fp->curl = curl_easy_init()) == NULL) {
        // The handle's own fields have not been initialised yet, so the
        // shared error path (which releases them) must not run here.
        hfile_destroy((hFILE *)fp);
        errno = ENOMEM;
        return NULL;
    }

    fp->au = auth;

    initialise_local(fp);
    initialise_authorisation_values(fp);
    fp->aborted = 0;
    fp->part_size = MINIMUM_S3_WRITE_SIZE;
    fp->expand = 1;
    fp->write = 1;

    if ((env = getenv("HTS_S3_PART_SIZE")) != NULL) {
        // Largest MiB count whose size in bytes still fits in an int
        // ((unsigned int)-1 >> 1 is INT_MAX), so the multiplication below
        // cannot overflow.
        const long max_mib = (long)(((unsigned int)-1 >> 1) >> 20);
        long mib = strtol(env, NULL, 10);

        if (mib > max_mib)
            mib = max_mib;

        if (mib > 0) {
            int part_size = (int)mib * 1024 * 1024;

            if (part_size > fp->part_size)
                fp->part_size = part_size;
        }

        // An explicit setting turns expansion off.
        fp->expand = 0;
    }

    fp->verbose = (hts_verbose >= 8) ? 1L : 0L;

    // fp->url.s is dereferenced below, so a failed copy must not be ignored.
    if (kputs(url, &fp->url) == EOF)
        goto error;

    has_user_query = (strchr(fp->url.s, '?') != NULL);

    if (initialise_upload(fp, &header, &response, has_user_query))
        goto error;

    cret = curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);

    if (cret == CURLE_OK) {
        if (response_code == S3_MOVED_PERMANENTLY || response_code == S3_TEMPORARY_REDIRECT) {
            if (redirect_endpoint(fp, &header) == 0) {
                ks_clear(&response);
                ks_clear(&header);

                if (initialise_upload(fp, &header, &response, has_user_query))
                    goto error;
            }
        } else if (response_code == S3_BAD_REQUEST) {
            if (handle_bad_request(fp, &response) == 0) {
                ks_clear(&response);
                ks_clear(&header);

                if (initialise_upload(fp, &header, &response, has_user_query))
                    goto error;
            }
        }

        // reget the response code (may not have changed)
        cret = curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);
    } else {
        // unable to get a response code from curl
        goto error;
    }

    if (response_code >= 300) {
        // something went wrong with the initialisation

        if (cret == CURLE_OK) {
            if (hts_verbose >= HTS_LOG_INFO) {
                if (report_s3_error(&response, response_code)) {
                    fprintf(stderr, "hfile_s3: warning, unable to report full S3 error status.\n");
                }
            }

            errno = http_status_errno(response_code);
        }

        goto error;
    }

    if (get_upload_id(fp, &response)) goto error;

    // start the completion message (a formatted list of parts)
    if (kputs("<CompleteMultipartUpload>\n", &fp->completion_message) == EOF) {
        goto error;
    }

    fp->part_no = 1;

    // user query string no longer a useful part of the URL.  The '?' is
    // looked up afresh rather than remembered from before the request:
    // the retry helpers above are handed fp and may have rewritten fp->url
    // (not visible here), which would leave an earlier pointer stale.
    if ((query_start = strchr(fp->url.s, '?')) != NULL)
         *query_start = '\0';

    fp->base.backend = &s3_backend;
    ks_free(&response);
    ks_free(&header);

    return &fp->base;

error:
    ks_free(&response);
    ks_free(&header);
    cleanup_local(fp);
    free_authorisation_values(fp);
    hfile_destroy((hFILE *)fp);
    return NULL;
}
2229
2230
2231
170
/*
    Open url for reading.

    Creates the handle and downloads the first part with
    initialise_download(), which doubles as a test for redirects and as the
    source of the object's size (taken from the content-range response
    header).  One retry is made after a redirect or a bad-request response
    that the helpers were able to act on.

    The HTS_S3_READ_PART_SIZE environment variable gives the part size in
    MiB; values that are not positive are ignored and READ_PART_SIZE is
    used instead.

    auth is stored in the handle but is not freed here on failure.

    Returns the new hFILE, or NULL on failure.
*/
static hFILE *s3_read_open(const char *url, s3_auth_data *auth) {
    hFILE_s3 *fp;
    const char *env;
    kstring_t response   = {0, 0, NULL};
    kstring_t file_range = {0, 0, NULL};
    CURLcode cret;
    long response_code = 0;

    fp = (hFILE_s3 *)hfile_init(sizeof(hFILE_s3), "r", 0);

    if (fp == NULL) {
        return NULL;
    }

    if ((fp->curl = curl_easy_init()) == NULL) {
        // The handle's own fields have not been initialised yet, so the
        // shared error path (which releases them) must not run here.
        hfile_destroy((hFILE *)fp);
        errno = ENOMEM;
        return NULL;
    }

    fp->au = auth;

    initialise_local(fp);
    initialise_authorisation_values(fp);

    fp->last_read = 0; // ranges start at 0
    fp->write = 0;

    fp->part_size = READ_PART_SIZE;

    if ((env = getenv("HTS_S3_READ_PART_SIZE")) != NULL) {
        // Largest MiB count whose size in bytes still fits in an int
        // ((unsigned int)-1 >> 1 is INT_MAX), so the multiplication below
        // cannot overflow.
        const long max_mib = (long)(((unsigned int)-1 >> 1) >> 20);
        long mib = strtol(env, NULL, 10);

        if (mib > max_mib)
            mib = max_mib;

        // A zero or negative part size would produce a meaningless range
        // request, so such settings keep the default.
        if (mib > 0)
            fp->part_size = (int)mib * 1024 * 1024;
    }

    fp->verbose = (hts_verbose >= 8) ? 1L : 0L;

    // fp->url.s is used by the request, so a failed copy must not be ignored.
    if (kputs(url, &fp->url) == EOF)
        goto error;

    if (initialise_download(fp, &response))
        goto error;

    cret = curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);

    if (cret == CURLE_OK) {
        if (response_code == S3_MOVED_PERMANENTLY || response_code == S3_TEMPORARY_REDIRECT) {
            // response holds the headers of the redirect reply, which is
            // what redirect_endpoint() is given to work from, so it must
            // not be emptied first.  initialise_download() clears it itself
            // before the retry.
            if (redirect_endpoint(fp, &response) == 0) {
                if (initialise_download(fp, &response))
                    goto error;
            }
        } else if (response_code == S3_BAD_REQUEST) {
            ks_clear(&response);

            // the body of the reply was collected in fp->buffer
            if (handle_bad_request(fp, &fp->buffer) == 0) {
                if (initialise_download(fp, &response))
                    goto error;
            }
        }

        // reget the response code (may not have changed)
        cret = curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);
    } else {
        // unable to get a response code from curl
        goto error;
    }

    if (response_code >= 300) {
        // something went wrong with the initialisation

        if (cret == CURLE_OK) {
            if (hts_verbose >= HTS_LOG_INFO) {
                if (report_s3_error(&fp->buffer, response_code)) {
                    fprintf(stderr, "hfile_s3: warning, unable to report full S3 error status.\n");
                }
            }

            errno = http_status_errno(response_code);
        }

        goto error;
    }

    // The total size follows the '/' in "content-range: bytes a-b/total".
    fp->file_size = -1;

    if (get_entry(response.s, "content-range: bytes ", "\n", &file_range) == EOF) {
        fprintf(stderr, "hfile_s3: warning: failed to read file size.\n");
    } else {
        char *slash = strchr(file_range.s, '/');

        if (slash) {
            char *end;
            long long total = strtoll(slash + 1, &end, 10);

            // Accept the value only if digits were actually parsed; a
            // non-numeric total (such as "*") leaves the size unknown
            // rather than being read as zero.
            if (end != slash + 1 && total >= 0)
                fp->file_size = total;
        }
    }

    // The first part is already in fp->buffer: start reading at its
    // beginning and note how far into the object it reaches.
    fp->last_read_buffer = 0;
    fp->last_read = fp->last_read + fp->buffer.l;
    fp->base.backend = &s3_backend;
    fp->keep_going = 1;

    ks_free(&response);
    ks_free(&file_range);
    return &fp->base;


 error:
    ks_free(&response);
    ks_free(&file_range);
    cleanup_local(fp);
    free_authorisation_values(fp);
    hfile_destroy((hFILE *)fp);
    return NULL;
}
2347
2348
2349
170
static hFILE *s3_open_v4(const char *s3url, const char *mode, va_list *argsp) {
2350
170
    kstring_t url = { 0, 0, NULL };
2351
2352
170
    s3_auth_data *ad = setup_auth_data(s3url, mode, 4, &url);
2353
170
    hFILE *fp = NULL;
2354
2355
170
    if (ad == NULL) {
2356
0
        return NULL;
2357
0
    }
2358
2359
170
    if (hts_verbose >= HTS_LOG_INFO) fprintf(stderr, "hfile_s3: s3_open_v4 url %s\n", url.s);
2360
2361
170
    if (*mode == 'r') {
2362
170
        fp  = s3_read_open(url.s, ad);
2363
170
    } else {
2364
0
        fp =  s3_write_open(url.s, ad);
2365
0
    }
2366
2367
170
    ks_free(&url);
2368
170
    if (!fp)
2369
170
        free_auth_data(ad);
2370
2371
170
    return fp;
2372
170
}
2373
2374
2375
0
static hFILE *s3_open_v2(const char *s3url, const char *mode, va_list *argsp) {
2376
0
    kstring_t url = { 0, 0, NULL };
2377
2378
0
    s3_auth_data *ad = setup_auth_data(s3url, mode, 2, &url);
2379
0
    hFILE *fp = NULL;
2380
2381
0
    if (ad == NULL) {
2382
0
        return NULL;
2383
0
    }
2384
2385
0
    if (hts_verbose >= HTS_LOG_INFO) fprintf(stderr, "hfile_s3: s3_open_v2 url %s\n", url.s);
2386
2387
0
    if (*mode == 'r') {
2388
0
        fp  = s3_read_open(url.s, ad);
2389
0
    } else {
2390
0
        fprintf(stderr, "hfile_s3: error - signature v2 not handled for writing.\n.");
2391
0
    }
2392
2393
0
    ks_free(&url);
2394
0
    if (!fp)
2395
0
        free_auth_data(ad);
2396
2397
0
    return fp;
2398
0
}
2399
2400
2401
static hFILE *hopen_s3(const char *url, const char *mode)
{
    // Signature v4 is the default; defining HTS_S3_V2 in the environment
    // (with any value) selects signature v2 instead.
    if (getenv("HTS_S3_V2") != NULL)
        return s3_open_v2(url, mode, NULL);

    return s3_open_v4(url, mode, NULL);
}
2413
2414
2415
static hFILE *vhopen_s3(const char *url, const char *mode, va_list args0)
{
    // This is where the variable-argument case would be handled.  No such
    // arguments are interpreted at present, so args0 is ignored and the
    // call is passed straight to the plain open.
    return hopen_s3(url, mode);
}
2425
2426
2427
1
static void s3_exit(void) {
2428
1
    if (curl_share_cleanup(curl.share) == CURLSHE_OK)
2429
1
        curl.share = NULL;
2430
2431
1
    free(curl.useragent.s);
2432
1
    curl.useragent.l = curl.useragent.m = 0; curl.useragent.s = NULL;
2433
1
    curl_global_cleanup();
2434
1
}
2435
2436
2437
1
int PLUGIN_GLOBAL(hfile_plugin_init,_s3)(struct hFILE_plugin *self) {
2438
2439
1
    static const struct hFILE_scheme_handler handler =
2440
1
        { hopen_s3, hfile_always_remote, "Amazon S3",
2441
1
          2000 + 50, vhopen_s3
2442
1
        };
2443
2444
#ifdef ENABLE_PLUGINS
2445
    // Embed version string for examination via strings(1) or what(1)
2446
    static const char id[] =
2447
        "@(#)hfile_s3 plugin (htslib)\t" HTS_VERSION_TEXT;
2448
    const char *version = strchr(id, '\t') + 1;
2449
2450
    if (hts_verbose >= 9)
2451
        fprintf(stderr, "[M::hfile_s3.init] version %s\n",
2452
                version);
2453
#else
2454
1
    const char *version = hts_version();
2455
1
#endif
2456
2457
1
    const curl_version_info_data *info;
2458
1
    CURLcode err;
2459
1
    CURLSHcode errsh;
2460
2461
1
    err = curl_global_init(CURL_GLOBAL_ALL);
2462
2463
1
    if (err != CURLE_OK) {
2464
        // look at putting in an errno here
2465
0
        return -1;
2466
0
    }
2467
2468
1
    curl.share = curl_share_init();
2469
2470
1
    if (curl.share == NULL) {
2471
0
        curl_global_cleanup();
2472
0
        errno = EIO;
2473
0
        return -1;
2474
0
    }
2475
2476
1
    errsh  = curl_share_setopt(curl.share, CURLSHOPT_LOCKFUNC, share_lock);
2477
1
    errsh |= curl_share_setopt(curl.share, CURLSHOPT_UNLOCKFUNC, share_unlock);
2478
1
    errsh |= curl_share_setopt(curl.share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
2479
2480
1
    if (errsh != 0) {
2481
0
        curl_share_cleanup(curl.share);
2482
0
        curl_global_cleanup();
2483
0
        errno = EIO;
2484
0
        return -1;
2485
0
    }
2486
2487
1
    info = curl_version_info(CURLVERSION_NOW);
2488
1
    ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version);
2489
2490
1
    self->name = "Amazon S3";
2491
1
    self->destroy = s3_exit;
2492
2493
1
    hfile_add_scheme_handler("s3",       &handler);
2494
1
    hfile_add_scheme_handler("s3+http",  &handler);
2495
1
    hfile_add_scheme_handler("s3+https", &handler);
2496
2497
1
    return 0;
2498
1
}
2499