Coverage Report

Created: 2026-02-14 06:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/htslib/hfile_s3.c
Line
Count
Source
1
/*  hfile_s3.c -- Amazon S3 backend for low-level file streams.
2
3
    Copyright (C) 2015-2017, 2019-2025 Genome Research Ltd.
4
5
    Author: John Marshall <jm18@sanger.ac.uk>
6
7
Permission is hereby granted, free of charge, to any person obtaining a copy
8
of this software and associated documentation files (the "Software"), to deal
9
in the Software without restriction, including without limitation the rights
10
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
copies of the Software, and to permit persons to whom the Software is
12
furnished to do so, subject to the following conditions:
13
14
The above copyright notice and this permission notice shall be included in
15
all copies or substantial portions of the Software.
16
17
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
DEALINGS IN THE SOFTWARE.  */
24
25
#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
26
#include <config.h>
27
28
#include <stdarg.h>
29
#include <stdio.h>
30
#include <stdlib.h>
31
#include <string.h>
32
#include <strings.h>
33
#include <time.h>
34
35
#include <errno.h>
36
#include <pthread.h>
37
38
#include "hfile_internal.h"
39
#ifdef ENABLE_PLUGINS
40
#include "version.h"
41
#endif
42
#include "htslib/hts.h"  // for hts_version() and hts_verbose
43
#include "htslib/kstring.h"
44
#include "hts_time_funcs.h"
45
46
#include <curl/curl.h>
47
48
typedef struct s3_auth_data {
49
    kstring_t id;
50
    kstring_t token;
51
    kstring_t secret;
52
    kstring_t region;
53
    kstring_t canonical_query_string;
54
    kstring_t user_query_string;
55
    kstring_t host;
56
    kstring_t profile;
57
    enum {s3_auto, s3_virtual, s3_path} url_style;
58
    time_t creds_expiry_time;
59
    char *bucket;
60
    time_t auth_time;
61
    char date[40];
62
    char date_long[17];
63
    char date_short[9];
64
    kstring_t date_html;
65
    char mode;
66
    int is_v4;
67
} s3_auth_data;
68
69
typedef struct {
70
    hFILE base;
71
    CURL *curl;
72
    CURLcode ret;
73
    s3_auth_data *au;
74
    kstring_t buffer;
75
    kstring_t url;
76
    long verbose;
77
    int write;
78
    int part_size; // size for reading or writing
79
80
    kstring_t content_hash;
81
    kstring_t authorisation;
82
    kstring_t content;
83
    kstring_t date;
84
    kstring_t token;
85
    kstring_t range;
86
87
    // write variables
88
    kstring_t upload_id;
89
    kstring_t completion_message;
90
    int part_no;
91
    int aborted;
92
    size_t index;
93
    int expand;
94
95
    // read variables
96
    size_t last_read;               // last read position (remote)
97
    size_t last_read_buffer;        // last read (local buffer)
98
    int64_t file_size;              // size of the file being read
99
    int keep_going;
100
101
} hFILE_s3;
102
103
247
#define AUTH_LIFETIME 60  // Regenerate auth headers if older than this
104
0
#define CREDENTIAL_LIFETIME 60 // Seconds before expiry to reread credentials
105
106
#if defined HAVE_COMMONCRYPTO
107
108
#include <CommonCrypto/CommonHMAC.h>
109
110
#define DIGEST_BUFSIZ CC_SHA1_DIGEST_LENGTH
111
#define SHA256_DIGEST_BUFSIZE CC_SHA256_DIGEST_LENGTH
112
#define HASH_LENGTH_SHA256 (SHA256_DIGEST_BUFSIZE * 2) + 1
113
114
static size_t
115
s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message)
116
{
117
    CCHmac(kCCHmacAlgSHA1, key->s, key->l, message->s, message->l, digest);
118
    return CC_SHA1_DIGEST_LENGTH;
119
}
120
121
122
static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) {
123
    CC_SHA256(in, length, out);
124
}
125
126
127
static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) {
128
    CCHmac(kCCHmacAlgSHA256, key, key_len, d, n, md);
129
    *md_len = CC_SHA256_DIGEST_LENGTH;
130
}
131
132
133
#elif defined HAVE_HMAC
134
135
#include <openssl/hmac.h>
136
#include <openssl/sha.h>
137
138
#define DIGEST_BUFSIZ EVP_MAX_MD_SIZE
139
11.5k
#define SHA256_DIGEST_BUFSIZE SHA256_DIGEST_LENGTH
140
247
#define HASH_LENGTH_SHA256 (SHA256_DIGEST_BUFSIZE * 2) + 1
141
142
static size_t
143
s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message)
144
0
{
145
0
    unsigned int len;
146
0
    HMAC(EVP_sha1(), key->s, key->l,
147
0
         (unsigned char *) message->s, message->l, digest, &len);
148
0
    return len;
149
0
}
150
151
152
344
static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) {
153
344
    SHA256(in, length, out);
154
344
}
155
156
157
485
static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) {
158
485
    HMAC(EVP_sha256(), key, key_len, d, n, md, md_len);
159
485
}
160
161
#else
162
#error No HMAC() routine found by configure
163
#endif
164
165
static void
166
urldecode_kput(const char *s, int len, kstring_t *str)
167
339
{
168
339
    char buf[3];
169
339
    int i = 0;
170
171
508k
    while (i < len)
172
508k
        if (s[i] == '%' && i+2 < len) {
173
539
            buf[0] = s[i+1], buf[1] = s[i+2], buf[2] = '\0';
174
539
            kputc(strtol(buf, NULL, 16), str);
175
539
            i += 3;
176
539
        }
177
507k
        else kputc(s[i++], str);
178
339
}
179
180
181
static void base64_kput(const unsigned char *data, size_t len, kstring_t *str)
182
0
{
183
0
    static const char base64[] =
184
0
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
185
186
0
    size_t i = 0;
187
0
    unsigned x = 0;
188
0
    int bits = 0, pad = 0;
189
190
0
    while (bits || i < len) {
191
0
        if (bits < 6) {
192
0
            x <<= 8, bits += 8;
193
0
            if (i < len) x |= data[i++];
194
0
            else pad++;
195
0
        }
196
197
0
        bits -= 6;
198
0
        kputc(base64[(x >> bits) & 63], str);
199
0
    }
200
201
0
    str->l -= pad;
202
0
    kputsn("==", pad, str);
203
0
}
204
205
206
static int is_dns_compliant(const char *s0, const char *slim, int is_https)
207
247
{
208
247
    int has_nondigit = 0, len = 0;
209
247
    const char *s;
210
211
97.3k
    for (s = s0; s < slim; len++, s++)
212
97.3k
        if (islower_c(*s))
213
96.9k
            has_nondigit = 1;
214
459
        else if (*s == '-') {
215
47
            has_nondigit = 1;
216
47
            if (s == s0 || s+1 == slim) return 0;
217
47
        }
218
412
        else if (isdigit_c(*s))
219
208
            ;
220
204
        else if (*s == '.') {
221
9
            if (is_https) return 0;
222
0
            if (s == s0 || ! isalnum_c(s[-1])) return 0;
223
0
            if (s+1 == slim || ! isalnum_c(s[1])) return 0;
224
0
        }
225
195
        else return 0;
226
227
34
    return has_nondigit && len >= 3 && len <= 63;
228
247
}
229
230
231
static FILE *expand_tilde_open(const char *fname, const char *mode)
232
393
{
233
393
    FILE *fp;
234
235
393
    if (strncmp(fname, "~/", 2) == 0) {
236
393
        kstring_t full_fname = { 0, 0, NULL };
237
393
        const char *home = getenv("HOME");
238
393
        if (! home) return NULL;
239
240
393
        kputs(home, &full_fname);
241
393
        kputs(&fname[1], &full_fname);
242
243
393
        fp = fopen(full_fname.s, mode);
244
393
        free(full_fname.s);
245
393
    }
246
0
    else
247
0
        fp = fopen(fname, mode);
248
249
393
    return fp;
250
393
}
251
252
253
static void parse_ini(const char *fname, const char *section, ...)
254
262
{
255
262
    kstring_t line = { 0, 0, NULL };
256
262
    int active = 1;  // Start active, so global properties are accepted
257
262
    char *s;
258
259
262
    FILE *fp = expand_tilde_open(fname, "r");
260
262
    if (fp == NULL) return;
261
262
0
    while (line.l = 0, kgetline(&line, (kgets_func *) fgets, fp) >= 0)
263
0
        if (line.s[0] == '[' && (s = strchr(line.s, ']')) != NULL) {
264
0
            *s = '\0';
265
0
            active = (strcmp(&line.s[1], section) == 0);
266
0
        }
267
0
        else if (active && (s = strpbrk(line.s, ":=")) != NULL) {
268
0
            const char *key = line.s, *value = &s[1], *akey;
269
0
            va_list args;
270
271
0
            while (isspace_c(*key)) key++;
272
0
            while (s > key && isspace_c(s[-1])) s--;
273
0
            *s = '\0';
274
275
0
            while (isspace_c(*value)) value++;
276
0
            while (line.l > 0 && isspace_c(line.s[line.l-1]))
277
0
                line.s[--line.l] = '\0';
278
279
0
            va_start(args, section);
280
0
            while ((akey = va_arg(args, const char *)) != NULL) {
281
0
                kstring_t *avar = va_arg(args, kstring_t *);
282
0
                if (strcmp(key, akey) == 0) {
283
0
                    avar->l = 0;
284
0
                    kputs(value, avar);
285
0
                    break; }
286
0
            }
287
0
            va_end(args);
288
0
        }
289
290
0
    fclose(fp);
291
0
    free(line.s);
292
0
}
293
294
295
static void parse_simple(const char *fname, kstring_t *id, kstring_t *secret)
296
131
{
297
131
    kstring_t text = { 0, 0, NULL };
298
131
    char *s;
299
131
    size_t len;
300
301
131
    FILE *fp = expand_tilde_open(fname, "r");
302
131
    if (fp == NULL) return;
303
304
0
    while (kgetline(&text, (kgets_func *) fgets, fp) >= 0)
305
0
        kputc(' ', &text);
306
0
    fclose(fp);
307
308
0
    s = text.s;
309
0
    while (isspace_c(*s)) s++;
310
0
    kputsn(s, len = strcspn(s, " \t"), id);
311
312
0
    s += len;
313
0
    while (isspace_c(*s)) s++;
314
0
    kputsn(s, strcspn(s, " \t"), secret);
315
316
0
    free(text.s);
317
0
}
318
319
320
247
static void free_auth_data(s3_auth_data *ad) {
321
247
    free(ad->profile.s);
322
247
    free(ad->id.s);
323
247
    free(ad->token.s);
324
247
    free(ad->secret.s);
325
247
    free(ad->region.s);
326
247
    free(ad->canonical_query_string.s);
327
247
    free(ad->user_query_string.s);
328
247
    free(ad->host.s);
329
247
    free(ad->bucket);
330
247
    free(ad->date_html.s);
331
247
    free(ad);
332
247
}
333
334
static time_t parse_rfc3339_date(kstring_t *datetime)
335
0
{
336
0
    int offset = 0;
337
0
    time_t when;
338
0
    int num;
339
0
    char should_be_t = '\0', timezone[10] = { '\0' };
340
0
    unsigned int year, mon, day, hour, min, sec;
341
342
0
    if (!datetime->s)
343
0
        return 0;
344
345
    // It should be possible to do this with strptime(), but it seems
346
    // to not get on with our feature definitions.
347
0
    num = sscanf(datetime->s, "%4u-%2u-%2u%c%2u:%2u:%2u%9s",
348
0
                 &year, &mon, &day, &should_be_t, &hour, &min, &sec, timezone);
349
0
    if (num < 8)
350
0
        return 0;
351
0
    if (should_be_t != 'T' && should_be_t != 't' && should_be_t != ' ')
352
0
        return 0;
353
0
    struct tm parsed = { sec, min, hour, day, mon - 1, year - 1900, 0, 0, 0 };
354
355
0
    switch (timezone[0]) {
356
0
      case 'Z':
357
0
      case 'z':
358
0
      case '\0':
359
0
          break;
360
0
      case '+':
361
0
      case '-': {
362
0
          unsigned hr_off, min_off;
363
0
          if (sscanf(timezone + 1, "%2u:%2u", &hr_off, &min_off)) {
364
0
              if (hr_off < 24 && min_off <= 60) {
365
0
                  offset = ((hr_off * 60 + min_off)
366
0
                            * (timezone[0] == '+' ? -60 : 60));
367
0
              }
368
0
          }
369
0
          break;
370
0
      }
371
0
      default:
372
0
          return 0;
373
0
    }
374
375
0
    when = hts_time_gm(&parsed);
376
0
    return when >= 0 ? when + offset : 0;
377
0
}
378
379
0
static void refresh_auth_data(s3_auth_data *ad) {
380
    // Basically a copy of the AWS_SHARED_CREDENTIALS_FILE part of
381
    // setup_auth_data(), but this only reads the authorisation parts.
382
0
    const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE");
383
0
    kstring_t expiry_time = KS_INITIALIZE;
384
0
    parse_ini(v? v : "~/.aws/credentials", ad->profile.s,
385
0
              "aws_access_key_id", &ad->id,
386
0
              "aws_secret_access_key", &ad->secret,
387
0
              "aws_session_token", &ad->token,
388
0
              "expiry_time", &expiry_time);
389
0
    if (expiry_time.l) {
390
0
        ad->creds_expiry_time = parse_rfc3339_date(&expiry_time);
391
0
    }
392
0
    ks_free(&expiry_time);
393
0
}
394
395
396
/* like a escape path but for query strings '=' and '&' are untouched */
397
0
static char *escape_query(const char *qs) {
398
0
    size_t i, j = 0, length, alloced;
399
0
    char *escaped;
400
401
0
    length = strlen(qs);
402
0
    alloced = length * 3 + 1;
403
0
    if ((escaped = malloc(alloced)) == NULL) {
404
0
        return NULL;
405
0
    }
406
407
0
    for (i = 0; i < length; i++) {
408
0
        int c = qs[i];
409
410
0
        if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
411
0
             c == '_' || c == '-' || c == '~' || c == '.' || c == '/' || c == '=' || c == '&') {
412
0
            escaped[j++] = c;
413
0
        } else {
414
0
            snprintf(escaped + j, alloced - j, "%%%02X", c);
415
0
            j += 3;
416
0
        }
417
0
    }
418
419
0
    escaped[j] = '\0';
420
421
0
    return escaped;
422
0
}
423
424
425
94
static char *escape_path(const char *path) {
426
94
    size_t i, j = 0, length, alloced;
427
94
    char *escaped;
428
429
94
    length = strlen(path);
430
94
    alloced = length * 3 + 1;
431
432
94
    if ((escaped = malloc(alloced)) == NULL) {
433
0
        return NULL;
434
0
    }
435
436
31.9k
    for (i = 0; i < length; i++) {
437
31.8k
        int c = path[i];
438
439
31.8k
        if (c == '?') break; // don't escape ? or beyond
440
441
31.8k
        if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
442
27.8k
             c == '_' || c == '-' || c == '~' || c == '.' || c == '/') {
443
6.33k
            escaped[j++] = c;
444
25.4k
        } else {
445
25.4k
            snprintf(escaped + j, alloced - j, "%%%02X", c);
446
25.4k
            j += 3;
447
25.4k
        }
448
31.8k
    }
449
450
94
    if (i != length) {
451
        // in the case of a '?' copy the rest of the path across unchanged
452
53
        strcpy(escaped + j, path + i);
453
53
    } else {
454
41
        escaped[j] = '\0';
455
41
    }
456
457
94
    return escaped;
458
94
}
459
460
461
247
static int is_escaped(const char *str) {
462
247
    const char *c = str;
463
247
    int escaped = 0;
464
247
    int needs_escape = 0;
465
466
2.50M
    while (*c != '\0') {
467
2.49M
        if (*c == '%' && c[1] != '\0' && c[2] != '\0') {
468
23.6k
            if (isxdigit_c(c[1]) && isxdigit_c(c[2])) {
469
12.3k
                escaped = 1;
470
12.3k
                c += 3;
471
12.3k
                continue;
472
12.3k
            } else {
473
                // only escaped if all % signs are escaped
474
11.3k
                escaped = 0;
475
11.3k
            }
476
23.6k
        }
477
2.48M
        if (!((*c >= '0' && *c <= '9') || (*c >= 'A' && *c <= 'Z')
478
2.10M
              || (*c >= 'a' && *c <= 'z') ||
479
2.07M
              *c == '_' || *c == '-' || *c == '~' || *c == '.' || *c == '/')) {
480
2.05M
            needs_escape = 1;
481
2.05M
        }
482
2.48M
        c++;
483
2.48M
    }
484
485
247
    return escaped || !needs_escape;
486
247
}
487
488
489
0
static int redirect_endpoint(hFILE_s3 *fp, kstring_t *header) {
490
0
    s3_auth_data *ad = fp->au;
491
0
    kstring_t *url = &fp->url;
492
0
    char *new_region;
493
0
    char *end;
494
0
    int ret = -1;
495
496
    // get the new region from the reply header
497
0
    if ((new_region = strstr(header->s, "x-amz-bucket-region: "))) {
498
499
0
        new_region += strlen("x-amz-bucket-region: ");
500
0
        end = new_region;
501
502
0
        while (isalnum_c(*end) || ispunct_c(*end)) end++;
503
504
0
        *end = 0;
505
506
0
        if (strstr(ad->host.s, "amazonaws.com")) {
507
0
            ad->region.l = 0;
508
0
            kputs(new_region, &ad->region);
509
510
0
            ad->host.l = 0;
511
512
0
            if (ad->url_style == s3_path) {
513
                // Path style https://s3.{region-code}.amazonaws.com/{bucket-name}/{key-name}
514
0
                ksprintf(&ad->host, "s3.%s.amazonaws.com", new_region);
515
0
            } else {
516
                // Virtual https://{bucket-name}.s3.{region-code}.amazonaws.com/{key-name}
517
                // Extract the {bucket-name} from {ad->host} to include in subdomain
518
0
                kstring_t url_prefix = KS_INITIALIZE;
519
0
                kputsn(ad->host.s, strcspn(ad->host.s, "."), &url_prefix);
520
521
0
                ksprintf(&ad->host, "%s.s3.%s.amazonaws.com", url_prefix.s, new_region);
522
0
                free(url_prefix.s);
523
0
            }
524
0
            if (ad->region.l && ad->host.l) {
525
0
               int e = 0;
526
0
               url->l = 0;
527
0
               e |= kputs("https://", url) < 0;
528
0
               e |= kputs(ad->host.s, url) < 0;
529
0
               e |= kputsn(ad->bucket, strlen(ad->bucket), url) < 0;
530
531
0
               if (!e)
532
0
                   ret = 0;
533
0
            }
534
0
            if (ad->user_query_string.l) {
535
0
                kputc('?', url);
536
0
                kputsn(ad->user_query_string.s, ad->user_query_string.l, url);
537
0
            }
538
0
        }
539
0
    }
540
541
0
    if (hts_verbose >= HTS_LOG_INFO) fprintf(stderr, "hfile_s3: redirect_endpoint: return %d\n", ret);
542
543
0
    return ret;
544
0
}
545
546
static s3_auth_data * setup_auth_data(const char *s3url, const char *mode,
547
                                      int sigver, kstring_t *url)
548
247
{
549
247
    s3_auth_data *ad = calloc(1, sizeof(*ad));
550
247
    const char *bucket, *path;
551
247
    char *escaped = NULL;
552
247
    size_t url_path_pos;
553
247
    ptrdiff_t bucket_len;
554
247
    int is_https = 1, dns_compliant;
555
247
    char *query_start;
556
557
247
    if (!ad)
558
0
        return NULL;
559
247
    ad->mode = strchr(mode, 'r') ? 'r' : 'w';
560
247
    ad->url_style = s3_auto;
561
562
    // Our S3 URL format is s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH
563
564
247
    if (s3url[2] == '+') {
565
0
        bucket = strchr(s3url, ':') + 1;
566
0
        if (bucket == NULL) {
567
0
            free(ad);
568
0
            return NULL;
569
0
        }
570
0
        kputsn(&s3url[3], bucket - &s3url[3], url);
571
0
        is_https = strncmp(url->s, "https:", 6) == 0;
572
0
    }
573
247
    else {
574
247
        kputs("https:", url);
575
247
        bucket = &s3url[3];
576
247
    }
577
1.10k
    while (*bucket == '/') kputc(*bucket++, url);
578
579
247
    path = bucket + strcspn(bucket, "/?#@");
580
581
247
    if (*path == '@') {
582
130
        const char *colon = strpbrk(bucket, ":@");
583
130
        if (*colon != ':') {
584
8
            urldecode_kput(bucket, colon - bucket, &ad->profile);
585
8
        }
586
122
        else {
587
122
            const char *colon2 = strpbrk(&colon[1], ":@");
588
122
            urldecode_kput(bucket, colon - bucket, &ad->id);
589
122
            urldecode_kput(&colon[1], colon2 - &colon[1], &ad->secret);
590
122
            if (*colon2 == ':')
591
87
                urldecode_kput(&colon2[1], path - &colon2[1], &ad->token);
592
122
        }
593
594
130
        bucket = &path[1];
595
130
        path = bucket + strcspn(bucket, "/?#");
596
130
    }
597
117
    else {
598
        // If the URL has no ID[:SECRET]@, consider environment variables.
599
117
        const char *v;
600
117
        if ((v = getenv("AWS_ACCESS_KEY_ID")) != NULL) kputs(v, &ad->id);
601
117
        if ((v = getenv("AWS_SECRET_ACCESS_KEY")) != NULL) kputs(v, &ad->secret);
602
117
        if ((v = getenv("AWS_SESSION_TOKEN")) != NULL) kputs(v, &ad->token);
603
117
        if ((v = getenv("AWS_DEFAULT_REGION")) != NULL) kputs(v, &ad->region);
604
117
        if ((v = getenv("HTS_S3_HOST")) != NULL) kputs(v, &ad->host);
605
606
117
        if ((v = getenv("AWS_DEFAULT_PROFILE")) != NULL) kputs(v, &ad->profile);
607
117
        else if ((v = getenv("AWS_PROFILE")) != NULL) kputs(v, &ad->profile);
608
117
        else kputs("default", &ad->profile);
609
610
117
        if ((v = getenv("HTS_S3_ADDRESS_STYLE")) != NULL) {
611
0
            if (strcasecmp(v, "virtual") == 0) {
612
0
                ad->url_style = s3_virtual;
613
0
            } else if (strcasecmp(v, "path") == 0) {
614
0
                ad->url_style = s3_path;
615
0
            }
616
0
        }
617
117
    }
618
619
247
    if (ad->id.l == 0) {
620
131
        kstring_t url_style = KS_INITIALIZE;
621
131
        kstring_t expiry_time = KS_INITIALIZE;
622
131
        const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE");
623
131
        parse_ini(v? v : "~/.aws/credentials", ad->profile.s,
624
131
                  "aws_access_key_id", &ad->id,
625
131
                  "aws_secret_access_key", &ad->secret,
626
131
                  "aws_session_token", &ad->token,
627
131
                  "region", &ad->region,
628
131
                  "addressing_style", &url_style,
629
131
                  "expiry_time", &expiry_time,
630
131
                  NULL);
631
632
131
        if (url_style.l) {
633
0
            if (strcmp(url_style.s, "virtual") == 0) {
634
0
                ad->url_style = s3_virtual;
635
0
            } else if (strcmp(url_style.s, "path") == 0) {
636
0
                ad->url_style = s3_path;
637
0
            } else {
638
0
                ad->url_style = s3_auto;
639
0
            }
640
0
        }
641
131
        if (expiry_time.l) {
642
            // Not a real part of the AWS configuration file, but it allows
643
            // support for short-term credentials like those for the IAM
644
            // service.  The botocore library uses the key "expiry_time"
645
            // internally for this purpose.
646
            // See https://github.com/boto/botocore/blob/develop/botocore/credentials.py
647
0
            ad->creds_expiry_time = parse_rfc3339_date(&expiry_time);
648
0
        }
649
650
131
        ks_free(&url_style);
651
131
        ks_free(&expiry_time);
652
131
    }
653
654
247
    if (ad->id.l == 0) {
655
131
        kstring_t url_style = KS_INITIALIZE;
656
131
        const char *v = getenv("HTS_S3_S3CFG");
657
131
        parse_ini(v? v : "~/.s3cfg", ad->profile.s, "access_key", &ad->id,
658
131
                  "secret_key", &ad->secret, "access_token", &ad->token,
659
131
                  "host_base", &ad->host,
660
131
                  "bucket_location", &ad->region,
661
131
                  "host_bucket", &url_style,
662
131
                  NULL);
663
664
131
        if (url_style.l) {
665
            // Conforming to s3cmd's GitHub PR#416, host_bucket without the "%(bucket)s" string
666
            // indicates use of path style adressing.
667
0
            if (strstr(url_style.s, "%(bucket)s") == NULL) {
668
0
                ad->url_style = s3_path;
669
0
            } else {
670
0
                ad->url_style = s3_auto;
671
0
            }
672
0
        }
673
674
131
        ks_free(&url_style);
675
131
    }
676
677
247
    if (ad->id.l == 0)
678
131
        parse_simple("~/.awssecret", &ad->id, &ad->secret);
679
680
681
    // if address_style is set, force the dns_compliant setting
682
247
    if (ad->url_style == s3_virtual) {
683
0
        dns_compliant = 1;
684
247
    } else if (ad->url_style == s3_path) {
685
0
        dns_compliant = 0;
686
247
    } else {
687
247
        dns_compliant = is_dns_compliant(bucket, path, is_https);
688
247
    }
689
690
247
    if (ad->host.l == 0)
691
247
        kputs("s3.amazonaws.com", &ad->host);
692
693
247
    if (!dns_compliant && ad->region.l > 0
694
0
        && strcmp(ad->host.s, "s3.amazonaws.com") == 0) {
695
        // Can avoid a redirection by including the region in the host name
696
        // (assuming the right one has been specified)
697
0
        ad->host.l = 0;
698
0
        ksprintf(&ad->host, "s3.%s.amazonaws.com", ad->region.s);
699
0
    }
700
701
247
    if (ad->region.l == 0)
702
247
        kputs("us-east-1", &ad->region);
703
704
247
    if (!is_escaped(path)) {
705
94
        escaped = escape_path(path);
706
94
        if (escaped == NULL) {
707
0
            goto error;
708
0
        }
709
94
    }
710
711
247
    bucket_len = path - bucket;
712
713
    // Use virtual hosted-style access if possible, otherwise path-style.
714
247
    if (dns_compliant) {
715
8
        size_t url_host_pos = url->l;
716
        // Append "bucket.host" to url
717
8
        kputsn_(bucket, bucket_len, url);
718
8
        kputc('.', url);
719
8
        kputsn(ad->host.s, ad->host.l, url);
720
8
        url_path_pos = url->l;
721
722
8
        if (sigver == 4) {
723
            // Copy back to ad->host to use when making the signature
724
8
            ad->host.l = 0;
725
8
            kputsn(url->s + url_host_pos, url->l - url_host_pos, &ad->host);
726
8
        }
727
8
    }
728
239
    else {
729
        // Append "host/bucket" to url
730
239
        kputsn(ad->host.s, ad->host.l, url);
731
239
        url_path_pos = url->l;
732
239
        kputc('/', url);
733
239
        kputsn(bucket, bucket_len, url);
734
239
    }
735
736
247
    kputs(escaped == NULL ? path : escaped, url);
737
738
247
    if (sigver == 4 || !dns_compliant) {
739
247
        ad->bucket = malloc(url->l - url_path_pos + 1);
740
247
        if (ad->bucket == NULL) {
741
0
            goto error;
742
0
        }
743
247
        memcpy(ad->bucket, url->s + url_path_pos, url->l - url_path_pos + 1);
744
247
        ad->is_v4 = 1;
745
247
    }
746
0
    else {
747
0
        ad->bucket = malloc(url->l - url_path_pos + bucket_len + 2);
748
0
        if (ad->bucket == NULL) {
749
0
            goto error;
750
0
        }
751
0
        ad->bucket[0] = '/';
752
0
        memcpy(ad->bucket + 1, bucket, bucket_len);
753
0
        memcpy(ad->bucket + bucket_len + 1,
754
0
               url->s + url_path_pos, url->l - url_path_pos + 1);
755
0
        ad->is_v4 = 0;
756
0
    }
757
758
    // write any query strings to its own place to use later
759
247
    if ((query_start = strchr(ad->bucket, '?'))) {
760
64
        kputs(query_start + 1, &ad->user_query_string);
761
64
        *query_start = 0;
762
64
    }
763
764
247
    free(escaped);
765
766
247
    return ad;
767
768
0
 error:
769
0
    free(escaped);
770
0
    free_auth_data(ad);
771
0
    return NULL;
772
247
}
773
774
775
0
static int v2_authorisation(hFILE_s3 *fp, char *request) {
776
0
    s3_auth_data *ad = fp->au;
777
0
    time_t now = time(NULL);
778
779
0
#ifdef HAVE_GMTIME_R
780
0
    struct tm tm_buffer;
781
0
    struct tm *tm = gmtime_r(&now, &tm_buffer);
782
#else
783
    struct tm *tm = gmtime(&now);
784
#endif
785
786
0
    kstring_t message = KS_INITIALIZE;
787
0
    unsigned char digest[DIGEST_BUFSIZ];
788
0
    size_t digest_len;
789
790
0
    if (ad->creds_expiry_time > 0
791
0
        && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) {
792
0
        refresh_auth_data(ad);
793
0
    }
794
795
    // date format between v2 and v4 is different.
796
797
0
    strftime(ad->date, sizeof(ad->date), "Date: %a, %d %b %Y %H:%M:%S GMT", tm);
798
799
0
    kputs(ad->date, &fp->date);
800
801
0
    if (!ad->id.l || !ad->secret.l) {
802
0
        ad->auth_time = now;
803
0
        return 0;
804
0
    }
805
806
0
    if (ksprintf(&message, "%s\n\n\n%s\n%s%s%s%s",
807
0
                 request, ad->date + 6,
808
0
                 ad->token.l ? "x-amz-security-token:" : "",
809
0
                 ad->token.l ? ad->token.s : "",
810
0
                 ad->token.l ? "\n" : "",
811
0
                 ad->bucket) < 0) {
812
0
        return -1;
813
0
    }
814
815
0
    digest_len = s3_sign(digest, &ad->secret, &message);
816
817
0
    if (ksprintf(&fp->authorisation, "Authorization: AWS %s:", ad->id.s) < 0)
818
0
        goto fail;
819
820
0
    base64_kput(digest, digest_len, &fp->authorisation);
821
822
0
    free(message.s);
823
0
    ad->auth_time = now;
824
0
    return 0;
825
826
0
 fail:
827
0
    free(message.s);
828
0
    return -1;
829
0
}
830
831
/***************************************************************
832
833
AWS S3 sig version 4 writing code
834
835
****************************************************************/
836
837
344
static void hash_string(char *in, size_t length, char *out, size_t out_len) {
838
344
    unsigned char hashed[SHA256_DIGEST_BUFSIZE];
839
344
    int i, j;
840
841
344
    s3_sha256((const unsigned char *)in, length, hashed);
842
843
11.3k
    for (i = 0, j = 0; i < SHA256_DIGEST_BUFSIZE; i++, j+= 2) {
844
11.0k
        snprintf(out + j, out_len - j, "%02x", hashed[i]);
845
11.0k
    }
846
344
}
847
848
849
97
static int make_signature(s3_auth_data *ad, kstring_t *string_to_sign, char *signature_string, size_t sig_string_len) {
850
97
    unsigned char date_key[SHA256_DIGEST_BUFSIZE];
851
97
    unsigned char date_region_key[SHA256_DIGEST_BUFSIZE];
852
97
    unsigned char date_region_service_key[SHA256_DIGEST_BUFSIZE];
853
97
    unsigned char signing_key[SHA256_DIGEST_BUFSIZE];
854
97
    unsigned char signature[SHA256_DIGEST_BUFSIZE];
855
856
97
    const unsigned char service[] = "s3";
857
97
    const unsigned char request[] = "aws4_request";
858
859
97
    kstring_t secret_access_key = KS_INITIALIZE;
860
97
    unsigned int len;
861
97
    unsigned int i, j;
862
863
97
    ksprintf(&secret_access_key, "AWS4%s", ad->secret.s);
864
865
97
    if (secret_access_key.l == 0) {
866
0
        return -1;
867
0
    }
868
869
97
    s3_sign_sha256(secret_access_key.s, secret_access_key.l, (const unsigned char *)ad->date_short, strlen(ad->date_short), date_key, &len);
870
97
    s3_sign_sha256(date_key, len, (const unsigned char *)ad->region.s, ad->region.l, date_region_key, &len);
871
97
    s3_sign_sha256(date_region_key, len, service, 2, date_region_service_key, &len);
872
97
    s3_sign_sha256(date_region_service_key, len, request, 12, signing_key, &len);
873
97
    s3_sign_sha256(signing_key, len, (const unsigned char *)string_to_sign->s, string_to_sign->l, signature, &len);
874
875
3.20k
    for (i = 0, j = 0; i < len; i++, j+= 2) {
876
3.10k
        snprintf(signature_string + j, sig_string_len - j, "%02x", signature[i]);
877
3.10k
    }
878
879
97
    ks_free(&secret_access_key);
880
881
97
    return 0;
882
97
}
883
884
885
247
static int make_authorisation(s3_auth_data *ad, char *http_request, char *content, kstring_t *auth) {
886
247
    kstring_t signed_headers = KS_INITIALIZE;
887
247
    kstring_t canonical_headers = KS_INITIALIZE;
888
247
    kstring_t canonical_request = KS_INITIALIZE;
889
247
    kstring_t scope = KS_INITIALIZE;
890
247
    kstring_t string_to_sign = KS_INITIALIZE;
891
247
    char cr_hash[HASH_LENGTH_SHA256];
892
247
    char signature_string[HASH_LENGTH_SHA256];
893
247
    int ret = -1;
894
895
247
    if (!ad->id.l || !ad->secret.l) {
896
150
        return 0;
897
150
    }
898
899
97
    if (!ad->token.l) {
900
33
        kputs("host;x-amz-content-sha256;x-amz-date", &signed_headers);
901
64
    } else {
902
64
        kputs("host;x-amz-content-sha256;x-amz-date;x-amz-security-token", &signed_headers);
903
64
    }
904
905
97
    if (signed_headers.l == 0) {
906
0
        return -1;
907
0
    }
908
909
910
97
    if (!ad->token.l) {
911
33
        ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\n",
912
33
        ad->host.s, content, ad->date_long);
913
64
    } else {
914
64
        ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\nx-amz-security-token:%s\n",
915
64
        ad->host.s, content, ad->date_long, ad->token.s);
916
64
    }
917
918
97
    if (canonical_headers.l == 0) {
919
0
        goto cleanup;
920
0
    }
921
922
    // bucket == canonical_uri
923
97
    ksprintf(&canonical_request, "%s\n%s\n%s\n%s\n%s\n%s",
924
97
        http_request, ad->bucket, ad->canonical_query_string.s,
925
97
        canonical_headers.s, signed_headers.s, content);
926
927
97
    if (canonical_request.l == 0) {
928
0
        goto cleanup;
929
0
    }
930
931
97
    hash_string(canonical_request.s, canonical_request.l, cr_hash, sizeof(cr_hash));
932
933
97
    ksprintf(&scope, "%s/%s/s3/aws4_request", ad->date_short, ad->region.s);
934
935
97
    if (scope.l == 0) {
936
0
        goto cleanup;
937
0
    }
938
939
97
    ksprintf(&string_to_sign, "AWS4-HMAC-SHA256\n%s\n%s\n%s", ad->date_long, scope.s, cr_hash);
940
941
97
    if (string_to_sign.l == 0) {
942
0
        goto cleanup;
943
0
    }
944
945
97
    if (make_signature(ad, &string_to_sign, signature_string, sizeof(signature_string))) {
946
0
        goto cleanup;
947
0
    }
948
949
97
    ksprintf(auth, "Authorization: AWS4-HMAC-SHA256 Credential=%s/%s/%s/s3/aws4_request,SignedHeaders=%s,Signature=%s",
950
97
                ad->id.s, ad->date_short, ad->region.s, signed_headers.s, signature_string);
951
952
97
    if (auth->l == 0) {
953
0
        goto cleanup;
954
0
    }
955
956
97
    ret = 0;
957
958
97
 cleanup:
959
97
    ks_free(&signed_headers);
960
97
    ks_free(&canonical_headers);
961
97
    ks_free(&canonical_request);
962
97
    ks_free(&scope);
963
97
    ks_free(&string_to_sign);
964
965
97
    return ret;
966
97
}
967
968
969
247
static int update_time(s3_auth_data *ad, time_t now) {
970
247
    int ret = -1;
971
247
#ifdef HAVE_GMTIME_R
972
247
    struct tm tm_buffer;
973
247
    struct tm *tm = gmtime_r(&now, &tm_buffer);
974
#else
975
    struct tm *tm = gmtime(&now);
976
#endif
977
978
247
    if (now - ad->auth_time > AUTH_LIFETIME) {
979
        // update timestamp
980
247
        ad->auth_time = now;
981
982
247
        if (strftime(ad->date_long, 17, "%Y%m%dT%H%M%SZ", tm) != 16) {
983
0
            return -1;
984
0
        }
985
986
247
        if (strftime(ad->date_short, 9, "%Y%m%d", tm) != 8) {
987
0
            return -1;;
988
0
        }
989
990
247
        ad->date_html.l = 0;
991
247
        ksprintf(&ad->date_html, "x-amz-date: %s", ad->date_long);
992
247
    }
993
994
247
    if (ad->date_html.l) ret = 0;
995
996
247
    return ret;
997
247
}
998
999
1000
0
static int query_cmp(const void *p1, const void *p2) {
1001
0
    char **q1 = (char **)p1;
1002
0
    char **q2 = (char **)p2;
1003
1004
0
    return strcmp(*q1, *q2);
1005
0
}
1006
1007
1008
/* Query strings must be in alphabetical order for authorisation */
1009
1010
0
static int order_query_string(kstring_t *qs) {
1011
0
    int *query_offset = NULL;
1012
0
    int num_queries, i;
1013
0
    char **queries = NULL;
1014
0
    kstring_t ordered = KS_INITIALIZE;
1015
0
    char *escaped = NULL;
1016
0
    int ret = -1;
1017
1018
0
    if ((query_offset = ksplit(qs, '&', &num_queries)) == NULL) {
1019
0
        return -1;
1020
0
    }
1021
1022
0
    if ((queries = malloc(num_queries * sizeof(char*))) == NULL)
1023
0
        goto err;
1024
1025
0
    for (i = 0; i < num_queries; i++) {
1026
0
        queries[i] = qs->s + query_offset[i];
1027
0
    }
1028
1029
0
    qsort(queries, num_queries, sizeof(char *), query_cmp);
1030
1031
0
    for (i = 0; i < num_queries; i++) {
1032
0
        if (i) {
1033
0
            kputs("&", &ordered);
1034
0
        }
1035
1036
0
        kputs(queries[i], &ordered);
1037
0
    }
1038
1039
0
    if ((escaped = escape_query(ordered.s)) == NULL)
1040
0
        goto err;
1041
1042
0
    qs->l = 0;
1043
0
    kputs(escaped, qs);
1044
1045
0
    ret = 0;
1046
0
 err:
1047
0
    free(ordered.s);
1048
0
    free(queries);
1049
0
    free(query_offset);
1050
0
    free(escaped);
1051
1052
0
    return ret;
1053
0
}
1054
1055
1056
247
static int v4_authorisation(hFILE_s3 *fp, char *request, kstring_t *content, char *cqs, int uqs) {
1057
247
    s3_auth_data *ad = fp->au;
1058
247
    char content_hash[HASH_LENGTH_SHA256];
1059
247
    time_t now;
1060
1061
247
    now = time(NULL);
1062
1063
247
    if (update_time(ad, now)) {
1064
0
        return -1;
1065
0
    }
1066
1067
247
    if (ad->creds_expiry_time > 0
1068
0
        && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) {
1069
0
        refresh_auth_data(ad);
1070
0
    }
1071
1072
247
    if (content) {
1073
0
        hash_string(content->s, content->l, content_hash, sizeof(content_hash));
1074
247
    } else {
1075
        // empty hash
1076
247
        hash_string("", 0, content_hash, sizeof(content_hash));
1077
247
    }
1078
1079
247
    ad->canonical_query_string.l = 0;
1080
1081
247
    if (cqs) {
1082
247
        kputs(cqs, &ad->canonical_query_string);
1083
1084
        /* add a user provided query string, normally only useful on upload initiation */
1085
247
        if (uqs) {
1086
0
            kputs("&", &ad->canonical_query_string);
1087
0
            kputs(ad->user_query_string.s, &ad->canonical_query_string);
1088
1089
0
            if (order_query_string(&ad->canonical_query_string)) {
1090
0
                return -1;
1091
0
            }
1092
0
        }
1093
247
    }
1094
1095
247
    if (make_authorisation(ad, request, content_hash, &fp->authorisation)) {
1096
0
        return -1;
1097
0
    }
1098
1099
247
    kputs(ad->date_html.s, &fp->date);
1100
247
    kputsn(content_hash, HASH_LENGTH_SHA256, &fp->content_hash);
1101
1102
247
    if (fp->date.l == 0 || fp->content_hash.l == 0) {
1103
0
        return -1;
1104
0
    }
1105
1106
247
    if (ad->token.l) {
1107
87
        ksprintf(&fp->token, "x-amz-security-token: %s", ad->token.s);
1108
87
    }
1109
1110
247
    return 0;
1111
247
}
1112
1113
0
static int set_region(s3_auth_data *ad, kstring_t *region) {
1114
0
    ad->region.l = 0;
1115
0
    return kputsn(region->s, region->l, &ad->region) < 0;
1116
0
}
1117
1118
//
1119
// Writing and reading handling
1120
//
1121
1122
// Some common code
1123
1124
18
#define S3_MOVED_PERMANENTLY 301
1125
9
#define S3_TEMPORARY_REDIRECT 307
1126
9
#define S3_BAD_REQUEST 400
1127
1128
static struct {
1129
    kstring_t useragent;
1130
    CURLSH *share;
1131
    pthread_mutex_t share_lock;
1132
} curl = { { 0, 0, NULL }, NULL, PTHREAD_MUTEX_INITIALIZER };
1133
1134
static void share_lock(CURL *handle, curl_lock_data data,
1135
1
                       curl_lock_access access, void *userptr) {
1136
1
    pthread_mutex_lock(&curl.share_lock);
1137
1
}
1138
1139
1
static void share_unlock(CURL *handle, curl_lock_data data, void *userptr) {
1140
1
    pthread_mutex_unlock(&curl.share_lock);
1141
1
}
1142
1143
1144
247
static void initialise_authorisation_values(hFILE_s3 *fp) {
1145
247
    ks_initialize(&fp->content_hash);
1146
247
    ks_initialize(&fp->authorisation);
1147
247
    ks_initialize(&fp->content);
1148
247
    ks_initialize(&fp->date);
1149
247
    ks_initialize(&fp->token);
1150
247
    ks_initialize(&fp->range);
1151
247
}
1152
1153
1154
247
static void clear_authorisation_values(hFILE_s3 *fp) {
1155
247
    ks_clear(&fp->content_hash);
1156
247
    ks_clear(&fp->authorisation);
1157
247
    ks_clear(&fp->content);
1158
247
    ks_clear(&fp->date);
1159
247
    ks_clear(&fp->token);
1160
247
    ks_clear(&fp->range);
1161
247
}
1162
1163
1164
494
static void free_authorisation_values(hFILE_s3 *fp) {
1165
494
    ks_free(&fp->content_hash);
1166
494
    ks_free(&fp->authorisation);
1167
494
    ks_free(&fp->content);
1168
494
    ks_free(&fp->date);
1169
494
    ks_free(&fp->token);
1170
494
    ks_free(&fp->range);
1171
494
}
1172
1173
/* As the response text is case insensitive we need a version of strstr that
1174
   is also case insensitive.  The response is small so no need to get too
1175
   complicated on the string search.
1176
*/
1177
9
static char *stristr(char *haystack, char *needle) {
1178
1179
4.05k
    while (*haystack) {
1180
4.04k
        char *h = haystack;
1181
4.04k
        char *n = needle;
1182
1183
4.18k
        while (toupper_c(*h) == toupper_c(*n)) {
1184
135
            h++, n++;
1185
135
            if (!*h || !*n) break;
1186
135
        }
1187
1188
4.04k
        if (!*n) break;
1189
1190
4.04k
        haystack++;
1191
4.04k
    }
1192
1193
9
    if (!*haystack) return NULL;
1194
1195
0
    return haystack;
1196
9
}
1197
1198
1199
9
static int get_entry(char *in, char *start_tag, char *end_tag, kstring_t *out) {
1200
9
    char *start;
1201
9
    char *end;
1202
1203
9
    if (!in) {
1204
0
        return EOF;
1205
0
    }
1206
1207
9
    start = stristr(in, start_tag);
1208
9
    if (!start) return EOF;
1209
1210
0
    start += strlen(start_tag);
1211
0
    end = stristr(start, end_tag);
1212
1213
0
    if (!end) return EOF;
1214
1215
0
    return kputsn(start, end - start, out);
1216
0
}
1217
1218
1219
0
static int report_s3_error(kstring_t *body, long resp_code) {
1220
0
    kstring_t entry = KS_INITIALIZE;
1221
1222
0
    if (get_entry(body->s, "<Code>", "</Code>", &entry) == EOF) {
1223
0
        return -1;
1224
0
    }
1225
1226
0
    fprintf(stderr, "hfile_s3: S3 error %ld: %s\n", resp_code, entry.s);
1227
1228
0
    ks_clear(&entry);
1229
1230
0
    if (get_entry(body->s, "<Message>", "</Message>", &entry) == EOF) {
1231
0
        return -1;
1232
0
    }
1233
1234
0
    if (entry.l)
1235
0
        fprintf(stderr, "%s\n", entry.s);
1236
1237
0
    ks_free(&entry);
1238
1239
0
    return 0;
1240
0
}
1241
1242
1243
static int http_status_errno(int status)
1244
9
{
1245
9
    if (status >= 500)
1246
0
        switch (status) {
1247
0
        case 501: return ENOSYS;
1248
0
        case 503: return EBUSY;
1249
0
        case 504: return ETIMEDOUT;
1250
0
        default:  return EIO;
1251
0
        }
1252
9
    else if (status >= 400)
1253
9
        switch (status) {
1254
0
        case 401: return EPERM;
1255
0
        case 403: return EACCES;
1256
0
        case 404: return ENOENT;
1257
0
        case 405: return EROFS;
1258
0
        case 407: return EPERM;
1259
0
        case 408: return ETIMEDOUT;
1260
0
        case 410: return ENOENT;
1261
9
        default:  return EINVAL;
1262
9
        }
1263
0
    else if (status >= 300)
1264
0
        return EIO;
1265
0
    else return 0;
1266
9
}
1267
1268
1269
247
static void initialise_local(hFILE_s3 *fp) {
1270
247
    ks_initialize(&fp->buffer);
1271
247
    ks_initialize(&fp->url);
1272
247
    ks_initialize(&fp->upload_id);           // write only
1273
247
    ks_initialize(&fp->completion_message);  // write only
1274
247
}
1275
1276
1277
247
static void cleanup_local(hFILE_s3 *fp) {
1278
247
    ks_free(&fp->buffer);
1279
247
    ks_free(&fp->url);
1280
247
    ks_free(&fp->upload_id);
1281
247
    ks_free(&fp->completion_message);
1282
247
    curl_easy_cleanup(fp->curl);
1283
247
    free_authorisation_values(fp);
1284
247
}
1285
1286
1287
0
static void cleanup(hFILE_s3 *fp) {
1288
    // free up authorisation data
1289
0
    free_auth_data(fp->au);
1290
0
    cleanup_local(fp);
1291
0
}
1292
1293
81
static size_t response_callback(void *contents, size_t size, size_t nmemb, void *userp) {
1294
81
    size_t realsize = size * nmemb;
1295
81
    kstring_t *resp = (kstring_t *)userp;
1296
1297
81
    if (kputsn((const char *)contents, realsize, resp) == EOF) {
1298
0
        return 0;
1299
0
    }
1300
1301
81
    return realsize;
1302
81
}
1303
1304
1305
1.41k
static int add_header(struct curl_slist **head, char *value) {
1306
1.41k
    int err = 0;
1307
1.41k
    struct curl_slist *tmp;
1308
1309
1.41k
    if ((tmp = curl_slist_append(*head, value)) == NULL) {
1310
0
        err = 1;
1311
1.41k
    } else {
1312
1.41k
        *head = tmp;
1313
1.41k
    }
1314
1315
1.41k
    return err;
1316
1.41k
}
1317
1318
1319
static struct curl_slist *set_html_headers(hFILE_s3 *fp, kstring_t *auth, kstring_t *date,
1320
247
                 kstring_t *content, kstring_t *token, kstring_t *range) {
1321
247
    struct curl_slist *headers = NULL;
1322
247
    int err = 0;
1323
1324
    /* The next two lines have the effect of preventing curl from
1325
       adding these headers.  If they exist it can lead to conflicts
1326
       in the signature calculations (not present in all S3 systems).
1327
    */
1328
247
    err = add_header(&headers, "Content-Type:");
1329
247
    err |= add_header(&headers, "Expect:");
1330
1331
247
    if (err) goto error;
1332
1333
247
    if (auth->l)
1334
97
        if ((err = add_header(&headers, auth->s)))
1335
0
            goto error;
1336
1337
247
    if ((err = add_header(&headers, date->s)))
1338
0
        goto error;
1339
1340
247
    if (content->l)
1341
247
        if ((err = add_header(&headers, content->s)))
1342
0
            goto error;
1343
1344
247
    if (range)
1345
247
        if ((err = add_header(&headers, range->s)))
1346
0
            goto error;
1347
1348
247
    if (token->l)
1349
87
        if ((err = add_header(&headers, token->s)))
1350
0
            goto error;
1351
1352
247
    curl_easy_setopt(fp->curl, CURLOPT_HTTPHEADER, headers);
1353
1354
247
error:
1355
1356
247
    if (err) {
1357
0
        curl_slist_free_all(headers);
1358
0
        headers = NULL;
1359
0
    }
1360
1361
247
    return headers;
1362
247
}
1363
1364
1365
/*
1366
1367
S3 Multipart Upload
1368
-------------------
1369
1370
There are several steps in the Mulitipart upload.
1371
1372
1373
1) Initiate Upload
1374
------------------
1375
1376
Initiate the upload and get an upload ID.  This ID is used in all other steps.
1377
1378
1379
2) Upload Part
1380
--------------
1381
1382
Upload a part of the data.  5Mb minimum part size (except for the last part).
1383
Each part is numbered and a successful upload returns an Etag header value that
1384
needs to used for the completion step.
1385
1386
Step repeated till all data is uploaded.
1387
1388
1389
3) Completion
1390
-------------
1391
1392
Complete the upload by sending all the part numbers along with their associated
1393
Etag values.
1394
1395
1396
Optional - Abort
1397
----------------
1398
1399
If something goes wrong this instructs the server to delete all the partial
1400
uploads and abandon the upload process.
1401
*/
1402
1403
/*
1404
   This is the writing code.
1405
*/
1406
1407
0
#define MINIMUM_S3_WRITE_SIZE 5242880
1408
1409
// Lets the part memory size grow to about 1Gb giving a 2.5Tb max file size.
1410
// Max. parts allowed by AWS is 10000, so use ceil(10000.0/9.0)
1411
0
#define EXPAND_ON 1112
1412
1413
1414
1415
/*
1416
    The partially uploaded file will hang around unless the delete command is sent.
1417
*/
1418
0
static int abort_upload(hFILE_s3 *fp) {
1419
0
    kstring_t url = KS_INITIALIZE;
1420
0
    kstring_t canonical_query_string = KS_INITIALIZE;
1421
0
    int ret = -1, save_errno;
1422
0
    struct curl_slist *headers = NULL;
1423
0
    char http_request[] = "DELETE";
1424
0
    CURLcode err;
1425
1426
0
    save_errno = errno; // keep the errno that caused the need to abort
1427
1428
0
    clear_authorisation_values(fp);
1429
1430
0
    if (ksprintf(&canonical_query_string, "uploadId=%s", fp->upload_id.s) < 0) {
1431
0
        goto out;
1432
0
    }
1433
1434
0
    if (v4_authorisation(fp,  http_request, NULL, canonical_query_string.s, 0) != 0) {
1435
0
        goto out;
1436
0
    }
1437
1438
0
    if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) {
1439
0
        goto out;
1440
0
    }
1441
1442
0
    if (ksprintf(&fp->content, "x-amz-content-sha256: %s", fp->content_hash.s) < 0) {
1443
0
        goto out;
1444
0
    }
1445
1446
0
    curl_easy_reset(fp->curl);
1447
1448
0
    err = curl_easy_setopt(fp->curl, CURLOPT_CUSTOMREQUEST, http_request);
1449
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s);
1450
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_URL, url.s);
1451
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose);
1452
1453
0
    if (err != CURLE_OK)
1454
0
        goto out;
1455
1456
0
    headers = set_html_headers(fp, &fp->authorisation, &fp->date, &fp->content, &fp->token, NULL);
1457
1458
0
    if (!headers)
1459
0
        goto out;
1460
1461
0
    fp->ret = curl_easy_perform(fp->curl);
1462
1463
0
    if (fp->ret == CURLE_OK) {
1464
0
        ret = 0;
1465
0
    }
1466
1467
0
 out:
1468
0
    ks_free(&url);
1469
0
    ks_free(&canonical_query_string);
1470
0
    curl_slist_free_all(headers);
1471
1472
0
    fp->aborted = 1;
1473
0
    cleanup(fp);
1474
1475
0
    errno = save_errno;
1476
0
    return ret;
1477
0
}
1478
1479
1480
0
static int complete_upload(hFILE_s3 *fp, kstring_t *resp) {
1481
0
    kstring_t url = KS_INITIALIZE;
1482
0
    kstring_t canonical_query_string = KS_INITIALIZE;
1483
0
    int ret = -1;
1484
0
    struct curl_slist *headers = NULL;
1485
0
    char http_request[] = "POST";
1486
0
    CURLcode err;
1487
1488
0
    clear_authorisation_values(fp);
1489
1490
0
    if (ksprintf(&canonical_query_string, "uploadId=%s", fp->upload_id.s) < 0) {
1491
0
        return -1;
1492
0
    }
1493
1494
    // finish off the completion reply
1495
0
    if (kputs("</CompleteMultipartUpload>\n", &fp->completion_message) < 0) {
1496
0
        goto out;
1497
0
    }
1498
1499
0
    if (v4_authorisation(fp,  http_request, &fp->completion_message, canonical_query_string.s, 0) != 0) {
1500
0
        goto out;
1501
0
    }
1502
1503
0
    if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) {
1504
0
        goto out;
1505
0
    }
1506
1507
0
    if (ksprintf(&fp->content, "x-amz-content-sha256: %s", fp->content_hash.s) < 0) {
1508
0
        goto out;
1509
0
    }
1510
1511
0
    curl_easy_reset(fp->curl);
1512
1513
0
    err = curl_easy_setopt(fp->curl, CURLOPT_POST, 1L);
1514
1515
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDS, fp->completion_message.s);
1516
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDSIZE, (long) fp->completion_message.l);
1517
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_WRITEFUNCTION, response_callback);
1518
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_WRITEDATA, (void *)resp);
1519
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_URL, url.s);
1520
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s);
1521
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose);
1522
1523
0
    if (err != CURLE_OK)
1524
0
        goto out;
1525
1526
0
    headers = set_html_headers(fp, &fp->authorisation, &fp->date, &fp->content, &fp->token, NULL);
1527
1528
0
    if (!headers)
1529
0
        goto out;
1530
1531
0
    fp->ret = curl_easy_perform(fp->curl);
1532
1533
0
    if (fp->ret == CURLE_OK) {
1534
0
        ret = 0;
1535
0
    }
1536
1537
0
 out:
1538
0
    ks_free(&url);
1539
0
    ks_free(&canonical_query_string);
1540
0
    curl_slist_free_all(headers);
1541
1542
0
    return ret;
1543
0
}
1544
1545
1546
0
static size_t upload_callback(void *ptr, size_t size, size_t nmemb, void *stream) {
1547
0
    size_t realsize = size * nmemb;
1548
0
    hFILE_s3 *fp = (hFILE_s3 *)stream;
1549
0
    size_t read_length;
1550
1551
0
    if (realsize > (fp->buffer.l - fp->index)) {
1552
0
        read_length = fp->buffer.l - fp->index;
1553
0
    } else {
1554
0
        read_length = realsize;
1555
0
    }
1556
1557
0
    memcpy(ptr, fp->buffer.s + fp->index, read_length);
1558
0
    fp->index += read_length;
1559
1560
0
    return read_length;
1561
0
}
1562
1563
1564
0
static int upload_part(hFILE_s3 *fp, kstring_t *resp) {
1565
0
    kstring_t url = KS_INITIALIZE;
1566
0
    kstring_t canonical_query_string = KS_INITIALIZE;
1567
0
    int ret = -1;
1568
0
    struct curl_slist *headers = NULL;
1569
0
    char http_request[] = "PUT";
1570
0
    CURLcode err;
1571
1572
0
    clear_authorisation_values(fp);
1573
1574
0
    if (ksprintf(&canonical_query_string, "partNumber=%d&uploadId=%s", fp->part_no, fp->upload_id.s) < 0) {
1575
0
        return -1;
1576
0
    }
1577
1578
0
    if (v4_authorisation(fp, http_request, &fp->buffer, canonical_query_string.s, 0) != 0) {
1579
0
        goto out;
1580
0
    }
1581
1582
0
    if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) {
1583
0
        goto out;
1584
0
    }
1585
1586
0
    fp->index = 0;
1587
0
    if (ksprintf(&fp->content, "x-amz-content-sha256: %s", fp->content_hash.s) < 0) {
1588
0
        goto out;
1589
0
    }
1590
1591
0
    curl_easy_reset(fp->curl);
1592
1593
0
    err = curl_easy_setopt(fp->curl, CURLOPT_UPLOAD, 1L);
1594
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_READFUNCTION, upload_callback);
1595
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_READDATA, fp);
1596
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_INFILESIZE_LARGE, (curl_off_t)fp->buffer.l);
1597
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_HEADERFUNCTION, response_callback);
1598
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_HEADERDATA, (void *)resp);
1599
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_URL, url.s);
1600
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s);
1601
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose);
1602
1603
0
    if (err != CURLE_OK)
1604
0
        goto out;
1605
1606
0
    headers = set_html_headers(fp, &fp->authorisation, &fp->date, &fp->content, &fp->token, NULL);
1607
1608
0
    if (!headers)
1609
0
        goto out;
1610
1611
0
    fp->ret = curl_easy_perform(fp->curl);
1612
1613
0
    if (fp->ret == CURLE_OK) {
1614
0
        ret = 0;
1615
0
    }
1616
1617
0
 out:
1618
0
    ks_free(&url);
1619
0
    ks_free(&canonical_query_string);
1620
0
    curl_slist_free_all(headers);
1621
1622
0
    return ret;
1623
0
}
1624
1625
1626
0
static ssize_t s3_write(hFILE *fpv, const void *bufferv, size_t nbytes) {
1627
0
    hFILE_s3 *fp = (hFILE_s3 *)fpv;
1628
0
    const char *buffer  = (const char *)bufferv;
1629
0
    CURLcode cret;
1630
1631
0
    if (kputsn(buffer, nbytes, &fp->buffer) == EOF) {
1632
0
        return -1;
1633
0
    }
1634
1635
0
    if (fp->buffer.l > fp->part_size) {
1636
        // time to write out our data
1637
0
        kstring_t response = {0, 0, NULL};
1638
0
        int ret;
1639
1640
0
        ret = upload_part(fp, &response);
1641
1642
0
        if (!ret) {
1643
0
            long response_code;
1644
0
            kstring_t etag = {0, 0, NULL};
1645
1646
0
            cret = curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);
1647
1648
0
            if (cret != CURLE_OK || response_code > 200) {
1649
0
                errno = http_status_errno(response_code);
1650
0
                ret = -1;
1651
0
            } else {
1652
0
                if (get_entry(response.s, "Etag: \"", "\"", &etag) == EOF) {
1653
0
                    fprintf(stderr, "hfile_s3: Failed to read Etag\n");
1654
0
                    ret = -1;
1655
0
                } else {
1656
0
                    ksprintf(&fp->completion_message, "\t<Part>\n\t\t<PartNumber>%d</PartNumber>\n\t\t<ETag>%s</ETag>\n\t</Part>\n",
1657
0
                        fp->part_no, etag.s);
1658
1659
0
                    ks_free(&etag);
1660
0
                }
1661
0
            }
1662
0
        }
1663
1664
0
        ks_free(&response);
1665
1666
0
        if (ret) {
1667
0
            abort_upload(fp);
1668
0
            return -1;
1669
0
        }
1670
1671
0
        fp->part_no++;
1672
0
        fp->buffer.l = 0;
1673
1674
0
        if (fp->expand && (fp->part_no % EXPAND_ON == 0)) {
1675
0
            fp->part_size *= 2;
1676
0
        }
1677
0
    }
1678
1679
0
    return nbytes;
1680
0
}
1681
1682
1683
0
static int s3_write_close(hFILE *fpv) {
1684
0
    hFILE_s3 *fp = (hFILE_s3 *)fpv;
1685
0
    kstring_t response = {0, 0, NULL};
1686
0
    int ret = 0;
1687
0
    CURLcode cret;
1688
0
    long response_code;
1689
1690
0
    if (!fp->aborted) {
1691
1692
0
        if (fp->buffer.l) {
1693
            // write the last part
1694
1695
0
            ret = upload_part(fp, &response);
1696
1697
0
            if (!ret) {
1698
0
                kstring_t etag = {0, 0, NULL};
1699
1700
0
                cret = curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);
1701
1702
0
                if (cret != CURLE_OK || response_code > 200) {
1703
0
                    errno = http_status_errno(response_code);
1704
0
                    ret = -1;
1705
0
                } else {
1706
0
                    if (get_entry(response.s, "ETag: \"", "\"", &etag) == EOF) {
1707
0
                        ret = -1;
1708
0
                    } else {
1709
0
                        ksprintf(&fp->completion_message, "\t<Part>\n\t\t<PartNumber>%d</PartNumber>\n\t\t<ETag>%s</ETag>\n\t</Part>\n",
1710
0
                            fp->part_no, etag.s);
1711
1712
0
                        ks_free(&etag);
1713
0
                    }
1714
0
                }
1715
0
            }
1716
1717
0
            ks_free(&response);
1718
1719
0
            if (ret) {
1720
0
                abort_upload(fp);
1721
0
                return -1;
1722
0
            }
1723
1724
0
            fp->part_no++;
1725
0
        }
1726
1727
0
        if (fp->part_no > 1) {
1728
0
            ret = complete_upload(fp, &response);
1729
1730
0
            if (!ret) {
1731
0
                if (strstr(response.s, "CompleteMultipartUploadResult") == NULL) {
1732
0
                    ret = -1;
1733
0
                    cret = curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);
1734
1735
0
                    if (cret == CURLE_OK) {
1736
0
                        if (hts_verbose >= HTS_LOG_INFO) {
1737
0
                            if (report_s3_error(&response, response_code)) {
1738
0
                                fprintf(stderr, "hfile_s3: warning, unable to report full S3 error status.\n");
1739
0
                            }
1740
0
                        }
1741
1742
0
                        errno = http_status_errno(response_code);
1743
0
                    }
1744
0
                }
1745
0
            }
1746
0
        } else {
1747
0
            ret = -1;
1748
0
        }
1749
1750
0
        if (ret) {
1751
0
            abort_upload(fp);
1752
0
        } else {
1753
0
            cleanup(fp);
1754
0
        }
1755
0
    }
1756
1757
0
    ks_free(&response);
1758
1759
0
    return ret;
1760
0
}
1761
1762
1763
9
static int handle_bad_request(hFILE_s3 *fp, kstring_t *resp) {
1764
9
    kstring_t region = {0, 0, NULL};
1765
9
    int ret = -1;
1766
1767
9
    if (get_entry(resp->s, "<Region>", "</Region>", &region) == EOF) {
1768
9
        return -1;
1769
9
    }
1770
1771
0
    ret = set_region(fp->au, &region);
1772
1773
0
    ks_free(&region);
1774
1775
0
    if (hts_verbose >= HTS_LOG_INFO) fprintf(stderr, "hfile_s3: handle_bad_request: return %d\n", ret);
1776
1777
0
    return ret;
1778
9
}
1779
1780
0
static int initialise_upload(hFILE_s3 *fp, kstring_t *head, kstring_t *resp, int user_query) {
1781
0
    kstring_t url = KS_INITIALIZE;
1782
0
    int ret = -1;
1783
0
    struct curl_slist *headers = NULL;
1784
0
    char http_request[] = "POST";
1785
0
    char delimiter = '?';
1786
0
    CURLcode err;
1787
1788
0
    clear_authorisation_values(fp);
1789
1790
0
    if (user_query) {
1791
0
        delimiter = '&';
1792
0
    }
1793
1794
0
    if (v4_authorisation(fp, http_request, NULL, "uploads=", user_query) != 0) {
1795
0
        goto out;
1796
0
    }
1797
1798
0
    if (ksprintf(&url, "%s%cuploads", fp->url.s, delimiter) < 0) {
1799
0
        goto out;
1800
0
    }
1801
1802
0
    if (ksprintf(&fp->content, "x-amz-content-sha256: %s", fp->content_hash.s) < 0) {
1803
0
        goto out;
1804
0
    }
1805
1806
0
    err = curl_easy_setopt(fp->curl, CURLOPT_URL, url.s);
1807
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_POST, 1L);
1808
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDS, "");  // send no data
1809
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_WRITEFUNCTION, response_callback);
1810
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_WRITEDATA, (void *)resp);
1811
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_HEADERFUNCTION, response_callback);
1812
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_HEADERDATA, (void *)head);
1813
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s);
1814
0
    err |= curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose);
1815
1816
0
    if (err != CURLE_OK)
1817
0
        goto out;
1818
1819
0
    headers = set_html_headers(fp, &fp->authorisation, &fp->date, &fp->content, &fp->token, NULL);
1820
1821
0
    if (!headers)
1822
0
        goto out;
1823
1824
0
    fp->ret = curl_easy_perform(fp->curl);
1825
1826
0
    if (fp->ret == CURLE_OK) {
1827
0
        ret = 0;
1828
0
    }
1829
1830
0
 out:
1831
0
    curl_slist_free_all(headers);
1832
0
    ks_free(&url);
1833
1834
0
    return ret;
1835
0
}
1836
1837
1838
0
static int get_upload_id(hFILE_s3 *fp, kstring_t *resp) {
1839
0
    int ret = 0;
1840
1841
0
    if (get_entry(resp->s, "<UploadId>", "</UploadId>", &fp->upload_id) == EOF) {
1842
0
        ret = -1;
1843
0
    }
1844
1845
0
    return ret;
1846
0
}
1847
1848
1849
/*
1850
    Now for the reading code
1851
*/
1852
1853
247
#define READ_PART_SIZE 1048576
1854
1855
9
static size_t recv_callback(char *ptr, size_t size, size_t nmemb, void *fpv) {
1856
9
    hFILE_s3 *fp = (hFILE_s3 *) fpv;
1857
9
    size_t n = size * nmemb;
1858
1859
9
    if (n) {
1860
9
        if (kputsn(ptr, n, &fp->buffer) == EOF) {
1861
0
            fprintf(stderr, "hfile_s3: error: unable to allocate memory to read data.\n");
1862
0
            return 0;
1863
0
        }
1864
9
    }
1865
1866
9
    return n;
1867
9
}
1868
1869
1870
0
static int s3_read_close(hFILE *fpv) {
1871
0
    hFILE_s3 *fp = (hFILE_s3 *)fpv;
1872
1873
0
    cleanup(fp);
1874
1875
0
    return 0;
1876
0
}
1877
1878
1879
247
static int get_part(hFILE_s3 *fp, kstring_t *resp) {
1880
247
    struct curl_slist *headers = NULL;
1881
247
    int ret = -1;
1882
247
    char http_request[] = "GET";
1883
247
    CURLcode err;
1884
1885
247
    ks_clear(&fp->buffer); // reset storage buffer
1886
247
    clear_authorisation_values(fp);
1887
1888
247
    if (fp->au->is_v4) {
1889
247
        if (v4_authorisation(fp, http_request, NULL, "", 0) != 0) {
1890
0
            goto out;
1891
0
        }
1892
1893
247
        if (hts_verbose >= HTS_LOG_INFO) fprintf(stderr, "hfile_s3: get_part: v4 auth done\n");
1894
1895
247
        if (ksprintf(&fp->content, "x-amz-content-sha256: %s", fp->content_hash.s) < 0) {
1896
0
            goto out;
1897
0
        }
1898
247
    } else {
1899
0
        if (v2_authorisation(fp, http_request) != 0) {
1900
0
            goto out;
1901
0
        }
1902
1903
0
        if (hts_verbose >= HTS_LOG_INFO) fprintf(stderr, "hfile_s3: get_part v2 auth done\n");
1904
0
    }
1905
1906
247
    if (ksprintf(&fp->range, "Range: bytes=%zu-%zu", fp->last_read, fp->last_read + fp->part_size - 1) < 0) {
1907
0
        goto out;
1908
0
    }
1909
1910
247
    if (hts_verbose >= HTS_LOG_INFO) {
1911
0
        fprintf(stderr, "hfile_s3: get_part: range set %s\n", fp->range.s);
1912
0
        fprintf(stderr, "hfile_s3: url %s\n", fp->url.s);
1913
0
    }
1914
1915
247
    curl_easy_reset(fp->curl);
1916
1917
247
    err = curl_easy_setopt(fp->curl, CURLOPT_URL, fp->url.s);
1918
247
    err |= curl_easy_setopt(fp->curl, CURLOPT_WRITEFUNCTION, recv_callback);
1919
247
    err |= curl_easy_setopt(fp->curl, CURLOPT_WRITEDATA, (void *)fp);
1920
247
    err |= curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s);
1921
247
    err |= curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose);
1922
1923
247
    if (resp) {
1924
247
        err |= curl_easy_setopt(fp->curl, CURLOPT_HEADERFUNCTION, response_callback);
1925
247
        err |= curl_easy_setopt(fp->curl, CURLOPT_HEADERDATA, (void *)resp);
1926
247
    }
1927
1928
247
    if (err != CURLE_OK)
1929
0
        goto out;
1930
1931
247
    headers = set_html_headers(fp, &fp->authorisation, &fp->date, &fp->content, &fp->token, &fp->range);
1932
1933
247
    if (!headers)
1934
0
        goto out;
1935
1936
247
    fp->ret = curl_easy_perform(fp->curl);
1937
1938
247
    if (fp->ret == CURLE_OK) {
1939
9
        ret = 0;
1940
9
    }
1941
1942
247
out:
1943
247
    if (hts_verbose >= HTS_LOG_INFO) fprintf(stderr, "hfile_s3: get_part: ret %d\n", ret);
1944
247
    curl_slist_free_all(headers);
1945
1946
247
    return ret;
1947
247
}
1948
1949
1950
0
static ssize_t s3_read(hFILE *fpv, void *bufferv, size_t nbytes) {
1951
0
    hFILE_s3 *fp = (hFILE_s3 *)fpv;
1952
0
    char *buffer = (char *)bufferv;
1953
0
    size_t got = 0;
1954
1955
    /* Transfer data from the fp->buffer to the calling buffer.
1956
       If there is no data left in the fp->buffer, grab another chunk of
1957
       data from s3.
1958
    */
1959
0
    while (fp->keep_going && got < nbytes) {
1960
1961
0
        if (fp->buffer.l && fp->last_read_buffer < fp->buffer.l) {
1962
            // copy data across
1963
0
            size_t to_copy;
1964
0
            size_t remaining = fp->buffer.l - fp->last_read_buffer;
1965
0
            size_t bytes_left = nbytes - got;
1966
1967
0
            if (hts_verbose >  HTS_LOG_INFO) fprintf(stderr, "hfile_s3: read - remaining %zu read %zu bytes_left %zu, nbytes %zu\n", remaining, got, bytes_left, nbytes);
1968
1969
0
            if (bytes_left < remaining) {
1970
0
                to_copy = bytes_left;
1971
0
            } else {
1972
0
                to_copy = remaining;
1973
0
            }
1974
1975
0
            memcpy(buffer + got, fp->buffer.s + fp->last_read_buffer, to_copy);
1976
0
            got += to_copy;
1977
0
            fp->last_read_buffer += to_copy;
1978
1979
0
            if ((fp->buffer.l < fp->part_size) && (fp->last_read_buffer == fp->buffer.l)) {
1980
0
                fp->keep_going = 0;
1981
0
            }
1982
0
        } else {
1983
0
            int ret;
1984
1985
0
            ret = get_part(fp, NULL);
1986
1987
0
            if (!ret) {
1988
0
                long response_code;
1989
0
                CURLcode cret = curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);
1990
1991
0
                if (cret != CURLE_OK || response_code > 300) {
1992
0
                    errno = http_status_errno(response_code);
1993
0
                    ret = -1;
1994
0
                }
1995
0
            }
1996
1997
0
            if (hts_verbose >= HTS_LOG_INFO) fprintf(stderr, "hfile_s3: read - read error %d\n", ret);
1998
1999
0
            if (ret < 0)
2000
0
                return ret;
2001
2002
0
            if (fp->buffer.l == 0) {
2003
0
                fp->keep_going = 0;
2004
0
                break;
2005
0
            }
2006
2007
0
            fp->last_read_buffer = 0;
2008
0
            fp->last_read = fp->last_read + fp->buffer.l;
2009
0
        }
2010
0
    }
2011
2012
0
    return got;
2013
0
}
2014
2015
2016
0
static off_t s3_seek(hFILE *fpv, off_t offset, int whence) {
2017
0
    hFILE_s3 *fp = (hFILE_s3 *)fpv;
2018
0
    off_t origin;
2019
2020
0
    if (fp->write) {
2021
        // lets not try and seek while writing
2022
0
        errno = ESPIPE;
2023
0
        return -1;
2024
0
    }
2025
2026
    // I am not sure we handle any seek other than one from the beginning
2027
0
    switch (whence) {
2028
0
        case SEEK_SET:
2029
0
            origin = 0;
2030
0
            break;
2031
0
        case SEEK_CUR:
2032
            // hseek() should convert this to SEEK_SET
2033
0
            errno = ENOSYS;
2034
0
            return -1;
2035
0
        case SEEK_END:
2036
0
            if (fp->file_size < 0) {
2037
0
                errno = ESPIPE;
2038
0
                return -1;
2039
0
            }
2040
2041
0
            origin = fp->file_size;
2042
0
            break;
2043
0
        default:
2044
0
            errno = EINVAL;
2045
0
            return -1;
2046
0
    }
2047
2048
    // Check 0 <= origin+offset < fp->file_size carefully, avoiding overflow
2049
0
    if ((offset < 0)? origin + offset < 0
2050
0
                : (fp->file_size >= 0 && offset > fp->file_size - origin)) {
2051
0
        errno = EINVAL;
2052
0
        return -1;
2053
0
    }
2054
2055
0
    fp->keep_going = 1;
2056
2057
0
    size_t pos = origin + offset; // origin is really only useful if we can make the other modes work
2058
2059
0
    if (pos <= fp->last_read && pos > (fp->last_read - fp->buffer.l)) {
2060
        // within the current local buffer
2061
0
        fp->last_read_buffer = pos - (fp->last_read - fp->buffer.l);
2062
0
    } else {
2063
0
        fp->last_read = pos;
2064
0
        ks_clear(&fp->buffer); // resetting fp->buffer triggers a new remote read
2065
0
    }
2066
2067
0
    return fp->last_read;
2068
0
}
2069
2070
2071
/*
2072
    Unlike upload, download does not really need an initialisation.  Here we use it to
2073
    get the size of the wanted files and as a test for redirects.
2074
*/
2075
247
static int initialise_download(hFILE_s3 *fp, kstring_t *resp) {
2076
2077
247
    fp->last_read = 0;
2078
247
    ks_clear(resp);
2079
2080
247
    return get_part(fp, resp);
2081
247
}
2082
2083
2084
0
static int s3_close(hFILE *fpv) {
2085
0
    hFILE_s3 *fp = (hFILE_s3 *)fpv;
2086
0
    int ret;
2087
2088
0
    if (!fp->write) {
2089
0
        ret = s3_read_close(fpv);
2090
0
    } else {
2091
0
        ret = s3_write_close(fpv);
2092
0
    }
2093
2094
0
    return ret;
2095
0
}
2096
2097
2098
static const struct hFILE_backend s3_backend = {
2099
    s3_read, s3_write, s3_seek, NULL, s3_close
2100
};
2101
2102
/* Read and write open here, need to be after the s3_backend declaration. */
2103
0
static hFILE *s3_write_open(const char *url, s3_auth_data *auth) {
2104
0
    hFILE_s3 *fp;
2105
0
    kstring_t response = {0, 0, NULL};
2106
0
    kstring_t header   = {0, 0, NULL};
2107
0
    int has_user_query = 0;
2108
0
    char *query_start;
2109
0
    const char *env;
2110
0
    CURLcode cret;
2111
0
    long response_code;
2112
2113
2114
0
    fp = (hFILE_s3 *)hfile_init(sizeof(hFILE_s3), "w", 0);
2115
2116
0
    if (fp == NULL) {
2117
0
        return NULL;
2118
0
    }
2119
2120
0
    if ((fp->curl = curl_easy_init()) == NULL) {
2121
0
        errno = ENOMEM;
2122
0
        goto error;
2123
0
    }
2124
2125
0
    fp->au = auth;
2126
2127
0
    initialise_local(fp);
2128
0
    initialise_authorisation_values(fp);
2129
0
    fp->aborted = 0;
2130
0
    fp->part_size = MINIMUM_S3_WRITE_SIZE;
2131
0
    fp->expand = 1;
2132
0
    fp->write = 1;
2133
2134
0
    if ((env = getenv("HTS_S3_PART_SIZE")) != NULL) {
2135
0
        int part_size = atoi(env) * 1024 * 1024;
2136
2137
0
        if (part_size > fp->part_size)
2138
0
            fp->part_size = part_size;
2139
2140
0
        fp->expand = 0;
2141
0
    }
2142
2143
0
    if (hts_verbose >= 8) {
2144
0
        fp->verbose = 1L;
2145
0
    } else {
2146
0
        fp->verbose = 0L;
2147
0
    }
2148
2149
0
    kputs(url, &fp->url);
2150
2151
0
    if ((query_start = strchr(fp->url.s, '?'))) {
2152
0
        has_user_query = 1;;
2153
0
    }
2154
2155
0
    if (initialise_upload(fp, &header, &response, has_user_query))
2156
0
        goto error;
2157
2158
0
    cret = curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);
2159
2160
0
    if (cret == CURLE_OK) {
2161
0
        if (response_code == S3_MOVED_PERMANENTLY || response_code == S3_TEMPORARY_REDIRECT) {
2162
0
            if (redirect_endpoint(fp, &header) == 0) {
2163
0
                ks_clear(&response);
2164
0
                ks_clear(&header);
2165
2166
0
                if (initialise_upload(fp, &header, &response, has_user_query))
2167
0
                    goto error;
2168
0
            }
2169
0
        } else if (response_code == S3_BAD_REQUEST) {
2170
0
            if (handle_bad_request(fp, &response) == 0) {
2171
0
                ks_clear(&response);
2172
0
                ks_clear(&header);
2173
2174
0
                if (initialise_upload(fp, &header, &response, has_user_query))
2175
0
                    goto error;
2176
0
            }
2177
0
        }
2178
2179
        // reget the response code (may not have changed)
2180
0
        cret = curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);
2181
0
    } else {
2182
        // unable to get a response code from curl
2183
0
        goto error;
2184
0
    }
2185
2186
0
    if (response_code >= 300) {
2187
        // something went wrong with the initialisation
2188
2189
0
        if (cret == CURLE_OK) {
2190
0
            if (hts_verbose >= HTS_LOG_INFO) {
2191
0
                if (report_s3_error(&response, response_code)) {
2192
0
                    fprintf(stderr, "hfile_s3: warning, unable to report full S3 error status.\n");
2193
0
                }
2194
0
            }
2195
2196
0
            errno = http_status_errno(response_code);
2197
0
        }
2198
2199
0
        goto error;
2200
0
    }
2201
2202
0
    if (get_upload_id(fp, &response)) goto error;
2203
2204
    // start the completion message (a formatted list of parts)
2205
0
    if (kputs("<CompleteMultipartUpload>\n", &fp->completion_message) == EOF) {
2206
0
        goto error;
2207
0
    }
2208
2209
0
    fp->part_no = 1;
2210
2211
    // user query string no longer a useful part of the URL
2212
0
    if (query_start)
2213
0
         *query_start = '\0';
2214
2215
0
    fp->base.backend = &s3_backend;
2216
0
    ks_free(&response);
2217
0
    ks_free(&header);
2218
2219
0
    return &fp->base;
2220
2221
0
error:
2222
0
    ks_free(&response);
2223
0
    ks_free(&header);
2224
0
    cleanup_local(fp);
2225
0
    free_authorisation_values(fp);
2226
0
    hfile_destroy((hFILE *)fp);
2227
0
    return NULL;
2228
0
}
2229
2230
2231
247
static hFILE *s3_read_open(const char *url, s3_auth_data *auth) {
2232
247
    hFILE_s3 *fp;
2233
247
    const char *env;
2234
247
    kstring_t response   = {0, 0, NULL};
2235
247
    kstring_t file_range = {0, 0, NULL};
2236
247
    CURLcode cret;
2237
247
    long response_code = 0;
2238
2239
247
    fp = (hFILE_s3 *)hfile_init(sizeof(hFILE_s3), "r", 0);
2240
2241
247
    if (fp == NULL) {
2242
0
        return NULL;
2243
0
    }
2244
2245
247
    if ((fp->curl = curl_easy_init()) == NULL) {
2246
0
        errno = ENOMEM;
2247
0
        goto error;
2248
0
    }
2249
2250
247
    fp->au = auth;
2251
2252
247
    initialise_local(fp);
2253
247
    initialise_authorisation_values(fp);
2254
2255
247
    fp->last_read = 0; // ranges start at 0
2256
247
    fp->write = 0;
2257
2258
247
    if ((env = getenv("HTS_S3_READ_PART_SIZE")) != NULL) {
2259
0
        fp->part_size = atoi(env) * 1024 * 1024;
2260
247
    } else {
2261
247
        fp->part_size = READ_PART_SIZE;
2262
247
    }
2263
2264
247
    if (hts_verbose >= 8) {
2265
0
        fp->verbose = 1L;
2266
247
    } else {
2267
247
        fp->verbose = 0L;
2268
247
    }
2269
2270
247
    kputs(url, &fp->url);
2271
2272
247
    if (initialise_download(fp, &response))
2273
238
        goto error;
2274
2275
9
    cret = curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);
2276
2277
9
    if (cret == CURLE_OK) {
2278
9
        if (response_code == S3_MOVED_PERMANENTLY || response_code == S3_TEMPORARY_REDIRECT) {
2279
0
            ks_clear(&response);
2280
2281
0
            if (redirect_endpoint(fp, &response) == 0) {
2282
0
                if (initialise_download(fp, &response))
2283
0
                    goto error;
2284
0
            }
2285
9
        } else if (response_code == S3_BAD_REQUEST) {
2286
9
            ks_clear(&response);
2287
2288
9
            if (handle_bad_request(fp, &fp->buffer) == 0) {
2289
0
                if (initialise_download(fp, &response))
2290
0
                    goto error;
2291
0
            }
2292
9
        }
2293
2294
        // reget the response code (may not have changed)
2295
9
        cret = curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);
2296
9
    } else {
2297
        // unable to get a response code from curl
2298
0
        goto error;
2299
0
    }
2300
2301
9
    if (response_code >= 300) {
2302
        // something went wrong with the initialisation
2303
2304
9
        if (cret == CURLE_OK) {
2305
9
            if (hts_verbose >= HTS_LOG_INFO) {
2306
0
                if (report_s3_error(&fp->buffer, response_code)) {
2307
0
                    fprintf(stderr, "hfile_s3: warning, unable to report full S3 error status.\n");
2308
0
                }
2309
0
            }
2310
2311
9
            errno = http_status_errno(response_code);
2312
9
        }
2313
2314
9
        goto error;
2315
9
    }
2316
2317
0
    if (get_entry(response.s, "content-range: bytes ", "\n", &file_range) == EOF) {
2318
0
        fprintf(stderr, "hfile_s3: warning: failed to read file size.\n");
2319
0
        fp->file_size = -1;
2320
0
    } else {
2321
0
        char *s;
2322
0
        if ((s = strchr(file_range.s, '/'))) {
2323
0
            fp->file_size = strtoll(s + 1, NULL, 10);
2324
0
        } else {
2325
0
            fp->file_size = -1;
2326
0
        }
2327
0
    }
2328
2329
0
    fp->last_read_buffer = 0;
2330
0
    fp->last_read = fp->last_read + fp->buffer.l;
2331
0
    fp->base.backend = &s3_backend;
2332
0
    fp->keep_going = 1;
2333
2334
0
    ks_free(&response);
2335
0
    ks_free(&file_range);
2336
0
    return &fp->base;
2337
2338
2339
247
 error:
2340
247
    ks_free(&response);
2341
247
    ks_free(&file_range);
2342
247
    cleanup_local(fp);
2343
247
    free_authorisation_values(fp);
2344
247
    hfile_destroy((hFILE *)fp);
2345
247
    return NULL;
2346
9
}
2347
2348
2349
247
static hFILE *s3_open_v4(const char *s3url, const char *mode, va_list *argsp) {
2350
247
    kstring_t url = { 0, 0, NULL };
2351
2352
247
    s3_auth_data *ad = setup_auth_data(s3url, mode, 4, &url);
2353
247
    hFILE *fp = NULL;
2354
2355
247
    if (ad == NULL) {
2356
0
        return NULL;
2357
0
    }
2358
2359
247
    if (hts_verbose >= HTS_LOG_INFO) fprintf(stderr, "hfile_s3: s3_open_v4 url %s\n", url.s);
2360
2361
247
    if (*mode == 'r') {
2362
247
        fp  = s3_read_open(url.s, ad);
2363
247
    } else {
2364
0
        fp =  s3_write_open(url.s, ad);
2365
0
    }
2366
2367
247
    ks_free(&url);
2368
247
    if (!fp)
2369
247
        free_auth_data(ad);
2370
2371
247
    return fp;
2372
247
}
2373
2374
2375
0
static hFILE *s3_open_v2(const char *s3url, const char *mode, va_list *argsp) {
2376
0
    kstring_t url = { 0, 0, NULL };
2377
2378
0
    s3_auth_data *ad = setup_auth_data(s3url, mode, 2, &url);
2379
0
    hFILE *fp = NULL;
2380
2381
0
    if (ad == NULL) {
2382
0
        return NULL;
2383
0
    }
2384
2385
0
    if (hts_verbose >= HTS_LOG_INFO) fprintf(stderr, "hfile_s3: s3_open_v2 url %s\n", url.s);
2386
2387
0
    if (*mode == 'r') {
2388
0
        fp  = s3_read_open(url.s, ad);
2389
0
    } else {
2390
0
        fprintf(stderr, "hfile_s3: error - signature v2 not handled for writing.\n.");
2391
0
    }
2392
2393
0
    ks_free(&url);
2394
0
    if (!fp)
2395
0
        free_auth_data(ad);
2396
2397
0
    return fp;
2398
0
}
2399
2400
2401
static hFILE *hopen_s3(const char *url, const char *mode)
2402
247
{
2403
247
    hFILE *fp;
2404
2405
247
    if (getenv("HTS_S3_V2") == NULL) {
2406
247
        fp = s3_open_v4(url, mode, NULL);
2407
247
    } else {
2408
0
        fp = s3_open_v2(url, mode, NULL);
2409
0
    }
2410
2411
247
    return fp;
2412
247
}
2413
2414
2415
static hFILE *vhopen_s3(const char *url, const char *mode, va_list args0)
2416
0
{
2417
0
    hFILE *fp;
2418
2419
    // This should handle to vargs case.  Not sure what vargs we want
2420
    // to handle
2421
0
    fp = hopen_s3(url, mode);
2422
2423
0
    return fp;
2424
0
}
2425
2426
2427
1
static void s3_exit(void) {
2428
1
    if (curl_share_cleanup(curl.share) == CURLSHE_OK)
2429
1
        curl.share = NULL;
2430
2431
1
    free(curl.useragent.s);
2432
1
    curl.useragent.l = curl.useragent.m = 0; curl.useragent.s = NULL;
2433
1
    curl_global_cleanup();
2434
1
}
2435
2436
2437
1
int PLUGIN_GLOBAL(hfile_plugin_init,_s3)(struct hFILE_plugin *self) {
2438
2439
1
    static const struct hFILE_scheme_handler handler =
2440
1
        { hopen_s3, hfile_always_remote, "Amazon S3",
2441
1
          2000 + 50, vhopen_s3
2442
1
        };
2443
2444
#ifdef ENABLE_PLUGINS
2445
    // Embed version string for examination via strings(1) or what(1)
2446
    static const char id[] =
2447
        "@(#)hfile_s3 plugin (htslib)\t" HTS_VERSION_TEXT;
2448
    const char *version = strchr(id, '\t') + 1;
2449
2450
    if (hts_verbose >= 9)
2451
        fprintf(stderr, "[M::hfile_s3.init] version %s\n",
2452
                version);
2453
#else
2454
1
    const char *version = hts_version();
2455
1
#endif
2456
2457
1
    const curl_version_info_data *info;
2458
1
    CURLcode err;
2459
1
    CURLSHcode errsh;
2460
2461
1
    err = curl_global_init(CURL_GLOBAL_ALL);
2462
2463
1
    if (err != CURLE_OK) {
2464
        // look at putting in an errno here
2465
0
        return -1;
2466
0
    }
2467
2468
1
    curl.share = curl_share_init();
2469
2470
1
    if (curl.share == NULL) {
2471
0
        curl_global_cleanup();
2472
0
        errno = EIO;
2473
0
        return -1;
2474
0
    }
2475
2476
1
    errsh  = curl_share_setopt(curl.share, CURLSHOPT_LOCKFUNC, share_lock);
2477
1
    errsh |= curl_share_setopt(curl.share, CURLSHOPT_UNLOCKFUNC, share_unlock);
2478
1
    errsh |= curl_share_setopt(curl.share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
2479
2480
1
    if (errsh != 0) {
2481
0
        curl_share_cleanup(curl.share);
2482
0
        curl_global_cleanup();
2483
0
        errno = EIO;
2484
0
        return -1;
2485
0
    }
2486
2487
1
    info = curl_version_info(CURLVERSION_NOW);
2488
1
    ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version);
2489
2490
1
    self->name = "Amazon S3";
2491
1
    self->destroy = s3_exit;
2492
2493
1
    hfile_add_scheme_handler("s3",       &handler);
2494
1
    hfile_add_scheme_handler("s3+http",  &handler);
2495
1
    hfile_add_scheme_handler("s3+https", &handler);
2496
2497
1
    return 0;
2498
1
}
2499