Coverage Report

Created: 2025-10-28 07:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/htslib/hfile_s3.c
Line
Count
Source
1
/*  hfile_s3.c -- Amazon S3 backend for low-level file streams.
2
3
    Copyright (C) 2015-2017, 2019-2024 Genome Research Ltd.
4
5
    Author: John Marshall <jm18@sanger.ac.uk>
6
7
Permission is hereby granted, free of charge, to any person obtaining a copy
8
of this software and associated documentation files (the "Software"), to deal
9
in the Software without restriction, including without limitation the rights
10
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
copies of the Software, and to permit persons to whom the Software is
12
furnished to do so, subject to the following conditions:
13
14
The above copyright notice and this permission notice shall be included in
15
all copies or substantial portions of the Software.
16
17
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
DEALINGS IN THE SOFTWARE.  */
24
25
#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
26
#include <config.h>
27
28
#include <stdarg.h>
29
#include <stdio.h>
30
#include <stdlib.h>
31
#include <string.h>
32
#include <strings.h>
33
#include <time.h>
34
35
#include <errno.h>
36
37
#include "hfile_internal.h"
38
#ifdef ENABLE_PLUGINS
39
#include "version.h"
40
#endif
41
#include "htslib/hts.h"  // for hts_version() and hts_verbose
42
#include "htslib/kstring.h"
43
#include "hts_time_funcs.h"
44
45
typedef struct s3_auth_data {
46
    kstring_t id;
47
    kstring_t token;
48
    kstring_t secret;
49
    kstring_t region;
50
    kstring_t canonical_query_string;
51
    kstring_t user_query_string;
52
    kstring_t host;
53
    kstring_t profile;
54
    enum {s3_auto, s3_virtual, s3_path} url_style;
55
    time_t creds_expiry_time;
56
    char *bucket;
57
    kstring_t auth_hdr;
58
    time_t auth_time;
59
    char date[40];
60
    char date_long[17];
61
    char date_short[9];
62
    kstring_t date_html;
63
    char mode;
64
    char *headers[5];
65
    int refcount;
66
} s3_auth_data;
67
68
459
/* Both limits are in seconds. */
// Auth headers older than this are regenerated
#define AUTH_LIFETIME 60
// Credentials are reread when they expire within this many seconds
#define CREDENTIAL_LIFETIME 60
70
71
#if defined HAVE_COMMONCRYPTO
72
73
#include <CommonCrypto/CommonHMAC.h>
74
75
// Digest buffer sizes when hashing via CommonCrypto
#define DIGEST_BUFSIZ CC_SHA1_DIGEST_LENGTH
#define SHA256_DIGEST_BUFSIZE CC_SHA256_DIGEST_LENGTH
// Hex-encoded SHA-256 plus its terminating NUL.  Fully parenthesised so the
// macro keeps its value when used inside a larger expression.
#define HASH_LENGTH_SHA256 ((SHA256_DIGEST_BUFSIZE * 2) + 1)
78
79
static size_t
80
s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message)
81
{
82
    CCHmac(kCCHmacAlgSHA1, key->s, key->l, message->s, message->l, digest);
83
    return CC_SHA1_DIGEST_LENGTH;
84
}
85
86
87
/* SHA-256 of in[0..length) written to out (CommonCrypto build).
   out must have room for SHA256_DIGEST_BUFSIZE bytes. */
static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) {
    (void) CC_SHA256(in, length, out);
}
90
91
92
static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) {
93
    CCHmac(kCCHmacAlgSHA256, key, key_len, d, n, md);
94
    *md_len = CC_SHA256_DIGEST_LENGTH;
95
}
96
97
98
#elif defined HAVE_HMAC
99
100
#include <openssl/hmac.h>
101
#include <openssl/sha.h>
102
103
// Digest buffer sizes when hashing via OpenSSL
#define DIGEST_BUFSIZ EVP_MAX_MD_SIZE
#define SHA256_DIGEST_BUFSIZE SHA256_DIGEST_LENGTH
// Hex-encoded SHA-256 plus its terminating NUL.  Fully parenthesised so the
// macro keeps its value when used inside a larger expression.
#define HASH_LENGTH_SHA256 ((SHA256_DIGEST_BUFSIZE * 2) + 1)
106
107
static size_t
108
s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message)
109
0
{
110
0
    unsigned int len;
111
0
    HMAC(EVP_sha1(), key->s, key->l,
112
0
         (unsigned char *) message->s, message->l, digest, &len);
113
0
    return len;
114
0
}
115
116
117
456
/* SHA-256 of in[0..length) written to out (OpenSSL build).
   out must have room for SHA256_DIGEST_BUFSIZE bytes. */
static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) {
    (void) SHA256(in, length, out);
}
120
121
122
1.14k
static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) {
123
1.14k
    HMAC(EVP_sha256(), key, key_len, d, n, md, md_len);
124
1.14k
}
125
126
#else
127
#error No HMAC() routine found by configure
128
#endif
129
130
static void
131
urldecode_kput(const char *s, int len, kstring_t *str)
132
678
{
133
678
    char buf[3];
134
678
    int i = 0;
135
136
85.0k
    while (i < len)
137
84.3k
        if (s[i] == '%' && i+2 < len) {
138
766
            buf[0] = s[i+1], buf[1] = s[i+2], buf[2] = '\0';
139
766
            kputc(strtol(buf, NULL, 16), str);
140
766
            i += 3;
141
766
        }
142
83.5k
        else kputc(s[i++], str);
143
678
}
144
145
/* Append the base64 encoding of data[0..len) to str, '='-padded to a
   multiple of four characters. */
static void base64_kput(const unsigned char *data, size_t len, kstring_t *str)
{
    static const char alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    size_t pos = 0, tail;
    unsigned group;
    int pad = 0;

    // Whole three-byte groups become four output characters each
    for (; len - pos >= 3; pos += 3) {
        group = (data[pos] << 16) | (data[pos+1] << 8) | data[pos+2];
        kputc(alphabet[(group >> 18) & 63], str);
        kputc(alphabet[(group >> 12) & 63], str);
        kputc(alphabet[(group >> 6) & 63], str);
        kputc(alphabet[group & 63], str);
    }

    // One or two bytes left over: emit the significant characters only
    tail = len - pos;
    if (tail == 1) {
        group = data[pos] << 16;
        kputc(alphabet[(group >> 18) & 63], str);
        kputc(alphabet[(group >> 12) & 63], str);
        pad = 2;
    } else if (tail == 2) {
        group = (data[pos] << 16) | (data[pos+1] << 8);
        kputc(alphabet[(group >> 18) & 63], str);
        kputc(alphabet[(group >> 12) & 63], str);
        kputc(alphabet[(group >> 6) & 63], str);
        pad = 1;
    }

    // Always called, even with pad == 0, as the original did
    kputsn("==", pad, str);
}
168
169
/* Decide whether the bucket name s0[0..slim-s0) can be used as a DNS label
 * in a virtual-hosted URL.
 *
 * Accepted characters are lower-case letters, digits, '-' (not first or
 * last) and, for non-https only, '.' with an alphanumeric on each side.
 * The name must be 3 to 63 characters long and contain at least one
 * letter or '-'.  Returns 1 if compliant, 0 otherwise.
 */
static int is_dns_compliant(const char *s0, const char *slim, int is_https)
{
    int seen_nondigit = 0;
    const char *p;

    for (p = s0; p < slim; p++) {
        const int at_start = (p == s0);
        const int at_end = (p + 1 == slim);

        if (isdigit_c(*p))
            continue;

        if (islower_c(*p)) {
            seen_nondigit = 1;
            continue;
        }

        switch (*p) {
        case '-':
            if (at_start || at_end) return 0;
            seen_nondigit = 1;
            break;

        case '.':
            if (is_https) return 0;
            if (at_start || ! isalnum_c(p[-1])) return 0;
            if (at_end || ! isalnum_c(p[1])) return 0;
            break;

        default:
            return 0;
        }
    }

    return seen_nondigit && (slim - s0) >= 3 && (slim - s0) <= 63;
}
192
193
/* fopen() wrapper that expands a leading "~/" using $HOME.
 *
 * Returns the opened stream, or NULL if the file cannot be opened, if a
 * "~/" name is given while HOME is unset, or if memory for the expanded
 * name cannot be allocated (previously a failed allocation could pass a
 * NULL path to fopen()).
 */
static FILE *expand_tilde_open(const char *fname, const char *mode)
{
    const char *home;
    char *full_fname;
    size_t home_len, rest_len;
    FILE *fp;

    if (strncmp(fname, "~/", 2) != 0)
        return fopen(fname, mode);

    home = getenv("HOME");
    if (! home) return NULL;

    // Join $HOME and the part of fname that follows the '~'
    home_len = strlen(home);
    rest_len = strlen(&fname[1]);
    full_fname = malloc(home_len + rest_len + 1);
    if (! full_fname) return NULL;
    memcpy(full_fname, home, home_len);
    memcpy(full_fname + home_len, &fname[1], rest_len + 1);

    fp = fopen(full_fname, mode);
    free(full_fname);
    return fp;
}
213
214
static void parse_ini(const char *fname, const char *section, ...)
215
414
{
216
414
    kstring_t line = { 0, 0, NULL };
217
414
    int active = 1;  // Start active, so global properties are accepted
218
414
    char *s;
219
220
414
    FILE *fp = expand_tilde_open(fname, "r");
221
414
    if (fp == NULL) return;
222
223
0
    while (line.l = 0, kgetline(&line, (kgets_func *) fgets, fp) >= 0)
224
0
        if (line.s[0] == '[' && (s = strchr(line.s, ']')) != NULL) {
225
0
            *s = '\0';
226
0
            active = (strcmp(&line.s[1], section) == 0);
227
0
        }
228
0
        else if (active && (s = strpbrk(line.s, ":=")) != NULL) {
229
0
            const char *key = line.s, *value = &s[1], *akey;
230
0
            va_list args;
231
232
0
            while (isspace_c(*key)) key++;
233
0
            while (s > key && isspace_c(s[-1])) s--;
234
0
            *s = '\0';
235
236
0
            while (isspace_c(*value)) value++;
237
0
            while (line.l > 0 && isspace_c(line.s[line.l-1]))
238
0
                line.s[--line.l] = '\0';
239
240
0
            va_start(args, section);
241
0
            while ((akey = va_arg(args, const char *)) != NULL) {
242
0
                kstring_t *avar = va_arg(args, kstring_t *);
243
0
                if (strcmp(key, akey) == 0) {
244
0
                    avar->l = 0;
245
0
                    kputs(value, avar);
246
0
                    break; }
247
0
            }
248
0
            va_end(args);
249
0
        }
250
251
0
    fclose(fp);
252
0
    free(line.s);
253
0
}
254
255
/* Read an access key id and secret from a plain file.
 *
 * The file's lines are joined with spaces; the first whitespace-separated
 * word is appended to id and the second to secret.  A file that cannot be
 * opened, or that is empty, leaves both untouched.
 */
static void parse_simple(const char *fname, kstring_t *id, kstring_t *secret)
{
    kstring_t text = { 0, 0, NULL };
    char *s;
    size_t len;

    FILE *fp = expand_tilde_open(fname, "r");
    if (fp == NULL) return;

    while (kgetline(&text, (kgets_func *) fgets, fp) >= 0)
        kputc(' ', &text);
    fclose(fp);

    // Nothing was read (empty file), so there is no buffer to scan
    if (text.s == NULL) return;

    s = text.s;
    while (isspace_c(*s)) s++;
    kputsn(s, len = strcspn(s, " \t"), id);

    s += len;
    while (isspace_c(*s)) s++;
    kputsn(s, strcspn(s, " \t"), secret);

    free(text.s);
}
278
279
231
/* Build the NULL-terminated header list for a request in ad->headers.
 *
 * The list holds freshly allocated copies of the Date header, the
 * X-Amz-Security-Token header (when a token is set) and the Authorization
 * header (when one has been generated).  *hdrs is pointed at the list.
 *
 * Returns 0 on success, or -1 on allocation failure.  On failure every
 * string already placed in the list is freed and its slot reset to NULL.
 */
static int copy_auth_headers(s3_auth_data *ad, char ***hdrs) {
    char **hdr = &ad->headers[0];
    int idx = 0;
    *hdrs = hdr;

    hdr[idx] = strdup(ad->date);
    if (!hdr[idx]) return -1;
    idx++;

    if (ad->token.l) {
        kstring_t token_hdr = KS_INITIALIZE;
        // Check both appends so a half-built header is never sent
        if (kputs("X-Amz-Security-Token: ", &token_hdr) < 0
            || kputs(ad->token.s, &token_hdr) < 0) {
            ks_free(&token_hdr);
            goto fail;
        }
        hdr[idx++] = token_hdr.s;
    }

    if (ad->auth_hdr.l) {
        hdr[idx] = strdup(ad->auth_hdr.s);
        if (!hdr[idx]) goto fail;
        idx++;
    }

    hdr[idx] = NULL;
    return 0;

 fail:
    while (idx > 0) {
        free(hdr[--idx]);
        hdr[idx] = NULL;  // do not leave dangling pointers in ad->headers
    }
    return -1;
}
313
314
464
/* Drop one reference to ad.  While extra references remain (refcount > 0)
   this only decrements the count; otherwise every owned string and the
   structure itself are freed. */
static void free_auth_data(s3_auth_data *ad) {
    kstring_t *owned[] = {
        &ad->profile, &ad->id, &ad->token, &ad->secret, &ad->region,
        &ad->canonical_query_string, &ad->user_query_string, &ad->host,
        &ad->auth_hdr, &ad->date_html
    };
    size_t i;

    if (ad->refcount > 0) {
        ad->refcount--;
        return;
    }

    for (i = 0; i < sizeof(owned) / sizeof(owned[0]); i++)
        free(owned[i]->s);

    free(ad->bucket);
    free(ad);
}
332
333
/* Convert an RFC 3339 style timestamp ("YYYY-MM-DDThh:mm:ssZ" or with a
 * "+hh:mm" / "-hh:mm" zone) to a time_t.
 *
 * The date/time separator may be 'T', 't' or a space.  A zone is required;
 * 'Z'/'z' means UTC.  A numeric zone whose hours or minutes are out of
 * range, or cannot be parsed, is treated as UTC.
 *
 * Returns the time, or 0 if datetime is unset or cannot be parsed.
 */
static time_t parse_rfc3339_date(kstring_t *datetime)
{
    int offset = 0;
    time_t when;
    int num;
    char should_be_t = '\0', zone[10] = { '\0' };
    unsigned int year, mon, day, hour, min, sec;
    struct tm parsed;

    if (!datetime->s)
        return 0;

    // It should be possible to do this with strptime(), but it seems
    // to not get on with our feature definitions.
    num = sscanf(datetime->s, "%4u-%2u-%2u%c%2u:%2u:%2u%9s",
                 &year, &mon, &day, &should_be_t, &hour, &min, &sec, zone);
    if (num < 8)
        return 0;
    if (should_be_t != 'T' && should_be_t != 't' && should_be_t != ' ')
        return 0;

    // Fill in by name rather than relying on the field order of struct tm
    memset(&parsed, 0, sizeof(parsed));
    parsed.tm_sec  = (int) sec;
    parsed.tm_min  = (int) min;
    parsed.tm_hour = (int) hour;
    parsed.tm_mday = (int) day;
    parsed.tm_mon  = (int) mon - 1;
    parsed.tm_year = (int) year - 1900;

    switch (zone[0]) {
      case 'Z':
      case 'z':
      case '\0':
          break;
      case '+':
      case '-': {
          // Both start at zero so a zone without minutes is well defined
          unsigned hr_off = 0, min_off = 0;
          if (sscanf(zone + 1, "%2u:%2u", &hr_off, &min_off) >= 1
              && hr_off < 24 && min_off < 60) {
              int minutes = (int) (hr_off * 60 + min_off);
              // A zone ahead of UTC ('+') means the UTC time is earlier
              offset = minutes * (zone[0] == '+' ? -60 : 60);
          }
          break;
      }
      default:
          return 0;
    }

    when = hts_time_gm(&parsed);
    return when >= 0 ? when + offset : 0;
}
377
378
0
/* Reread the credentials (key id, secret, token, expiry time) for
   ad->profile from the shared credentials file. */
static void refresh_auth_data(s3_auth_data *ad) {
    // Basically a copy of the AWS_SHARED_CREDENTIALS_FILE part of
    // setup_auth_data(), but this only reads the authorisation parts.
    const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE");
    kstring_t expiry_time = KS_INITIALIZE;
    // The key/destination list must end with a NULL key; parse_ini()
    // walks its variable arguments until it finds one.
    parse_ini(v? v : "~/.aws/credentials", ad->profile.s,
              "aws_access_key_id", &ad->id,
              "aws_secret_access_key", &ad->secret,
              "aws_session_token", &ad->token,
              "expiry_time", &expiry_time,
              (const char *) NULL);
    if (expiry_time.l) {
        ad->creds_expiry_time = parse_rfc3339_date(&expiry_time);
    }
    ks_free(&expiry_time);
}
393
394
0
static int auth_header_callback(void *ctx, char ***hdrs) {
395
0
    s3_auth_data *ad = (s3_auth_data *) ctx;
396
397
0
    time_t now = time(NULL);
398
0
#ifdef HAVE_GMTIME_R
399
0
    struct tm tm_buffer;
400
0
    struct tm *tm = gmtime_r(&now, &tm_buffer);
401
#else
402
    struct tm *tm = gmtime(&now);
403
#endif
404
0
    kstring_t message = { 0, 0, NULL };
405
0
    unsigned char digest[DIGEST_BUFSIZ];
406
0
    size_t digest_len;
407
408
0
    if (!hdrs) { // Closing connection
409
0
        free_auth_data(ad);
410
0
        return 0;
411
0
    }
412
413
0
    if (ad->creds_expiry_time > 0
414
0
        && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) {
415
0
        refresh_auth_data(ad);
416
0
    } else if (now - ad->auth_time < AUTH_LIFETIME) {
417
        // Last auth string should still be valid
418
0
        *hdrs = NULL;
419
0
        return 0;
420
0
    }
421
422
0
    strftime(ad->date, sizeof(ad->date), "Date: %a, %d %b %Y %H:%M:%S GMT", tm);
423
0
    if (!ad->id.l || !ad->secret.l) {
424
0
        ad->auth_time = now;
425
0
        return copy_auth_headers(ad, hdrs);
426
0
    }
427
428
0
    if (ksprintf(&message, "%s\n\n\n%s\n%s%s%s%s",
429
0
                 ad->mode == 'r' ? "GET" : "PUT", ad->date + 6,
430
0
                 ad->token.l ? "x-amz-security-token:" : "",
431
0
                 ad->token.l ? ad->token.s : "",
432
0
                 ad->token.l ? "\n" : "",
433
0
                 ad->bucket) < 0) {
434
0
        return -1;
435
0
    }
436
437
0
    digest_len = s3_sign(digest, &ad->secret, &message);
438
0
    ad->auth_hdr.l = 0;
439
0
    if (ksprintf(&ad->auth_hdr, "Authorization: AWS %s:", ad->id.s) < 0)
440
0
        goto fail;
441
0
    base64_kput(digest, digest_len, &ad->auth_hdr);
442
443
0
    free(message.s);
444
0
    ad->auth_time = now;
445
0
    return copy_auth_headers(ad, hdrs);
446
447
0
 fail:
448
0
    free(message.s);
449
0
    return -1;
450
0
}
451
452
453
/* Like escape_path(), but for query strings: '=' and '&' are also left
 * untouched.
 *
 * Returns a newly allocated string that the caller must free(), or NULL
 * if memory cannot be allocated.  Each byte is escaped as an unsigned
 * value, so bytes of 0x80 and above become their own "%XX" code whether
 * or not plain char is signed.
 */
static char *escape_query(const char *qs) {
    static const char hex[] = "0123456789ABCDEF";
    size_t i, j = 0, length;
    char *escaped;

    length = strlen(qs);
    // Worst case: every byte turns into three characters
    if ((escaped = malloc(length * 3 + 1)) == NULL) {
        return NULL;
    }

    for (i = 0; i < length; i++) {
        unsigned char c = (unsigned char) qs[i];

        if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
             c == '_' || c == '-' || c == '~' || c == '.' || c == '/' || c == '=' || c == '&') {
            escaped[j++] = (char) c;
        } else {
            escaped[j++] = '%';
            escaped[j++] = hex[c >> 4];
            escaped[j++] = hex[c & 0x0f];
        }
    }

    escaped[j] = '\0';

    return escaped;
}
480
481
482
174
/* Percent-escape the path part of a URL.
 *
 * Letters, digits and "_-~./" are copied; every other byte becomes "%XX".
 * Escaping stops at the first '?': it and everything after it are copied
 * unchanged.
 *
 * Returns a newly allocated string that the caller must free(), or NULL
 * if memory cannot be allocated.  Each byte is escaped as an unsigned
 * value, so bytes of 0x80 and above become their own "%XX" code whether
 * or not plain char is signed.
 */
static char *escape_path(const char *path) {
    static const char hex[] = "0123456789ABCDEF";
    size_t i, j = 0, length;
    char *escaped;

    length = strlen(path);
    // Worst case: every byte turns into three characters
    if ((escaped = malloc(length * 3 + 1)) == NULL) {
        return NULL;
    }

    for (i = 0; i < length; i++) {
        unsigned char c = (unsigned char) path[i];

        if (c == '?') break; // don't escape ? or beyond

        if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
             c == '_' || c == '-' || c == '~' || c == '.' || c == '/') {
            escaped[j++] = (char) c;
        } else {
            escaped[j++] = '%';
            escaped[j++] = hex[c >> 4];
            escaped[j++] = hex[c & 0x0f];
        }
    }

    if (i != length) {
        // in the case of a '?' copy the rest of the path across unchanged
        strcpy(escaped + j, path + i);
    } else {
        escaped[j] = '\0';
    }

    return escaped;
}
516
517
518
459
/* Report whether str can be used in a URL as it stands.
 *
 * Returns 1 if str contains no characters that need escaping, or if it
 * contains "%XX" escapes and every '%' in it starts such an escape.
 * Returns 0 otherwise.  Previously a valid escape appearing after a stray
 * '%' made the whole string count as escaped.
 */
static int is_escaped(const char *str) {
    const char *c = str;
    int escaped = 0;
    int bad_percent = 0;
    int needs_escape = 0;

    while (*c != '\0') {
        if (*c == '%') {
            // && stops at the first non-hex character, so this never
            // reads beyond the terminating NUL
            if (isxdigit_c(c[1]) && isxdigit_c(c[2])) {
                escaped = 1;
                c += 3;
                continue;
            }
            // only escaped if all % signs are escaped
            bad_percent = 1;
        }
        if (!((*c >= '0' && *c <= '9') || (*c >= 'A' && *c <= 'Z')
              || (*c >= 'a' && *c <= 'z') ||
              *c == '_' || *c == '-' || *c == '~' || *c == '.' || *c == '/')) {
            needs_escape = 1;
        }
        c++;
    }

    return (escaped && !bad_percent) || !needs_escape;
}
544
545
/* Redirect callback: switch to the region named in a reply header.
 *
 * Looks for "x-amz-bucket-region: " in header (the header text is
 * modified to terminate the region name).  For amazonaws.com hosts the
 * stored region and host are replaced and url is rebuilt from the new
 * host, the stored path and any user query string.
 *
 * Returns 0 if url was rebuilt, -1 otherwise.
 */
static int redirect_endpoint_callback(void *auth, long response,
                                      kstring_t *header, kstring_t *url) {
    static const char region_key[] = "x-amz-bucket-region: ";
    s3_auth_data *ad = (s3_auth_data *)auth;
    char *region, *region_end;
    int status = -1;

    // get the new region from the reply header
    region = strstr(header->s, region_key);
    if (!region)
        return status;

    region += sizeof(region_key) - 1;
    region_end = region;
    while (isalnum_c(*region_end) || ispunct_c(*region_end))
        region_end++;
    *region_end = 0;

    if (!strstr(ad->host.s, "amazonaws.com"))
        return status;

    ad->region.l = 0;
    kputs(region, &ad->region);

    ad->host.l = 0;

    if (ad->url_style == s3_path) {
        // Path style https://s3.{region-code}.amazonaws.com/{bucket-name}/{key-name}
        ksprintf(&ad->host, "s3.%s.amazonaws.com", region);
    } else {
        // Virtual https://{bucket-name}.s3.{region-code}.amazonaws.com/{key-name}
        // The first label of the old host name becomes the subdomain
        kstring_t first_label = KS_INITIALIZE;
        kputsn(ad->host.s, strcspn(ad->host.s, "."), &first_label);

        ksprintf(&ad->host, "%s.s3.%s.amazonaws.com", first_label.s, region);
        free(first_label.s);
    }

    if (ad->region.l && ad->host.l) {
        int failures = 0;

        url->l = 0;
        if (kputs("https://", url) < 0) failures++;
        if (kputs(ad->host.s, url) < 0) failures++;
        if (kputsn(ad->bucket, strlen(ad->bucket), url) < 0) failures++;

        if (failures == 0)
            status = 0;
    }

    if (ad->user_query_string.l) {
        kputc('?', url);
        kputsn(ad->user_query_string.s, ad->user_query_string.l, url);
    }

    return status;
}
599
600
/* Parse an s3: URL, gather credentials and build the real URL to fetch.
 *
 * s3url  - URL of the form s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH
 *          (a single name before '@' is taken as a profile name).
 * mode   - open mode; 'r' anywhere in it selects reading, otherwise writing.
 * sigver - signature version in use (2 or 4); affects what is stored in
 *          the host and bucket fields.
 * url    - receives the http(s) URL to open.
 *
 * Credentials come from the URL if present, otherwise from the environment,
 * then ~/.aws/credentials, ~/.s3cfg and ~/.awssecret (or the files named by
 * AWS_SHARED_CREDENTIALS_FILE and HTS_S3_S3CFG).
 *
 * Returns a new s3_auth_data, to be released with free_auth_data(), or
 * NULL on failure.
 */
static s3_auth_data * setup_auth_data(const char *s3url, const char *mode,
                                      int sigver, kstring_t *url)
{
    s3_auth_data *ad = calloc(1, sizeof(*ad));
    const char *bucket, *path;
    char *escaped = NULL;
    size_t url_path_pos;
    ptrdiff_t bucket_len;
    int is_https = 1, dns_compliant;
    char *query_start;

    if (!ad)
        return NULL;
    ad->mode = strchr(mode, 'r') ? 'r' : 'w';
    ad->url_style = s3_auto;

    // Our S3 URL format is s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH

    if (s3url[2] == '+') {
        // Test the strchr() result itself; adding 1 first would hide a
        // missing ':' and is undefined for a NULL pointer.
        const char *scheme_end = strchr(s3url, ':');
        if (scheme_end == NULL) {
            free(ad);
            return NULL;
        }
        bucket = scheme_end + 1;
        kputsn(&s3url[3], bucket - &s3url[3], url);
        is_https = strncmp(url->s, "https:", 6) == 0;
    }
    else {
        kputs("https:", url);
        bucket = &s3url[3];
    }
    while (*bucket == '/') kputc(*bucket++, url);

    path = bucket + strcspn(bucket, "/?#@");

    if (*path == '@') {
        // Credentials (or a profile name) are given in the URL
        const char *colon = strpbrk(bucket, ":@");
        if (*colon != ':') {
            urldecode_kput(bucket, colon - bucket, &ad->profile);
        }
        else {
            const char *colon2 = strpbrk(&colon[1], ":@");
            urldecode_kput(bucket, colon - bucket, &ad->id);
            urldecode_kput(&colon[1], colon2 - &colon[1], &ad->secret);
            if (*colon2 == ':')
                urldecode_kput(&colon2[1], path - &colon2[1], &ad->token);
        }

        bucket = &path[1];
        path = bucket + strcspn(bucket, "/?#");
    }
    else {
        // If the URL has no ID[:SECRET]@, consider environment variables.
        const char *v;
        if ((v = getenv("AWS_ACCESS_KEY_ID")) != NULL) kputs(v, &ad->id);
        if ((v = getenv("AWS_SECRET_ACCESS_KEY")) != NULL) kputs(v, &ad->secret);
        if ((v = getenv("AWS_SESSION_TOKEN")) != NULL) kputs(v, &ad->token);
        if ((v = getenv("AWS_DEFAULT_REGION")) != NULL) kputs(v, &ad->region);
        if ((v = getenv("HTS_S3_HOST")) != NULL) kputs(v, &ad->host);

        if ((v = getenv("AWS_DEFAULT_PROFILE")) != NULL) kputs(v, &ad->profile);
        else if ((v = getenv("AWS_PROFILE")) != NULL) kputs(v, &ad->profile);
        else kputs("default", &ad->profile);

        if ((v = getenv("HTS_S3_ADDRESS_STYLE")) != NULL) {
            if (strcasecmp(v, "virtual") == 0) {
                ad->url_style = s3_virtual;
            } else if (strcasecmp(v, "path") == 0) {
                ad->url_style = s3_path;
            }
        }
    }

    if (ad->id.l == 0) {
        kstring_t url_style = KS_INITIALIZE;
        kstring_t expiry_time = KS_INITIALIZE;
        const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE");
        parse_ini(v? v : "~/.aws/credentials", ad->profile.s,
                  "aws_access_key_id", &ad->id,
                  "aws_secret_access_key", &ad->secret,
                  "aws_session_token", &ad->token,
                  "region", &ad->region,
                  "addressing_style", &url_style,
                  "expiry_time", &expiry_time,
                  NULL);

        if (url_style.l) {
            if (strcmp(url_style.s, "virtual") == 0) {
                ad->url_style = s3_virtual;
            } else if (strcmp(url_style.s, "path") == 0) {
                ad->url_style = s3_path;
            } else {
                ad->url_style = s3_auto;
            }
        }
        if (expiry_time.l) {
            // Not a real part of the AWS configuration file, but it allows
            // support for short-term credentials like those for the IAM
            // service.  The botocore library uses the key "expiry_time"
            // internally for this purpose.
            // See https://github.com/boto/botocore/blob/develop/botocore/credentials.py
            ad->creds_expiry_time = parse_rfc3339_date(&expiry_time);
        }

        ks_free(&url_style);
        ks_free(&expiry_time);
    }

    if (ad->id.l == 0) {
        kstring_t url_style = KS_INITIALIZE;
        const char *v = getenv("HTS_S3_S3CFG");
        parse_ini(v? v : "~/.s3cfg", ad->profile.s, "access_key", &ad->id,
                  "secret_key", &ad->secret, "access_token", &ad->token,
                  "host_base", &ad->host,
                  "bucket_location", &ad->region,
                  "host_bucket", &url_style,
                  NULL);

        if (url_style.l) {
            // Conforming to s3cmd's GitHub PR#416, host_bucket without the "%(bucket)s" string
            // indicates use of path style addressing.
            if (strstr(url_style.s, "%(bucket)s") == NULL) {
                ad->url_style = s3_path;
            } else {
                ad->url_style = s3_auto;
            }
        }

        ks_free(&url_style);
    }

    if (ad->id.l == 0)
        parse_simple("~/.awssecret", &ad->id, &ad->secret);


    // if address_style is set, force the dns_compliant setting
    if (ad->url_style == s3_virtual) {
        dns_compliant = 1;
    } else if (ad->url_style == s3_path) {
        dns_compliant = 0;
    } else {
        dns_compliant = is_dns_compliant(bucket, path, is_https);
    }

    if (ad->host.l == 0)
        kputs("s3.amazonaws.com", &ad->host);

    if (!dns_compliant && ad->region.l > 0
        && strcmp(ad->host.s, "s3.amazonaws.com") == 0) {
        // Can avoid a redirection by including the region in the host name
        // (assuming the right one has been specified)
        ad->host.l = 0;
        ksprintf(&ad->host, "s3.%s.amazonaws.com", ad->region.s);
    }

    if (ad->region.l == 0)
        kputs("us-east-1", &ad->region);

    if (!is_escaped(path)) {
        escaped = escape_path(path);
        if (escaped == NULL) {
            goto error;
        }
    }

    bucket_len = path - bucket;

    // Use virtual hosted-style access if possible, otherwise path-style.
    if (dns_compliant) {
        size_t url_host_pos = url->l;
        // Append "bucket.host" to url
        kputsn_(bucket, bucket_len, url);
        kputc('.', url);
        kputsn(ad->host.s, ad->host.l, url);
        url_path_pos = url->l;

        if (sigver == 4) {
            // Copy back to ad->host to use when making the signature
            ad->host.l = 0;
            kputsn(url->s + url_host_pos, url->l - url_host_pos, &ad->host);
        }
    }
    else {
        // Append "host/bucket" to url
        kputsn(ad->host.s, ad->host.l, url);
        url_path_pos = url->l;
        kputc('/', url);
        kputsn(bucket, bucket_len, url);
    }

    kputs(escaped == NULL ? path : escaped, url);

    if (sigver == 4 || !dns_compliant) {
        // The path as it appears in the URL (copy includes the NUL)
        ad->bucket = malloc(url->l - url_path_pos + 1);
        if (ad->bucket == NULL) {
            goto error;
        }
        memcpy(ad->bucket, url->s + url_path_pos, url->l - url_path_pos + 1);
    }
    else {
        // "/" + bucket name + the path as it appears in the URL
        ad->bucket = malloc(url->l - url_path_pos + bucket_len + 2);
        if (ad->bucket == NULL) {
            goto error;
        }
        ad->bucket[0] = '/';
        memcpy(ad->bucket + 1, bucket, bucket_len);
        memcpy(ad->bucket + bucket_len + 1,
               url->s + url_path_pos, url->l - url_path_pos + 1);
    }

    // write any query strings to its own place to use later
    if ((query_start = strchr(ad->bucket, '?'))) {
        kputs(query_start + 1, &ad->user_query_string);
        *query_start = 0;
    }

    free(escaped);

    return ad;

 error:
    free(escaped);
    free_auth_data(ad);
    return NULL;
}
825
826
static hFILE * s3_rewrite(const char *s3url, const char *mode, va_list *argsp)
827
0
{
828
0
    kstring_t url = { 0, 0, NULL };
829
0
    s3_auth_data *ad = setup_auth_data(s3url, mode, 2, &url);
830
831
0
    if (!ad)
832
0
        return NULL;
833
834
0
    hFILE *fp = hopen(url.s, mode, "va_list", argsp,
835
0
                      "httphdr_callback", auth_header_callback,
836
0
                      "httphdr_callback_data", ad,
837
0
                      "redirect_callback", redirect_endpoint_callback,
838
0
                      "redirect_callback_data", ad,
839
0
                      NULL);
840
0
    if (!fp) goto fail;
841
842
0
    free(url.s);
843
0
    return fp;
844
845
0
 fail:
846
0
    free(url.s);
847
0
    free_auth_data(ad);
848
0
    return NULL;
849
0
}
850
851
/***************************************************************
852
853
AWS S3 sig version 4 writing code
854
855
****************************************************************/
856
857
456
static void hash_string(char *in, size_t length, char *out, size_t out_len) {
858
456
    unsigned char hashed[SHA256_DIGEST_BUFSIZE];
859
456
    int i, j;
860
861
456
    s3_sha256((const unsigned char *)in, length, hashed);
862
863
15.0k
    for (i = 0, j = 0; i < SHA256_DIGEST_BUFSIZE; i++, j+= 2) {
864
14.5k
        snprintf(out + j, out_len - j, "%02x", hashed[i]);
865
14.5k
    }
866
456
}
867
868
1.36k
/* Reset s to an empty kstring that owns no buffer. */
static void ksinit(kstring_t *s) {
    s->s = NULL;
    s->m = s->l = 0;
}
873
874
875
1.36k
/* Release the buffer held by s and leave s empty. */
static void ksfree(kstring_t *s) {
    char *old_buffer = s->s;

    ksinit(s);
    free(old_buffer);
}
879
880
881
228
/* Compute the signature for string_to_sign.
 *
 * The signing key is derived by chaining HMAC-SHA256 over the short date,
 * the region, "s3" and "aws4_request", starting from "AWS4" + secret.  The
 * resulting signature is written to signature_string (sig_string_len
 * bytes long) as lower-case hexadecimal.
 *
 * Returns 0 on success, -1 if the initial key could not be built.
 */
static int make_signature(s3_auth_data *ad, kstring_t *string_to_sign, char *signature_string, size_t sig_string_len) {
    static const unsigned char service_name[] = "s3";
    static const unsigned char request_name[] = "aws4_request";

    unsigned char k_date[SHA256_DIGEST_BUFSIZE];
    unsigned char k_region[SHA256_DIGEST_BUFSIZE];
    unsigned char k_service[SHA256_DIGEST_BUFSIZE];
    unsigned char k_signing[SHA256_DIGEST_BUFSIZE];
    unsigned char mac[SHA256_DIGEST_BUFSIZE];

    kstring_t seed_key = KS_INITIALIZE;
    unsigned int mac_len;
    unsigned int byte;

    ksprintf(&seed_key, "AWS4%s", ad->secret.s);
    if (seed_key.l == 0)
        return -1;

    // Each step's output is the key for the next step
    s3_sign_sha256(seed_key.s, seed_key.l,
                   (const unsigned char *)ad->date_short, strlen(ad->date_short),
                   k_date, &mac_len);
    s3_sign_sha256(k_date, mac_len,
                   (const unsigned char *)ad->region.s, ad->region.l,
                   k_region, &mac_len);
    s3_sign_sha256(k_region, mac_len,
                   service_name, sizeof(service_name) - 1,
                   k_service, &mac_len);
    s3_sign_sha256(k_service, mac_len,
                   request_name, sizeof(request_name) - 1,
                   k_signing, &mac_len);
    s3_sign_sha256(k_signing, mac_len,
                   (const unsigned char *)string_to_sign->s, string_to_sign->l,
                   mac, &mac_len);

    for (byte = 0; byte < mac_len; byte++) {
        unsigned int pos = byte * 2;
        snprintf(signature_string + pos, sig_string_len - pos, "%02x", mac[byte]);
    }

    ksfree(&seed_key);

    return 0;
}
915
916
917
228
/* Build the "Authorization: AWS4-HMAC-SHA256 ..." header line for a request.

   The steps are: choose the signed header list (with or without
   x-amz-security-token, depending on whether ad->token is set), build the
   matching canonical headers, assemble the canonical request, hash it,
   build the scope and string-to-sign, sign that with make_signature() and
   finally format the header into auth.

   ad            - authorisation data (id, token, host, bucket, region,
                   dates and canonical query string are read)
   http_request  - HTTP method text placed first in the canonical request
   content       - hex content hash used for x-amz-content-sha256
   auth          - receives the finished header text (appended via ksprintf)

   Returns 0 on success, -1 if any intermediate string could not be built
   or signing failed.  All temporary strings are released on every path. */
static int make_authorisation(s3_auth_data *ad, char *http_request, char *content, kstring_t *auth) {
    kstring_t signed_headers = KS_INITIALIZE;
    kstring_t canonical_headers = KS_INITIALIZE;
    kstring_t canonical_request = KS_INITIALIZE;
    kstring_t scope = KS_INITIALIZE;
    kstring_t string_to_sign = KS_INITIALIZE;
    char cr_hash[HASH_LENGTH_SHA256];
    char signature_string[HASH_LENGTH_SHA256];
    int ret = -1;

    if (!ad->token.l) {
        kputs("host;x-amz-content-sha256;x-amz-date", &signed_headers);
        ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\n",
                 ad->host.s, content, ad->date_long);
    } else {
        kputs("host;x-amz-content-sha256;x-amz-date;x-amz-security-token", &signed_headers);
        ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\nx-amz-security-token:%s\n",
                 ad->host.s, content, ad->date_long, ad->token.s);
    }

    if (signed_headers.l == 0 || canonical_headers.l == 0) {
        goto cleanup;
    }

    // bucket == canonical_uri
    ksprintf(&canonical_request, "%s\n%s\n%s\n%s\n%s\n%s",
        http_request, ad->bucket, ad->canonical_query_string.s,
        canonical_headers.s, signed_headers.s, content);

    if (canonical_request.l == 0) {
        goto cleanup;
    }

    hash_string(canonical_request.s, canonical_request.l, cr_hash, sizeof(cr_hash));

    ksprintf(&scope, "%s/%s/s3/aws4_request", ad->date_short, ad->region.s);

    if (scope.l == 0) {
        goto cleanup;
    }

    ksprintf(&string_to_sign, "AWS4-HMAC-SHA256\n%s\n%s\n%s", ad->date_long, scope.s, cr_hash);

    if (string_to_sign.l == 0) {
        goto cleanup;
    }

    if (make_signature(ad, &string_to_sign, signature_string, sizeof(signature_string))) {
        goto cleanup;
    }

    ksprintf(auth, "Authorization: AWS4-HMAC-SHA256 Credential=%s/%s/%s/s3/aws4_request,SignedHeaders=%s,Signature=%s",
                ad->id.s, ad->date_short, ad->region.s, signed_headers.s, signature_string);

    if (auth->l == 0) {
        goto cleanup;
    }

    ret = 0;

 cleanup:
    ksfree(&signed_headers);
    ksfree(&canonical_headers);
    ksfree(&canonical_request);
    ksfree(&scope);
    ksfree(&string_to_sign);

    return ret;
}
996
997
998
459
/* Refresh the cached date strings in ad when they are older than
   AUTH_LIFETIME.

   When a refresh is due, date_long ("YYYYMMDDThhmmssZ"), date_short
   ("YYYYMMDD") and date_html ("x-amz-date: <date_long>") are regenerated
   from now, interpreted as UTC.  ad->auth_time is only advanced once every
   string has been rebuilt, so a failed refresh is retried on the next call
   instead of leaving stale or half-written dates in use.

   Returns 0 if ad->date_html holds a value, -1 on any failure. */
static int update_time(s3_auth_data *ad, time_t now) {
    if (now - ad->auth_time > AUTH_LIFETIME) {
#ifdef HAVE_GMTIME_R
        struct tm tm_buffer;
        struct tm *tm = gmtime_r(&now, &tm_buffer);
#else
        struct tm *tm = gmtime(&now);
#endif

        // The conversion can fail; strftime() must not be given NULL.
        if (tm == NULL) {
            return -1;
        }

        if (strftime(ad->date_long, 17, "%Y%m%dT%H%M%SZ", tm) != 16) {
            return -1;
        }

        if (strftime(ad->date_short, 9, "%Y%m%d", tm) != 8) {
            return -1;
        }

        ad->date_html.l = 0;
        ksprintf(&ad->date_html, "x-amz-date: %s", ad->date_long);

        if (ad->date_html.l == 0) {
            return -1;
        }

        // update timestamp only after everything above succeeded
        ad->auth_time = now;
    }

    return ad->date_html.l ? 0 : -1;
}
1027
1028
1029
502k
/* qsort() comparison callback for an array of C string pointers.
   p1 and p2 each point at an array element (a char *); the strings they
   refer to are compared with strcmp().  The elements are accessed through
   const-qualified pointers so the qualifier qsort() supplies is kept. */
static int query_cmp(const void *p1, const void *p2) {
    const char *const *q1 = p1;
    const char *const *q2 = p2;

    return strcmp(*q1, *q2);
}
1035
1036
1037
/* Query strings must be in alphabetical order for authorisation */
1038
1039
122
/* Rewrite qs so that its '&'-separated fields are sorted (strcmp order)
   and the result has been passed through escape_query().

   The fields are located with ksplit(), sorted via qsort()/query_cmp(),
   re-joined with '&', escaped and then written back over qs.  Every append
   is checked so that an allocation failure is reported rather than
   producing a truncated query string.

   Returns 0 on success, -1 on failure.
   NOTE(review): on failure qs may have been altered by ksplit() - callers
   in this file treat a failure as fatal for the request. */
static int order_query_string(kstring_t *qs) {
    int *query_offset = NULL;
    int num_queries, i;
    char **queries = NULL;
    kstring_t ordered = KS_INITIALIZE;
    char *escaped = NULL;
    int ret = -1;

    if ((query_offset = ksplit(qs, '&', &num_queries)) == NULL) {
        return -1;
    }

    if ((queries = malloc(num_queries * sizeof(*queries))) == NULL)
        goto err;

    for (i = 0; i < num_queries; i++) {
        queries[i] = qs->s + query_offset[i];
    }

    qsort(queries, num_queries, sizeof(*queries), query_cmp);

    for (i = 0; i < num_queries; i++) {
        if (i && kputs("&", &ordered) < 0)
            goto err;

        if (kputs(queries[i], &ordered) < 0)
            goto err;
    }

    if ((escaped = escape_query(ordered.s)) == NULL)
        goto err;

    qs->l = 0;
    if (kputs(escaped, qs) < 0)
        goto err;

    ret = 0;
 err:
    free(ordered.s);
    free(queries);
    free(query_offset);
    free(escaped);

    return ret;
}
1083
1084
1085
/* Authorisation callback handed to the "s3w+" writer (see s3_open_v4).

   auth      - the s3_auth_data for this connection
   request   - HTTP method text; NULL is the signal to free auth
   content   - request body to hash, or NULL for an empty body
   cqs       - canonical query string for this request
   hash      - receives the hex content hash
   auth_str  - receives the Authorization header line
   date      - receives the x-amz-date header line
   token     - receives the x-amz-security-token header line when a token
               is set
   uqs       - non-zero to append the user supplied query string

   Returns 0 on success (including the free request), -1 on failure. */
static int write_authorisation_callback(void *auth, char *request, kstring_t *content, char *cqs,
                                        kstring_t *hash, kstring_t *auth_str, kstring_t *date,
                                        kstring_t *token, int uqs) {
    s3_auth_data *state = (s3_auth_data *)auth;
    char digest_hex[HASH_LENGTH_SHA256];
    time_t current;

    if (!request) {
        // signal to free auth data
        free_auth_data(state);
        return 0;
    }

    current = time(NULL);

    if (update_time(state, current) != 0)
        return -1;

    if (state->creds_expiry_time > 0
        && state->creds_expiry_time - current < CREDENTIAL_LIFETIME)
        refresh_auth_data(state);

    if (content != NULL)
        hash_string(content->s, content->l, digest_hex, sizeof(digest_hex));
    else
        hash_string("", 0, digest_hex, sizeof(digest_hex)); // empty hash

    state->canonical_query_string.l = 0;
    kputs(cqs, &state->canonical_query_string);

    if (state->canonical_query_string.l == 0)
        return -1;

    /* add a user provided query string, normally only useful on upload initiation */
    if (uqs) {
        kputs("&", &state->canonical_query_string);
        kputs(state->user_query_string.s, &state->canonical_query_string);

        if (order_query_string(&state->canonical_query_string) != 0)
            return -1;
    }

    if (make_authorisation(state, request, digest_hex, auth_str) != 0)
        return -1;

    kputs(state->date_html.s, date);
    kputsn(digest_hex, HASH_LENGTH_SHA256, hash);

    if (!(date->l != 0 && hash->l != 0))
        return -1;

    if (state->token.l != 0)
        ksprintf(token, "x-amz-security-token: %s", state->token.s);

    return 0;
}
1149
1150
1151
464
static int v4_auth_header_callback(void *ctx, char ***hdrs) {
1152
464
    s3_auth_data *ad = (s3_auth_data *) ctx;
1153
464
    char content_hash[HASH_LENGTH_SHA256];
1154
464
    kstring_t content = KS_INITIALIZE;
1155
464
    kstring_t authorisation = KS_INITIALIZE;
1156
464
    kstring_t token_hdr = KS_INITIALIZE;
1157
464
    char *date_html = NULL;
1158
464
    time_t now;
1159
464
    int idx;
1160
1161
464
    if (!hdrs) { // Closing connection
1162
5
        free_auth_data(ad);
1163
5
        return 0;
1164
5
    }
1165
1166
459
    now = time(NULL);
1167
1168
459
    if (update_time(ad, now)) {
1169
0
        return -1;
1170
0
    }
1171
1172
459
    if (ad->creds_expiry_time > 0
1173
0
        && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) {
1174
0
        refresh_auth_data(ad);
1175
0
    }
1176
1177
459
    if (!ad->id.l || !ad->secret.l) {
1178
231
        return copy_auth_headers(ad, hdrs);
1179
231
    }
1180
1181
228
    hash_string("", 0, content_hash, sizeof(content_hash)); // empty hash
1182
1183
228
    ad->canonical_query_string.l = 0;
1184
1185
228
    if (ad->user_query_string.l > 0) {
1186
122
        kputs(ad->user_query_string.s, &ad->canonical_query_string);
1187
1188
122
        if (order_query_string(&ad->canonical_query_string)) {
1189
0
            return -1;
1190
0
        }
1191
122
    } else {
1192
106
        kputs("", &ad->canonical_query_string);
1193
106
    }
1194
1195
228
    if (make_authorisation(ad, "GET", content_hash, &authorisation)) {
1196
0
        return -1;
1197
0
    }
1198
1199
228
    ksprintf(&content, "x-amz-content-sha256: %s", content_hash);
1200
228
    date_html = strdup(ad->date_html.s);
1201
1202
228
    if (ad->token.l > 0) {
1203
108
        kputs("X-Amz-Security-Token: ", &token_hdr);
1204
108
        kputs(ad->token.s, &token_hdr);
1205
108
    }
1206
1207
228
    if (content.l == 0 || date_html == NULL) {
1208
0
        ksfree(&authorisation);
1209
0
        ksfree(&content);
1210
0
        ksfree(&token_hdr);
1211
0
        free(date_html);
1212
0
        return -1;
1213
0
    }
1214
1215
228
    *hdrs = &ad->headers[0];
1216
228
    idx = 0;
1217
228
    ad->headers[idx++] = ks_release(&authorisation);
1218
228
    ad->headers[idx++] = date_html;
1219
228
    ad->headers[idx++] = ks_release(&content);
1220
228
    if (token_hdr.s)
1221
108
        ad->headers[idx++] = ks_release(&token_hdr);
1222
228
    ad->headers[idx++] = NULL;
1223
1224
228
    return 0;
1225
228
}
1226
1227
5
/* Extract the region named in a 400 Bad Request response body and store
   it in ad->region.  Returns 0 if a region was found and stored, -1
   otherwise (read error, no <Region> element, or an empty result). */
static int handle_400_response(hFILE *fp, s3_auth_data *ad) {
    // v4 signatures in virtual hosted mode return 400 Bad Request if the
    // wrong region is used to make the signature.  The response is an xml
    // document which includes the name of the correct region.  This can
    // be extracted and used to generate a corrected signature.
    // As the xml is fairly simple, go with something "good enough" instead
    // of trying to parse it properly.

    char body[1024];
    char *start, *stop;
    ssize_t got = hread(fp, body, sizeof(body) - 1);

    if (got < 0)
        return -1;

    body[got] = '\0';

    start = strstr(body, "<Region>");
    if (!start)
        return -1;

    // Step over the opening tag, then over any leading white space
    for (start += 8; isspace((unsigned char) *start); ++start)
        ;

    stop = strchr(start, '<');
    if (!stop || strncmp(stop + 1, "/Region>", 8) != 0)
        return -1;

    // Trim trailing white space before the closing tag
    while (stop > start && isspace((unsigned char) stop[-1]))
        --stop;

    ad->region.l = 0;
    kputsn(start, stop - start, &ad->region);

    return ad->region.l == 0 ? -1 : 0;
}
1262
1263
0
/* Replace the region stored in the auth data (adv) with the contents of
   region.  Returns 0 on success, 1 if the copy failed. */
static int set_region(void *adv, kstring_t *region) {
    s3_auth_data *auth = (s3_auth_data *) adv;
    int appended;

    auth->region.l = 0;
    appended = kputsn(region->s, region->l, &auth->region);

    return appended < 0;
}
1269
1270
/* Map an HTTP status code to an errno value.
   Specific 4xx/5xx codes get a dedicated value; any other status of 500 or
   above gives EIO, any other status of 400 or above gives EINVAL, and
   everything below 400 gives 0. */
static int http_status_errno(int status)
{
    switch (status) {
    case 401:
    case 407:
        return EPERM;
    case 403:
        return EACCES;
    case 404:
    case 410:
        return ENOENT;
    case 405:
        return EROFS;
    case 408:
    case 504:
        return ETIMEDOUT;
    case 501:
        return ENOSYS;
    case 503:
        return EBUSY;
    default:
        break;
    }

    if (status >= 500)
        return EIO;
    if (status >= 400)
        return EINVAL;
    return 0;
}
1292
1293
459
static hFILE *s3_open_v4(const char *s3url, const char *mode, va_list *argsp) {
1294
459
    kstring_t url = { 0, 0, NULL };
1295
1296
459
    s3_auth_data *ad = setup_auth_data(s3url, mode, 4, &url);
1297
459
    hFILE *fp = NULL;
1298
1299
459
    if (ad == NULL) {
1300
0
        return NULL;
1301
0
    }
1302
1303
459
    if (ad->mode == 'r') {
1304
459
        long http_response = 0;
1305
1306
459
        fp = hopen(url.s, mode, "va_list", argsp,
1307
459
                   "httphdr_callback", v4_auth_header_callback,
1308
459
                   "httphdr_callback_data", ad,
1309
459
                   "redirect_callback", redirect_endpoint_callback,
1310
459
                   "redirect_callback_data", ad,
1311
459
                   "http_response_ptr", &http_response,
1312
459
                   "fail_on_error", 0,
1313
459
                   NULL);
1314
1315
459
        if (fp == NULL) goto error;
1316
1317
5
        if (http_response == 307) {
1318
            // Follow additional redirect.
1319
0
            ad->refcount = 1;
1320
0
            hclose_abruptly(fp);
1321
1322
0
            url.l  = 0;
1323
0
            ksprintf(&url, "https://%s%s", ad->host.s, ad->bucket);
1324
1325
0
            fp = hopen(url.s, mode, "va_list", argsp,
1326
0
                   "httphdr_callback", v4_auth_header_callback,
1327
0
                   "httphdr_callback_data", ad,
1328
0
                   "redirect_callback", redirect_endpoint_callback,
1329
0
                   "redirect_callback_data", ad,
1330
0
                   "http_response_ptr", &http_response,
1331
0
                   "fail_on_error", 0,
1332
0
                   NULL);
1333
0
        }
1334
1335
5
        if (http_response == 400) {
1336
5
            ad->refcount = 1;
1337
5
            if (handle_400_response(fp, ad) != 0) {
1338
5
                goto error;
1339
5
            }
1340
0
            hclose_abruptly(fp);
1341
0
            fp = hopen(url.s, mode, "va_list", argsp,
1342
0
                       "httphdr_callback", v4_auth_header_callback,
1343
0
                       "httphdr_callback_data", ad,
1344
0
                       "redirect_callback", redirect_endpoint_callback,
1345
0
                       "redirect_callback_data", ad,
1346
0
                       NULL);
1347
0
        } else if (http_response > 400) {
1348
0
            ad->refcount = 1;
1349
0
            errno = http_status_errno(http_response);
1350
0
            goto error;
1351
0
        }
1352
1353
0
        if (fp == NULL) goto error;
1354
0
    } else {
1355
0
        kstring_t final_url = KS_INITIALIZE;
1356
1357
         // add the scheme marker
1358
0
        ksprintf(&final_url, "s3w+%s", url.s);
1359
1360
0
        if(final_url.l == 0) goto error;
1361
1362
0
        fp = hopen(final_url.s, mode, "va_list", argsp,
1363
0
                   "s3_auth_callback",  write_authorisation_callback,
1364
0
                   "s3_auth_callback_data", ad,
1365
0
                   "redirect_callback", redirect_endpoint_callback,
1366
0
                   "set_region_callback", set_region,
1367
0
                   NULL);
1368
0
        free(final_url.s);
1369
1370
0
        if (fp == NULL) goto error;
1371
0
    }
1372
1373
0
    free(url.s);
1374
1375
0
    return fp;
1376
1377
459
  error:
1378
1379
459
    if (fp) hclose_abruptly(fp);
1380
459
    free(url.s);
1381
459
    free_auth_data(ad);
1382
1383
459
    return NULL;
1384
459
}
1385
1386
1387
static hFILE *s3_open(const char *url, const char *mode)
1388
459
{
1389
459
    hFILE *fp;
1390
1391
459
    kstring_t mode_colon = { 0, 0, NULL };
1392
459
    kputs(mode, &mode_colon);
1393
459
    kputc(':', &mode_colon);
1394
1395
459
    if (getenv("HTS_S3_V2") == NULL) { // Force the v2 signature code
1396
459
        fp = s3_open_v4(url, mode_colon.s, NULL);
1397
459
    } else {
1398
0
        fp = s3_rewrite(url, mode_colon.s, NULL);
1399
0
    }
1400
1401
459
    free(mode_colon.s);
1402
1403
459
    return fp;
1404
459
}
1405
1406
/* Scheme handler entry point for opening an S3 URL with extra hopen()
   arguments.  Uses s3_open_v4 unless the HTS_S3_V2 environment variable is
   set, in which case s3_rewrite (the v2 signature code) is used. */
static hFILE *s3_vopen(const char *url, const char *mode_colon, va_list args0)
{
    hFILE *result;
    // Need to use va_copy() as we can only take the address of an actual
    // va_list object, not that of a parameter whose type may have decayed.
    va_list local_args;

    va_copy(local_args, args0);

    result = (getenv("HTS_S3_V2") == NULL)
        ? s3_open_v4(url, mode_colon, &local_args)
        : s3_rewrite(url, mode_colon, &local_args);

    va_end(local_args);
    return result;
}
1423
1424
int PLUGIN_GLOBAL(hfile_plugin_init,_s3)(struct hFILE_plugin *self)
1425
1
{
1426
1
    static const struct hFILE_scheme_handler handler =
1427
1
        { s3_open, hfile_always_remote, "Amazon S3", 2000 + 50, s3_vopen
1428
1
        };
1429
1430
#ifdef ENABLE_PLUGINS
1431
    // Embed version string for examination via strings(1) or what(1)
1432
    static const char id[] = "@(#)hfile_s3 plugin (htslib)\t" HTS_VERSION_TEXT;
1433
    if (hts_verbose >= 9)
1434
        fprintf(stderr, "[M::hfile_s3.init] version %s\n", strchr(id, '\t')+1);
1435
#endif
1436
1437
1
    self->name = "Amazon S3";
1438
1
    hfile_add_scheme_handler("s3", &handler);
1439
1
    hfile_add_scheme_handler("s3+http", &handler);
1440
1
    hfile_add_scheme_handler("s3+https", &handler);
1441
1
    return 0;
1442
1
}