Coverage Report

Created: 2023-06-07 06:43

/src/htslib/hfile_s3.c
Line
Count
Source (jump to first uncovered line)
1
/*  hfile_s3.c -- Amazon S3 backend for low-level file streams.
2
3
    Copyright (C) 2015-2017, 2019-2022 Genome Research Ltd.
4
5
    Author: John Marshall <jm18@sanger.ac.uk>
6
7
Permission is hereby granted, free of charge, to any person obtaining a copy
8
of this software and associated documentation files (the "Software"), to deal
9
in the Software without restriction, including without limitation the rights
10
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
copies of the Software, and to permit persons to whom the Software is
12
furnished to do so, subject to the following conditions:
13
14
The above copyright notice and this permission notice shall be included in
15
all copies or substantial portions of the Software.
16
17
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
DEALINGS IN THE SOFTWARE.  */
24
25
#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
26
#include <config.h>
27
28
#include <stdarg.h>
29
#include <stdio.h>
30
#include <stdlib.h>
31
#include <string.h>
32
#include <strings.h>
33
#include <time.h>
34
35
#include <errno.h>
36
37
#include "hfile_internal.h"
38
#ifdef ENABLE_PLUGINS
39
#include "version.h"
40
#endif
41
#include "htslib/hts.h"  // for hts_version() and hts_verbose
42
#include "htslib/kstring.h"
43
#include "hts_time_funcs.h"
44
45
typedef struct s3_auth_data {
46
    kstring_t id;
47
    kstring_t token;
48
    kstring_t secret;
49
    kstring_t region;
50
    kstring_t canonical_query_string;
51
    kstring_t user_query_string;
52
    kstring_t host;
53
    kstring_t profile;
54
    time_t creds_expiry_time;
55
    char *bucket;
56
    kstring_t auth_hdr;
57
    time_t auth_time;
58
    char date[40];
59
    char date_long[17];
60
    char date_short[9];
61
    kstring_t date_html;
62
    char mode;
63
    char *headers[5];
64
    int refcount;
65
} s3_auth_data;
66
67
0
#define AUTH_LIFETIME 60  // Regenerate auth headers if older than this
68
0
#define CREDENTIAL_LIFETIME 60 // Seconds before expiry to reread credentials
69
70
/*
 * Crypto backend.  Exactly one of the two implementations below is
 * compiled, chosen by configure: Apple CommonCrypto or OpenSSL-style
 * HMAC()/SHA256().  Both provide the same three helpers:
 *
 *   s3_sign()        - HMAC-SHA1 of `message` keyed with `key`; stores the
 *                      digest in `digest` and returns its length in bytes.
 *   s3_sha256()      - plain SHA-256 of `length` bytes at `in` into `out`.
 *   s3_sign_sha256() - HMAC-SHA256 of `n` bytes at `d` keyed with `key`;
 *                      stores the digest in `md` and its length in *md_len.
 *
 * HASH_LENGTH_SHA256 is the buffer size for a hex-encoded SHA-256 digest
 * plus its terminating NUL.  The expansion is fully parenthesised so that
 * the macro is safe inside larger expressions.
 */
#if defined HAVE_COMMONCRYPTO

#include <CommonCrypto/CommonHMAC.h>

#define DIGEST_BUFSIZ CC_SHA1_DIGEST_LENGTH
#define SHA256_DIGEST_BUFSIZE CC_SHA256_DIGEST_LENGTH
#define HASH_LENGTH_SHA256 ((SHA256_DIGEST_BUFSIZE * 2) + 1)

static size_t
s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message)
{
    CCHmac(kCCHmacAlgSHA1, key->s, key->l, message->s, message->l, digest);
    return CC_SHA1_DIGEST_LENGTH;
}


static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) {
    CC_SHA256(in, length, out);
}


static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) {
    CCHmac(kCCHmacAlgSHA256, key, key_len, d, n, md);
    *md_len = CC_SHA256_DIGEST_LENGTH;
}


#elif defined HAVE_HMAC

#include <openssl/hmac.h>
#include <openssl/sha.h>

#define DIGEST_BUFSIZ EVP_MAX_MD_SIZE
#define SHA256_DIGEST_BUFSIZE SHA256_DIGEST_LENGTH
#define HASH_LENGTH_SHA256 ((SHA256_DIGEST_BUFSIZE * 2) + 1)

static size_t
s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message)
{
    unsigned int len;
    HMAC(EVP_sha1(), key->s, key->l,
         (unsigned char *) message->s, message->l, digest, &len);
    return len;
}


static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) {
    SHA256(in, length, out);
}


static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) {
    HMAC(EVP_sha256(), key, key_len, d, n, md, md_len);
}

#else
#error No HMAC() routine found by configure
#endif
128
129
static void
130
urldecode_kput(const char *s, int len, kstring_t *str)
131
0
{
132
0
    char buf[3];
133
0
    int i = 0;
134
135
0
    while (i < len)
136
0
        if (s[i] == '%' && i+2 < len) {
137
0
            buf[0] = s[i+1], buf[1] = s[i+2], buf[2] = '\0';
138
0
            kputc(strtol(buf, NULL, 16), str);
139
0
            i += 3;
140
0
        }
141
0
        else kputc(s[i++], str);
142
0
}
143
144
/*
 * Append the base64 encoding of `len` bytes at `data` to `str`,
 * including '=' padding.
 */
static void base64_kput(const unsigned char *data, size_t len, kstring_t *str)
{
    static const char alphabet[] =
        "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    size_t next = 0;        // Index of the next input byte
    unsigned acc = 0;       // Bit accumulator; the low `nbits` bits are pending
    int nbits = 0;
    int npad = 0;           // Number of zero bytes shifted in after the input ended

    while (nbits != 0 || next < len) {
        if (nbits < 6) {
            acc <<= 8;
            nbits += 8;
            if (next < len)
                acc |= data[next++];
            else
                npad++;
        }

        nbits -= 6;
        kputc(alphabet[(acc >> nbits) & 63], str);
    }

    // Characters emitted for the zero filler are replaced by '=' padding
    str->l -= npad;
    kputsn("==", npad, str);
}
167
168
/*
 * Decide whether the bucket name in [s0, slim) can be used as a host name
 * label for virtual-hosted-style access.
 *
 * Accepts lower-case letters, digits, '-' (not first or last) and, for
 * non-https URLs only, '.' surrounded by alphanumerics.  The name must
 * contain at least one letter or '-' and be 3 to 63 characters long.
 * Returns non-zero if compliant, 0 otherwise.
 */
static int is_dns_compliant(const char *s0, const char *slim, int is_https)
{
    int seen_nondigit = 0, length = 0;
    const char *p;

    for (p = s0; p < slim; p++, length++) {
        if (islower_c(*p)) {
            seen_nondigit = 1;
            continue;
        }

        if (*p == '-') {
            seen_nondigit = 1;
            if (p == s0 || p + 1 == slim) return 0;
            continue;
        }

        if (isdigit_c(*p))
            continue;

        // Anything else must be a dot, which has extra restrictions
        if (*p != '.') return 0;
        if (is_https) return 0;
        if (p == s0 || ! isalnum_c(p[-1])) return 0;
        if (p + 1 == slim || ! isalnum_c(p[1])) return 0;
    }

    return seen_nondigit && length >= 3 && length <= 63;
}
191
192
static FILE *expand_tilde_open(const char *fname, const char *mode)
193
0
{
194
0
    FILE *fp;
195
196
0
    if (strncmp(fname, "~/", 2) == 0) {
197
0
        kstring_t full_fname = { 0, 0, NULL };
198
0
        const char *home = getenv("HOME");
199
0
        if (! home) return NULL;
200
201
0
        kputs(home, &full_fname);
202
0
        kputs(&fname[1], &full_fname);
203
204
0
        fp = fopen(full_fname.s, mode);
205
0
        free(full_fname.s);
206
0
    }
207
0
    else
208
0
        fp = fopen(fname, mode);
209
210
0
    return fp;
211
0
}
212
213
static void parse_ini(const char *fname, const char *section, ...)
214
0
{
215
0
    kstring_t line = { 0, 0, NULL };
216
0
    int active = 1;  // Start active, so global properties are accepted
217
0
    char *s;
218
219
0
    FILE *fp = expand_tilde_open(fname, "r");
220
0
    if (fp == NULL) return;
221
222
0
    while (line.l = 0, kgetline(&line, (kgets_func *) fgets, fp) >= 0)
223
0
        if (line.s[0] == '[' && (s = strchr(line.s, ']')) != NULL) {
224
0
            *s = '\0';
225
0
            active = (strcmp(&line.s[1], section) == 0);
226
0
        }
227
0
        else if (active && (s = strpbrk(line.s, ":=")) != NULL) {
228
0
            const char *key = line.s, *value = &s[1], *akey;
229
0
            va_list args;
230
231
0
            while (isspace_c(*key)) key++;
232
0
            while (s > key && isspace_c(s[-1])) s--;
233
0
            *s = '\0';
234
235
0
            while (isspace_c(*value)) value++;
236
0
            while (line.l > 0 && isspace_c(line.s[line.l-1]))
237
0
                line.s[--line.l] = '\0';
238
239
0
            va_start(args, section);
240
0
            while ((akey = va_arg(args, const char *)) != NULL) {
241
0
                kstring_t *avar = va_arg(args, kstring_t *);
242
0
                if (strcmp(key, akey) == 0) {
243
0
                    avar->l = 0;
244
0
                    kputs(value, avar);
245
0
                    break; }
246
0
            }
247
0
            va_end(args);
248
0
        }
249
250
0
    fclose(fp);
251
0
    free(line.s);
252
0
}
253
254
/*
 * Read an access key ID and secret from a plain text file holding the two
 * values separated by whitespace (spaces, tabs or line breaks).
 *
 * The first token is appended to `id` and the second to `secret`.  Nothing
 * is changed if the file cannot be opened or contains no data.
 */
static void parse_simple(const char *fname, kstring_t *id, kstring_t *secret)
{
    kstring_t text = { 0, 0, NULL };
    char *s;
    size_t len;

    FILE *fp = expand_tilde_open(fname, "r");
    if (fp == NULL) return;

    // Join all lines into one string, using a space in place of each newline
    while (kgetline(&text, (kgets_func *) fgets, fp) >= 0)
        kputc(' ', &text);
    fclose(fp);

    // Nothing was read: text.s may be NULL or hold no terminated string
    if (text.l == 0 || text.s == NULL) {
        free(text.s);
        return;
    }

    s = text.s;
    while (isspace_c(*s)) s++;
    kputsn(s, len = strcspn(s, " \t"), id);

    s += len;
    while (isspace_c(*s)) s++;
    kputsn(s, strcspn(s, " \t"), secret);

    free(text.s);
}
277
278
0
/*
 * Build the list of HTTP headers to send from the current state of `ad`.
 *
 * *hdrs is pointed at ad->headers, which is filled with freshly allocated
 * copies of: the Date header, an X-Amz-Security-Token header (only if a
 * token is set), and the Authorization header (only if one has been
 * generated), followed by a terminating NULL.
 *
 * Returns 0 on success.  On allocation failure returns -1 after freeing
 * any headers already copied and clearing their slots, so that
 * ad->headers never holds dangling pointers.
 */
static int copy_auth_headers(s3_auth_data *ad, char ***hdrs) {
    char **hdr = &ad->headers[0];
    int idx = 0;
    *hdrs = hdr;

    hdr[idx] = strdup(ad->date);
    if (!hdr[idx]) return -1;
    idx++;

    if (ad->token.l) {
        kstring_t token_hdr = KS_INITIALIZE;
        if (kputs("X-Amz-Security-Token: ", &token_hdr) < 0
            || kputs(ad->token.s, &token_hdr) < 0) {
            // Don't send a truncated header; drop any partial buffer
            free(token_hdr.s);
            goto fail;
        }
        hdr[idx++] = token_hdr.s;
    }

    if (ad->auth_hdr.l) {
        hdr[idx] = strdup(ad->auth_hdr.s);
        if (!hdr[idx]) goto fail;
        idx++;
    }

    hdr[idx] = NULL;
    return 0;

 fail:
    while (--idx >= 0) {
        free(hdr[idx]);
        hdr[idx] = NULL;
    }
    return -1;
}
312
313
0
/*
 * Drop one reference to `ad`.  While the reference count is positive it
 * is only decremented; once it is zero the structure and every string it
 * owns are freed.
 */
static void free_auth_data(s3_auth_data *ad) {
    size_t k;

    if (ad->refcount > 0) {
        ad->refcount -= 1;
        return;
    }

    // Every heap buffer owned by the structure
    char *owned[] = {
        ad->profile.s,
        ad->id.s,
        ad->token.s,
        ad->secret.s,
        ad->region.s,
        ad->canonical_query_string.s,
        ad->user_query_string.s,
        ad->host.s,
        ad->bucket,
        ad->auth_hdr.s,
        ad->date_html.s,
    };

    for (k = 0; k < sizeof(owned) / sizeof(owned[0]); k++)
        free(owned[k]);

    free(ad);
}
331
332
/*
 * Parse an RFC 3339 style timestamp, e.g. "2023-06-07T06:43:00Z" or
 * "2023-06-07 06:43:00+01:00".
 *
 * The date/time separator may be 'T', 't' or a space.  A time zone
 * designator is required: 'Z'/'z', or a numeric "+hh:mm" / "-hh:mm"
 * offset.  A numeric offset that is malformed or out of range
 * (hh >= 24 or mm >= 60) is ignored, i.e. treated as UTC.
 *
 * Returns the corresponding time_t, or 0 if the string is NULL, cannot be
 * parsed, or converts to a negative time.
 */
static time_t parse_rfc3339_date(kstring_t *datetime)
{
    int offset = 0;  // Seconds to add to convert the local reading to UTC
    time_t when;
    int num;
    char should_be_t = '\0', tz[10] = { '\0' };
    unsigned int year, mon, day, hour, min, sec;

    if (!datetime->s)
        return 0;

    // It should be possible to do this with strptime(), but it seems
    // to not get on with our feature definitions.
    num = sscanf(datetime->s, "%4u-%2u-%2u%c%2u:%2u:%2u%9s",
                 &year, &mon, &day, &should_be_t, &hour, &min, &sec, tz);
    if (num < 8)
        return 0;
    if (should_be_t != 'T' && should_be_t != 't' && should_be_t != ' ')
        return 0;

    // Designated initialisers: the order of struct tm members is not fixed
    struct tm parsed = {
        .tm_sec  = sec,
        .tm_min  = min,
        .tm_hour = hour,
        .tm_mday = day,
        .tm_mon  = mon - 1,
        .tm_year = year - 1900,
    };

    switch (tz[0]) {
      case 'Z':
      case 'z':
      case '\0':
          break;
      case '+':
      case '-': {
          unsigned hr_off = 0, min_off = 0;
          // Both fields must be read before either is used
          if (sscanf(tz + 1, "%2u:%2u", &hr_off, &min_off) == 2
              && hr_off < 24 && min_off < 60) {
              int minutes = (int) (hr_off * 60 + min_off);
              // Zones ahead of UTC ('+') need time subtracted to reach UTC
              offset = minutes * (tz[0] == '+' ? -60 : 60);
          }
          break;
      }
      default:
          return 0;
    }

    when = hts_time_gm(&parsed);
    return when >= 0 ? when + offset : 0;
}
376
377
0
/*
 * Re-read the credentials (key ID, secret, session token and expiry time)
 * for ad->profile from the shared credentials file, so that short-lived
 * credentials can be replaced before they expire.
 *
 * The file is $AWS_SHARED_CREDENTIALS_FILE if set, otherwise
 * "~/.aws/credentials".  Fields not present in the file keep their
 * previous values.
 */
static void refresh_auth_data(s3_auth_data *ad) {
    // Basically a copy of the AWS_SHARED_CREDENTIALS_FILE part of
    // setup_auth_data(), but this only reads the authorisation parts.
    const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE");
    kstring_t expiry_time = KS_INITIALIZE;

    // The key/destination list must end with NULL: parse_ini() walks its
    // variable arguments until it finds a NULL key.
    parse_ini(v? v : "~/.aws/credentials", ad->profile.s,
              "aws_access_key_id", &ad->id,
              "aws_secret_access_key", &ad->secret,
              "aws_session_token", &ad->token,
              "expiry_time", &expiry_time,
              NULL);

    if (expiry_time.l) {
        ad->creds_expiry_time = parse_rfc3339_date(&expiry_time);
    }
    ks_free(&expiry_time);
}
392
393
0
static int auth_header_callback(void *ctx, char ***hdrs) {
394
0
    s3_auth_data *ad = (s3_auth_data *) ctx;
395
396
0
    time_t now = time(NULL);
397
0
#ifdef HAVE_GMTIME_R
398
0
    struct tm tm_buffer;
399
0
    struct tm *tm = gmtime_r(&now, &tm_buffer);
400
#else
401
    struct tm *tm = gmtime(&now);
402
#endif
403
0
    kstring_t message = { 0, 0, NULL };
404
0
    unsigned char digest[DIGEST_BUFSIZ];
405
0
    size_t digest_len;
406
407
0
    if (!hdrs) { // Closing connection
408
0
        free_auth_data(ad);
409
0
        return 0;
410
0
    }
411
412
0
    if (ad->creds_expiry_time > 0
413
0
        && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) {
414
0
        refresh_auth_data(ad);
415
0
    } else if (now - ad->auth_time < AUTH_LIFETIME) {
416
        // Last auth string should still be valid
417
0
        *hdrs = NULL;
418
0
        return 0;
419
0
    }
420
421
0
    strftime(ad->date, sizeof(ad->date), "Date: %a, %d %b %Y %H:%M:%S GMT", tm);
422
0
    if (!ad->id.l || !ad->secret.l) {
423
0
        ad->auth_time = now;
424
0
        return copy_auth_headers(ad, hdrs);
425
0
    }
426
427
0
    if (ksprintf(&message, "%s\n\n\n%s\n%s%s%s%s",
428
0
                 ad->mode == 'r' ? "GET" : "PUT", ad->date + 6,
429
0
                 ad->token.l ? "x-amz-security-token:" : "",
430
0
                 ad->token.l ? ad->token.s : "",
431
0
                 ad->token.l ? "\n" : "",
432
0
                 ad->bucket) < 0) {
433
0
        return -1;
434
0
    }
435
436
0
    digest_len = s3_sign(digest, &ad->secret, &message);
437
0
    ad->auth_hdr.l = 0;
438
0
    if (ksprintf(&ad->auth_hdr, "Authorization: AWS %s:", ad->id.s) < 0)
439
0
        goto fail;
440
0
    base64_kput(digest, digest_len, &ad->auth_hdr);
441
442
0
    free(message.s);
443
0
    ad->auth_time = now;
444
0
    return copy_auth_headers(ad, hdrs);
445
446
0
 fail:
447
0
    free(message.s);
448
0
    return -1;
449
0
}
450
451
452
/*
 * Like escape_path(), but for query strings: '=' and '&' are also left
 * untouched.
 *
 * Every byte other than ASCII letters, digits and "_-~./=&" is replaced
 * by "%XX" (two upper-case hex digits).  Bytes are treated as unsigned,
 * so values of 0x80 and above are encoded correctly whether or not plain
 * char is signed.
 *
 * Returns a newly allocated string that the caller must free(), or NULL
 * if memory could not be allocated.
 */
static char *escape_query(const char *qs) {
    static const char hex[] = "0123456789ABCDEF";
    size_t i, j = 0, length, alloced;
    char *escaped;

    length = strlen(qs);
    alloced = length * 3 + 1;  // Worst case: every byte becomes "%XX"
    if ((escaped = malloc(alloced)) == NULL) {
        return NULL;
    }

    for (i = 0; i < length; i++) {
        unsigned char c = (unsigned char) qs[i];

        if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
             c == '_' || c == '-' || c == '~' || c == '.' || c == '/' || c == '=' || c == '&') {
            escaped[j++] = (char) c;
        } else {
            escaped[j++] = '%';
            escaped[j++] = hex[c >> 4];
            escaped[j++] = hex[c & 0x0f];
        }
    }

    escaped[j] = '\0';

    return escaped;
}
479
480
481
0
/*
 * Percent-encode the path part of a URL.
 *
 * Every byte before the first '?' other than ASCII letters, digits and
 * "_-~./" is replaced by "%XX" (two upper-case hex digits).  The '?' and
 * everything after it are copied unchanged.  Bytes are treated as
 * unsigned, so values of 0x80 and above are encoded correctly whether or
 * not plain char is signed.
 *
 * Returns a newly allocated string that the caller must free(), or NULL
 * if memory could not be allocated.
 */
static char *escape_path(const char *path) {
    static const char hex[] = "0123456789ABCDEF";
    size_t i, j = 0, length, alloced;
    char *escaped;

    length = strlen(path);
    alloced = length * 3 + 1;  // Worst case: every byte becomes "%XX"

    if ((escaped = malloc(alloced)) == NULL) {
        return NULL;
    }

    for (i = 0; i < length; i++) {
        unsigned char c = (unsigned char) path[i];

        if (c == '?') break; // don't escape ? or beyond

        if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
             c == '_' || c == '-' || c == '~' || c == '.' || c == '/') {
            escaped[j++] = (char) c;
        } else {
            escaped[j++] = '%';
            escaped[j++] = hex[c >> 4];
            escaped[j++] = hex[c & 0x0f];
        }
    }

    if (i != length) {
        // in the case of a '?' copy the rest of the path across unchanged
        strcpy(escaped + j, path + i);
    } else {
        escaped[j] = '\0';
    }

    return escaped;
}
515
516
517
0
/*
 * Decide whether `str` can be used in a URL as it stands.
 *
 * Returns non-zero if the string either contains at least one "%XX"
 * escape and every '%' in it starts such an escape, or contains only
 * characters that never need escaping (ASCII letters, digits and
 * "_-~./").  Returns 0 if the string still needs to be escaped.
 */
static int is_escaped(const char *str) {
    const char *c = str;
    int escaped = 0;
    int needs_escape = 0;

    while (*c != '\0') {
        if (*c == '%') {
            // isxdigit_c(c[1]) fails on the terminator, so c[2] is only
            // read when it lies inside the string
            if (isxdigit_c(c[1]) && isxdigit_c(c[2])) {
                escaped = 1;
                c += 3;
                continue;
            }
            // only escaped if all % signs are escaped
            return 0;
        }
        if (!((*c >= '0' && *c <= '9') || (*c >= 'A' && *c <= 'Z')
              || (*c >= 'a' && *c <= 'z') ||
              *c == '_' || *c == '-' || *c == '~' || *c == '.' || *c == '/')) {
            needs_escape = 1;
        }
        c++;
    }

    return escaped || !needs_escape;
}
543
544
/*
 * Redirect callback: switch to the region named in the reply headers.
 *
 * Looks for an "x-amz-bucket-region: " header in `header` (the header
 * text is modified in place to terminate the region name).  If found and
 * the current host is an amazonaws.com one, ad->region and ad->host are
 * updated and `url` is rewritten as host + path (+ "?query" if the user
 * supplied one).
 *
 * Returns 0 if `url` was rewritten, -1 otherwise.  `response` is unused.
 */
static int redirect_endpoint_callback(void *auth, long response,
                                      kstring_t *header, kstring_t *url) {
    static const char region_tag[] = "x-amz-bucket-region: ";
    s3_auth_data *ad = (s3_auth_data *)auth;
    char *region_start, *region_end;

    // get the new region from the reply header
    region_start = strstr(header->s, region_tag);
    if (region_start == NULL)
        return -1;

    region_start += strlen(region_tag);
    for (region_end = region_start;
         isalnum_c(*region_end) || ispunct_c(*region_end);
         region_end++)
        ;
    *region_end = 0;

    if (strstr(ad->host.s, "amazonaws.com") == NULL)
        return -1;

    ad->region.l = 0;
    kputs(region_start, &ad->region);

    ad->host.l = 0;
    ksprintf(&ad->host, "s3.%s.amazonaws.com", region_start);

    if (ad->region.l == 0 || ad->host.l == 0)
        return -1;

    url->l = 0;
    kputs(ad->host.s, url);
    kputsn(ad->bucket, strlen(ad->bucket), url);
    if (ad->user_query_string.l) {
        kputc('?', url);
        kputsn(ad->user_query_string.s, ad->user_query_string.l, url);
    }

    return 0;
}
583
584
/*
 * Parse an s3: URL, gather credentials and build the real http(s) URL.
 *
 * s3url  - URL of the form s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH
 *          (a single name before '@' is taken as a profile name).
 * mode   - open mode; the auth data is marked 'r' if it contains 'r',
 *          otherwise 'w'.
 * sigver - signature version; 4 selects the layout of ad->host and
 *          ad->bucket needed by the version 4 signing code.
 * url    - kstring to which the translated URL is appended.
 *
 * Credentials come from the URL if present, otherwise from environment
 * variables, then (while no ID has been found) from the shared
 * credentials file, the s3cmd configuration file and ~/.awssecret.
 *
 * Returns newly allocated auth data (release with free_auth_data()), or
 * NULL on failure.  On failure `url` may already have been appended to;
 * the caller still owns it.
 */
static s3_auth_data * setup_auth_data(const char *s3url, const char *mode,
                                      int sigver, kstring_t *url)
{
    s3_auth_data *ad = calloc(1, sizeof(*ad));
    const char *bucket, *path, *section;
    char *escaped = NULL;
    size_t url_path_pos;
    ptrdiff_t bucket_len;
    int is_https = 1, dns_compliant;
    char *query_start;
    enum {s3_auto, s3_virtual, s3_path} address_style = s3_auto;

    if (!ad)
        return NULL;
    ad->mode = strchr(mode, 'r') ? 'r' : 'w';

    // Our S3 URL format is s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH

    if (s3url[2] == '+') {
        // Test the strchr() result itself; adding 1 first would hide a NULL
        const char *scheme_end = strchr(s3url, ':');
        if (scheme_end == NULL)
            goto error;
        bucket = scheme_end + 1;
        // Copy "SCHEME:" and make sure url->s is valid before reading it
        if (kputsn(&s3url[3], bucket - &s3url[3], url) < 0)
            goto error;
        is_https = strncmp(url->s, "https:", 6) == 0;
    }
    else {
        kputs("https:", url);
        bucket = &s3url[3];
    }
    while (*bucket == '/') kputc(*bucket++, url);

    path = bucket + strcspn(bucket, "/?#@");

    if (*path == '@') {
        // strpbrk() cannot fail here as the '@' at *path is in its set
        const char *colon = strpbrk(bucket, ":@");
        if (*colon != ':') {
            urldecode_kput(bucket, colon - bucket, &ad->profile);
        }
        else {
            const char *colon2 = strpbrk(&colon[1], ":@");
            urldecode_kput(bucket, colon - bucket, &ad->id);
            urldecode_kput(&colon[1], colon2 - &colon[1], &ad->secret);
            if (*colon2 == ':')
                urldecode_kput(&colon2[1], path - &colon2[1], &ad->token);
        }

        bucket = &path[1];
        path = bucket + strcspn(bucket, "/?#");
    }
    else {
        // If the URL has no ID[:SECRET]@, consider environment variables.
        const char *v;
        if ((v = getenv("AWS_ACCESS_KEY_ID")) != NULL) kputs(v, &ad->id);
        if ((v = getenv("AWS_SECRET_ACCESS_KEY")) != NULL) kputs(v, &ad->secret);
        if ((v = getenv("AWS_SESSION_TOKEN")) != NULL) kputs(v, &ad->token);
        if ((v = getenv("AWS_DEFAULT_REGION")) != NULL) kputs(v, &ad->region);
        if ((v = getenv("HTS_S3_HOST")) != NULL) kputs(v, &ad->host);

        if ((v = getenv("AWS_DEFAULT_PROFILE")) != NULL) kputs(v, &ad->profile);
        else if ((v = getenv("AWS_PROFILE")) != NULL) kputs(v, &ad->profile);
        else kputs("default", &ad->profile);

        if ((v = getenv("HTS_S3_ADDRESS_STYLE")) != NULL) {
            if (strcasecmp(v, "virtual") == 0) {
                address_style = s3_virtual;
            } else if (strcasecmp(v, "path") == 0) {
                address_style = s3_path;
            }
        }
    }

    // The profile is unset when the URL gave ID:SECRET@ or an empty name;
    // never pass a NULL section name to parse_ini().
    section = ad->profile.s ? ad->profile.s : "";

    if (ad->id.l == 0) {
        kstring_t url_style = KS_INITIALIZE;
        kstring_t expiry_time = KS_INITIALIZE;
        const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE");
        parse_ini(v? v : "~/.aws/credentials", section,
                  "aws_access_key_id", &ad->id,
                  "aws_secret_access_key", &ad->secret,
                  "aws_session_token", &ad->token,
                  "region", &ad->region,
                  "addressing_style", &url_style,
                  "expiry_time", &expiry_time,
                  NULL);

        if (url_style.l) {
            if (strcmp(url_style.s, "virtual") == 0) {
                address_style = s3_virtual;
            } else if (strcmp(url_style.s, "path") == 0) {
                address_style = s3_path;
            } else {
                address_style = s3_auto;
            }
        }
        if (expiry_time.l) {
            // Not a real part of the AWS configuration file, but it allows
            // support for short-term credentials like those for the IAM
            // service.  The botocore library uses the key "expiry_time"
            // internally for this purpose.
            // See https://github.com/boto/botocore/blob/develop/botocore/credentials.py
            ad->creds_expiry_time = parse_rfc3339_date(&expiry_time);
        }

        ks_free(&url_style);
        ks_free(&expiry_time);
    }

    if (ad->id.l == 0) {
        kstring_t url_style = KS_INITIALIZE;
        const char *v = getenv("HTS_S3_S3CFG");
        parse_ini(v? v : "~/.s3cfg", section, "access_key", &ad->id,
                  "secret_key", &ad->secret, "access_token", &ad->token,
                  "host_base", &ad->host,
                  "bucket_location", &ad->region,
                  "host_bucket", &url_style,
                  NULL);

        if (url_style.l) {
            // Conforming to s3cmd's GitHub PR#416, host_bucket without the "%(bucket)s" string
            // indicates use of path style addressing.
            if (strstr(url_style.s, "%(bucket)s") == NULL) {
                address_style = s3_path;
            } else {
                address_style = s3_auto;
            }
        }

        ks_free(&url_style);
    }

    if (ad->id.l == 0)
        parse_simple("~/.awssecret", &ad->id, &ad->secret);


    // if address_style is set, force the dns_compliant setting
    if (address_style == s3_virtual) {
        dns_compliant = 1;
    } else if (address_style == s3_path) {
        dns_compliant = 0;
    } else {
        dns_compliant = is_dns_compliant(bucket, path, is_https);
    }

    if (ad->host.l == 0)
        kputs("s3.amazonaws.com", &ad->host);

    if (!dns_compliant && ad->region.l > 0
        && strcmp(ad->host.s, "s3.amazonaws.com") == 0) {
        // Can avoid a redirection by including the region in the host name
        // (assuming the right one has been specified)
        ad->host.l = 0;
        ksprintf(&ad->host, "s3.%s.amazonaws.com", ad->region.s);
    }

    if (ad->region.l == 0)
        kputs("us-east-1", &ad->region);

    if (!is_escaped(path)) {
        escaped = escape_path(path);
        if (escaped == NULL) {
            goto error;
        }
    }

    bucket_len = path - bucket;

    // Use virtual hosted-style access if possible, otherwise path-style.
    if (dns_compliant) {
        size_t url_host_pos = url->l;
        // Append "bucket.host" to url
        kputsn_(bucket, bucket_len, url);
        kputc('.', url);
        kputsn(ad->host.s, ad->host.l, url);
        url_path_pos = url->l;

        if (sigver == 4) {
            // Copy back to ad->host to use when making the signature
            ad->host.l = 0;
            kputsn(url->s + url_host_pos, url->l - url_host_pos, &ad->host);
        }
    }
    else {
        // Append "host/bucket" to url
        kputsn(ad->host.s, ad->host.l, url);
        url_path_pos = url->l;
        kputc('/', url);
        kputsn(bucket, bucket_len, url);
    }

    kputs(escaped == NULL ? path : escaped, url);

    // ad->bucket holds the path that gets signed.  The +1 in the copy
    // lengths below takes the terminating NUL of url->s as well.
    if (sigver == 4 || !dns_compliant) {
        ad->bucket = malloc(url->l - url_path_pos + 1);
        if (ad->bucket == NULL) {
            goto error;
        }
        memcpy(ad->bucket, url->s + url_path_pos, url->l - url_path_pos + 1);
    }
    else {
        // Virtual-hosted style: put "/bucket" back in front of the path
        ad->bucket = malloc(url->l - url_path_pos + bucket_len + 2);
        if (ad->bucket == NULL) {
            goto error;
        }
        ad->bucket[0] = '/';
        memcpy(ad->bucket + 1, bucket, bucket_len);
        memcpy(ad->bucket + bucket_len + 1,
               url->s + url_path_pos, url->l - url_path_pos + 1);
    }

    // write any query strings to its own place to use later
    if ((query_start = strchr(ad->bucket, '?'))) {
        kputs(query_start + 1, &ad->user_query_string);
        *query_start = 0;
    }

    free(escaped);

    return ad;

 error:
    free(escaped);
    free_auth_data(ad);
    return NULL;
}
809
810
static hFILE * s3_rewrite(const char *s3url, const char *mode, va_list *argsp)
811
0
{
812
0
    kstring_t url = { 0, 0, NULL };
813
0
    s3_auth_data *ad = setup_auth_data(s3url, mode, 2, &url);
814
815
0
    if (!ad)
816
0
        return NULL;
817
818
0
    hFILE *fp = hopen(url.s, mode, "va_list", argsp,
819
0
                      "httphdr_callback", auth_header_callback,
820
0
                      "httphdr_callback_data", ad,
821
0
                      "redirect_callback", redirect_endpoint_callback,
822
0
                      "redirect_callback_data", ad,
823
0
                      NULL);
824
0
    if (!fp) goto fail;
825
826
0
    free(url.s);
827
0
    return fp;
828
829
0
 fail:
830
0
    free(url.s);
831
0
    free_auth_data(ad);
832
0
    return NULL;
833
0
}
834
835
/***************************************************************
836
837
AWS S3 sig version 4 writing code
838
839
****************************************************************/
840
841
0
static void hash_string(char *in, size_t length, char *out, size_t out_len) {
842
0
    unsigned char hashed[SHA256_DIGEST_BUFSIZE];
843
0
    int i, j;
844
845
0
    s3_sha256((const unsigned char *)in, length, hashed);
846
847
0
    for (i = 0, j = 0; i < SHA256_DIGEST_BUFSIZE; i++, j+= 2) {
848
0
        snprintf(out + j, out_len - j, "%02x", hashed[i]);
849
0
    }
850
0
}
851
852
0
/* Reset a kstring_t to the empty, unallocated state (nothing is freed). */
static void ksinit(kstring_t *s) {
    *s = (kstring_t) { 0, 0, NULL };
}
857
858
859
0
/* Release a kstring_t's buffer and leave it empty and ready for reuse. */
static void ksfree(kstring_t *s) {
    free(s->s);
    s->s = NULL;
    s->l = s->m = 0;
}
863
864
865
0
/*
 * Compute the version 4 signature of `string_to_sign`.
 *
 * The signing key is derived by chaining HMAC-SHA256 over, in order:
 * "AWS4" + secret key, the short date, the region, the service name "s3"
 * and the literal "aws4_request".  The resulting signature is written to
 * `signature_string` (of size `sig_string_len`) as lower-case hex.
 *
 * Returns 0 on success, -1 if the initial key string could not be built.
 */
static int make_signature(s3_auth_data *ad, kstring_t *string_to_sign, char *signature_string, size_t sig_string_len) {
    static const unsigned char service_name[] = "s3";
    static const unsigned char terminator[] = "aws4_request";

    unsigned char k_date[SHA256_DIGEST_BUFSIZE];
    unsigned char k_region[SHA256_DIGEST_BUFSIZE];
    unsigned char k_service[SHA256_DIGEST_BUFSIZE];
    unsigned char k_signing[SHA256_DIGEST_BUFSIZE];
    unsigned char sig[SHA256_DIGEST_BUFSIZE];

    kstring_t seed = {0, 0, NULL};
    unsigned int hlen;
    unsigned int n;

    ksprintf(&seed, "AWS4%s", ad->secret.s);

    if (seed.l == 0) {
        return -1;
    }

    // Each step keys the next HMAC with the previous step's output
    s3_sign_sha256(seed.s, seed.l,
                   (const unsigned char *)ad->date_short, strlen(ad->date_short),
                   k_date, &hlen);
    s3_sign_sha256(k_date, hlen,
                   (const unsigned char *)ad->region.s, ad->region.l,
                   k_region, &hlen);
    s3_sign_sha256(k_region, hlen,
                   service_name, sizeof(service_name) - 1,
                   k_service, &hlen);
    s3_sign_sha256(k_service, hlen,
                   terminator, sizeof(terminator) - 1,
                   k_signing, &hlen);
    s3_sign_sha256(k_signing, hlen,
                   (const unsigned char *)string_to_sign->s, string_to_sign->l,
                   sig, &hlen);

    // Hex-encode, two characters per signature byte
    for (n = 0; n < hlen; n++) {
        snprintf(signature_string + 2 * n, sig_string_len - 2 * n,
                 "%02x", sig[n]);
    }

    ksfree(&seed);

    return 0;
}
899
900
901
0
/* Build the "Authorization: AWS4-HMAC-SHA256 ..." header line for a request.

   ad            auth data (id, token, host, bucket, region, dates and the
                 canonical query string are read from it)
   http_request  request verb placed on the first line of the canonical request
   content       hex hash of the request payload
   auth          kstring that receives the finished header text

   The canonical request is assembled and hashed, wrapped into the string to
   sign together with the scope, and then signed via make_signature().

   Returns 0 on success, -1 if any intermediate string could not be built or
   signing failed. */
static int make_authorisation(s3_auth_data *ad, char *http_request, char *content, kstring_t *auth) {
    kstring_t signed_headers = {0, 0, NULL};
    kstring_t canonical_headers = {0, 0, NULL};
    kstring_t canonical_request = {0, 0, NULL};
    kstring_t scope = {0, 0, NULL};
    kstring_t string_to_sign = {0, 0, NULL};
    char cr_hash[HASH_LENGTH_SHA256];
    char signature_string[HASH_LENGTH_SHA256];
    const int has_token = ad->token.l != 0;
    int ret = -1;

    // The security token header is only signed when a token is present.
    kputs(has_token
              ? "host;x-amz-content-sha256;x-amz-date;x-amz-security-token"
              : "host;x-amz-content-sha256;x-amz-date",
          &signed_headers);
    if (signed_headers.l == 0)
        goto cleanup;

    if (has_token) {
        ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\nx-amz-security-token:%s\n",
                 ad->host.s, content, ad->date_long, ad->token.s);
    } else {
        ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\n",
                 ad->host.s, content, ad->date_long);
    }
    if (canonical_headers.l == 0)
        goto cleanup;

    // bucket == canonical_uri
    ksprintf(&canonical_request, "%s\n%s\n%s\n%s\n%s\n%s",
             http_request, ad->bucket, ad->canonical_query_string.s,
             canonical_headers.s, signed_headers.s, content);
    if (canonical_request.l == 0)
        goto cleanup;

    hash_string(canonical_request.s, canonical_request.l, cr_hash, sizeof(cr_hash));

    ksprintf(&scope, "%s/%s/s3/aws4_request", ad->date_short, ad->region.s);
    if (scope.l == 0)
        goto cleanup;

    ksprintf(&string_to_sign, "AWS4-HMAC-SHA256\n%s\n%s\n%s", ad->date_long, scope.s, cr_hash);
    if (string_to_sign.l == 0)
        goto cleanup;

    if (make_signature(ad, &string_to_sign, signature_string, sizeof(signature_string)) != 0)
        goto cleanup;

    ksprintf(auth, "Authorization: AWS4-HMAC-SHA256 Credential=%s/%s/%s/s3/aws4_request,SignedHeaders=%s,Signature=%s",
             ad->id.s, ad->date_short, ad->region.s, signed_headers.s, signature_string);
    if (auth->l != 0)
        ret = 0;

 cleanup:
    ksfree(&signed_headers);
    ksfree(&canonical_headers);
    ksfree(&canonical_request);
    ksfree(&scope);
    ksfree(&string_to_sign);

    return ret;
}
980
981
982
0
/* Refresh the cached request timestamps when they are older than
   AUTH_LIFETIME.

   ad   auth data whose date_long, date_short, date_html and auth_time
        fields are updated
   now  current time

   When a refresh is due, date_long is formatted as YYYYMMDDTHHMMSSZ and
   date_short as YYYYMMDD (both in UTC), and date_html is rebuilt as the
   "x-amz-date: ..." header line.  auth_time is only advanced once all three
   have been produced, so a failed refresh is retried on the next call
   instead of being masked until the lifetime next expires.

   Returns 0 if a date header is available, -1 otherwise. */
static int update_time(s3_auth_data *ad, time_t now) {
    if (now - ad->auth_time > AUTH_LIFETIME) {
#ifdef HAVE_GMTIME_R
        struct tm tm_buffer;
        struct tm *tm = gmtime_r(&now, &tm_buffer);
#else
        struct tm *tm = gmtime(&now);
#endif

        // The conversion can fail; strftime() must not be given NULL.
        if (tm == NULL) {
            return -1;
        }

        if (strftime(ad->date_long, 17, "%Y%m%dT%H%M%SZ", tm) != 16) {
            return -1;
        }

        if (strftime(ad->date_short, 9, "%Y%m%d", tm) != 8) {
            return -1;
        }

        ad->date_html.l = 0;
        if (ksprintf(&ad->date_html, "x-amz-date: %s", ad->date_long) < 0) {
            ad->date_html.l = 0;
            return -1;
        }

        // update timestamp only after everything above succeeded
        ad->auth_time = now;
    }

    return ad->date_html.l ? 0 : -1;
}
1011
1012
1013
0
/* qsort() comparator for an array of C string pointers: orders the
   pointed-to strings with strcmp(). */
static int query_cmp(const void *p1, const void *p2) {
    const char *lhs = *(char *const *)p1;
    const char *rhs = *(char *const *)p2;

    return strcmp(lhs, rhs);
}
1019
1020
1021
/* Query strings must be in alphabetical order for authorisation */
1022
1023
0
static int order_query_string(kstring_t *qs) {
1024
0
    int *query_offset = NULL;
1025
0
    int num_queries, i;
1026
0
    char **queries = NULL;
1027
0
    kstring_t ordered = {0, 0, NULL};
1028
0
    char *escaped = NULL;
1029
0
    int ret = -1;
1030
1031
0
    if ((query_offset = ksplit(qs, '&', &num_queries)) == NULL) {
1032
0
        return -1;
1033
0
    }
1034
1035
0
    if ((queries = malloc(num_queries * sizeof(char*))) == NULL)
1036
0
        goto err;
1037
1038
0
    for (i = 0; i < num_queries; i++) {
1039
0
        queries[i] = qs->s + query_offset[i];
1040
0
    }
1041
1042
0
    qsort(queries, num_queries, sizeof(char *), query_cmp);
1043
1044
0
    for (i = 0; i < num_queries; i++) {
1045
0
        if (i) {
1046
0
            kputs("&", &ordered);
1047
0
        }
1048
1049
0
        kputs(queries[i], &ordered);
1050
0
    }
1051
1052
0
    if ((escaped = escape_query(ordered.s)) == NULL)
1053
0
        goto err;
1054
1055
0
    qs->l = 0;
1056
0
    kputs(escaped, qs);
1057
1058
0
    ret = 0;
1059
0
 err:
1060
0
    free(ordered.s);
1061
0
    free(queries);
1062
0
    free(query_offset);
1063
0
    free(escaped);
1064
1065
0
    return ret;
1066
0
}
1067
1068
1069
/* Authorisation callback used for the write path (registered as
   "s3_auth_callback" by s3_open_v4()).

   auth      the s3_auth_data for this connection
   request   request verb; NULL is the signal to free the auth data
   content   request body to hash, or NULL for an empty body
   cqs       canonical query string for this request
   hash      receives the hex hash of the content
   auth_str  receives the Authorization header line
   date      receives the x-amz-date header line
   token     receives the x-amz-security-token header line, if a token is set
   uqs       non-zero to append the user supplied query string

   Returns 0 on success, -1 on failure. */
static int write_authorisation_callback(void *auth, char *request, kstring_t *content, char *cqs,
                                        kstring_t *hash, kstring_t *auth_str, kstring_t *date,
                                        kstring_t *token, int uqs) {
    s3_auth_data *ad = (s3_auth_data *)auth;
    char content_hash[HASH_LENGTH_SHA256];
    time_t now;

    // signal to free auth data
    if (!request) {
        free_auth_data(ad);
        return 0;
    }

    now = time(NULL);
    if (update_time(ad, now) != 0)
        return -1;

    if (ad->creds_expiry_time > 0
        && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) {
        refresh_auth_data(ad);
    }

    // A missing body is hashed as the empty string.
    if (content)
        hash_string(content->s, content->l, content_hash, sizeof(content_hash));
    else
        hash_string("", 0, content_hash, sizeof(content_hash));

    ad->canonical_query_string.l = 0;
    kputs(cqs, &ad->canonical_query_string);
    if (ad->canonical_query_string.l == 0)
        return -1;

    /* add a user provided query string, normally only useful on upload initiation */
    if (uqs) {
        kputs("&", &ad->canonical_query_string);
        kputs(ad->user_query_string.s, &ad->canonical_query_string);

        if (order_query_string(&ad->canonical_query_string) != 0)
            return -1;
    }

    if (make_authorisation(ad, request, content_hash, auth_str) != 0)
        return -1;

    kputs(ad->date_html.s, date);
    kputsn(content_hash, HASH_LENGTH_SHA256, hash);
    if (date->l == 0 || hash->l == 0)
        return -1;

    if (ad->token.l)
        ksprintf(token, "x-amz-security-token: %s", ad->token.s);

    return 0;
}
1133
1134
1135
0
static int v4_auth_header_callback(void *ctx, char ***hdrs) {
1136
0
    s3_auth_data *ad = (s3_auth_data *) ctx;
1137
0
    char content_hash[HASH_LENGTH_SHA256];
1138
0
    kstring_t content = KS_INITIALIZE;
1139
0
    kstring_t authorisation = KS_INITIALIZE;
1140
0
    kstring_t token_hdr = KS_INITIALIZE;
1141
0
    char *date_html = NULL;
1142
0
    time_t now;
1143
0
    int idx;
1144
1145
0
    if (!hdrs) { // Closing connection
1146
0
        free_auth_data(ad);
1147
0
        return 0;
1148
0
    }
1149
1150
0
    now = time(NULL);
1151
1152
0
    if (update_time(ad, now)) {
1153
0
        return -1;
1154
0
    }
1155
1156
0
    if (ad->creds_expiry_time > 0
1157
0
        && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) {
1158
0
        refresh_auth_data(ad);
1159
0
    }
1160
1161
0
    if (!ad->id.l || !ad->secret.l) {
1162
0
        return copy_auth_headers(ad, hdrs);
1163
0
    }
1164
1165
0
    hash_string("", 0, content_hash, sizeof(content_hash)); // empty hash
1166
1167
0
    ad->canonical_query_string.l = 0;
1168
1169
0
    if (ad->user_query_string.l > 0) {
1170
0
        kputs(ad->user_query_string.s, &ad->canonical_query_string);
1171
1172
0
        if (order_query_string(&ad->canonical_query_string)) {
1173
0
            return -1;
1174
0
        }
1175
0
    } else {
1176
0
        kputs("", &ad->canonical_query_string);
1177
0
    }
1178
1179
0
    if (make_authorisation(ad, "GET", content_hash, &authorisation)) {
1180
0
        return -1;
1181
0
    }
1182
1183
0
    ksprintf(&content, "x-amz-content-sha256: %s", content_hash);
1184
0
    date_html = strdup(ad->date_html.s);
1185
1186
0
    if (ad->token.l > 0) {
1187
0
        kputs("X-Amz-Security-Token: ", &token_hdr);
1188
0
        kputs(ad->token.s, &token_hdr);
1189
0
    }
1190
1191
0
    if (content.l == 0 || date_html == NULL) {
1192
0
        ksfree(&authorisation);
1193
0
        ksfree(&content);
1194
0
        ksfree(&token_hdr);
1195
0
        free(date_html);
1196
0
        return -1;
1197
0
    }
1198
1199
0
    *hdrs = &ad->headers[0];
1200
0
    idx = 0;
1201
0
    ad->headers[idx++] = ks_release(&authorisation);
1202
0
    ad->headers[idx++] = date_html;
1203
0
    ad->headers[idx++] = ks_release(&content);
1204
0
    if (token_hdr.s)
1205
0
        ad->headers[idx++] = ks_release(&token_hdr);
1206
0
    ad->headers[idx++] = NULL;
1207
1208
0
    return 0;
1209
0
}
1210
1211
0
/* Extract the region name from a 400 Bad Request response body.

   v4 signatures in virtual hosted mode return 400 Bad Request if the
   wrong region is used to make the signature.  The response is an xml
   document which includes the name of the correct region.  This can
   be extracted and used to generate a corrected signature.
   As the xml is fairly simple, go with something "good enough" instead
   of trying to parse it properly.

   fp  stream positioned at the response body
   ad  auth data whose region is replaced by the one found

   Returns 0 if a non-empty region was stored, -1 otherwise. */
static int handle_400_response(hFILE *fp, s3_auth_data *ad) {
    static const char open_tag[] = "<Region>";
    char buffer[1024];
    char *start, *end;
    ssize_t bytes;

    bytes = hread(fp, buffer, sizeof(buffer) - 1);
    if (bytes < 0)
        return -1;
    buffer[bytes] = '\0';

    start = strstr(buffer, open_tag);
    if (!start)
        return -1;

    // Step over the tag and any leading white space.
    start += sizeof(open_tag) - 1;
    while (isspace((unsigned char) *start))
        start++;

    end = strchr(start, '<');
    if (!end || strncmp(end + 1, "/Region>", 8) != 0)
        return -1;

    // Trim trailing white space.
    while (end > start && isspace((unsigned char) end[-1]))
        end--;

    ad->region.l = 0;
    kputsn(start, end - start, &ad->region);

    return ad->region.l == 0 ? -1 : 0;
}
1246
1247
0
/* Replace the region stored in the auth data (passed as adv) with a copy of
   the given region string.  Returns 0 on success, 1 if the copy failed. */
static int set_region(void *adv, kstring_t *region) {
    s3_auth_data *ad = (s3_auth_data *) adv;

    ad->region.l = 0;
    if (kputsn(region->s, region->l, &ad->region) < 0)
        return 1;

    return 0;
}
1253
1254
/* Translate an HTTP status code into an errno value.
   Codes below 400 give 0; unlisted 4xx codes give EINVAL and unlisted
   codes from 500 upwards give EIO. */
static int http_status_errno(int status)
{
    if (status < 400)
        return 0;

    switch (status) {
    case 401:
    case 407:
        return EPERM;
    case 403:
        return EACCES;
    case 404:
    case 410:
        return ENOENT;
    case 405:
        return EROFS;
    case 408:
    case 504:
        return ETIMEDOUT;
    case 501:
        return ENOSYS;
    case 503:
        return EBUSY;
    default:
        return status >= 500 ? EIO : EINVAL;
    }
}
1276
1277
0
static hFILE *s3_open_v4(const char *s3url, const char *mode, va_list *argsp) {
1278
0
    kstring_t url = { 0, 0, NULL };
1279
1280
0
    s3_auth_data *ad = setup_auth_data(s3url, mode, 4, &url);
1281
0
    hFILE *fp = NULL;
1282
1283
0
    if (ad == NULL) {
1284
0
        return NULL;
1285
0
    }
1286
1287
0
    if (ad->mode == 'r') {
1288
0
        long http_response = 0;
1289
1290
0
        fp = hopen(url.s, mode, "va_list", argsp,
1291
0
                   "httphdr_callback", v4_auth_header_callback,
1292
0
                   "httphdr_callback_data", ad,
1293
0
                   "redirect_callback", redirect_endpoint_callback,
1294
0
                   "redirect_callback_data", ad,
1295
0
                   "http_response_ptr", &http_response,
1296
0
                   "fail_on_error", 0,
1297
0
                   NULL);
1298
1299
0
        if (fp == NULL) goto error;
1300
1301
0
        if (http_response == 400) {
1302
0
            ad->refcount = 1;
1303
0
            if (handle_400_response(fp, ad) != 0) {
1304
0
                goto error;
1305
0
            }
1306
0
            hclose_abruptly(fp);
1307
0
            fp = hopen(url.s, mode, "va_list", argsp,
1308
0
                       "httphdr_callback", v4_auth_header_callback,
1309
0
                       "httphdr_callback_data", ad,
1310
0
                       "redirect_callback", redirect_endpoint_callback,
1311
0
                       "redirect_callback_data", ad,
1312
0
                       NULL);
1313
0
        } else if (http_response > 400) {
1314
0
            ad->refcount = 1;
1315
0
            errno = http_status_errno(http_response);
1316
0
            goto error;
1317
0
        }
1318
1319
0
        if (fp == NULL) goto error;
1320
0
    } else {
1321
0
        kstring_t final_url = {0, 0, NULL};
1322
1323
         // add the scheme marker
1324
0
        ksprintf(&final_url, "s3w+%s", url.s);
1325
1326
0
        if(final_url.l == 0) goto error;
1327
1328
0
        fp = hopen(final_url.s, mode, "va_list", argsp,
1329
0
                   "s3_auth_callback",  write_authorisation_callback,
1330
0
                   "s3_auth_callback_data", ad,
1331
0
                   "redirect_callback", redirect_endpoint_callback,
1332
0
                   "set_region_callback", set_region,
1333
0
                   NULL);
1334
0
        free(final_url.s);
1335
1336
0
        if (fp == NULL) goto error;
1337
0
    }
1338
1339
0
    free(url.s);
1340
1341
0
    return fp;
1342
1343
0
  error:
1344
1345
0
    if (fp) hclose_abruptly(fp);
1346
0
    free(url.s);
1347
0
    free_auth_data(ad);
1348
1349
0
    return NULL;
1350
0
}
1351
1352
1353
static hFILE *s3_open(const char *url, const char *mode)
1354
0
{
1355
0
    hFILE *fp;
1356
1357
0
    kstring_t mode_colon = { 0, 0, NULL };
1358
0
    kputs(mode, &mode_colon);
1359
0
    kputc(':', &mode_colon);
1360
1361
0
    if (getenv("HTS_S3_V2") == NULL) { // Force the v2 signature code
1362
0
        fp = s3_open_v4(url, mode_colon.s, NULL);
1363
0
    } else {
1364
0
        fp = s3_rewrite(url, mode_colon.s, NULL);
1365
0
    }
1366
1367
0
    free(mode_colon.s);
1368
1369
0
    return fp;
1370
0
}
1371
1372
/* Scheme handler entry point for opening an S3 URL with extra arguments.

   url         the s3 URL
   mode_colon  open mode string, passed on unchanged
   args0       extra arguments, forwarded by address to the open function

   Uses the v4 signature code unless the HTS_S3_V2 environment variable is
   set, in which case s3_rewrite() is used.  Returns the opened stream or
   NULL on failure. */
static hFILE *s3_vopen(const char *url, const char *mode_colon, va_list args0)
{
    hFILE *fp;
    // Need to use va_copy() as we can only take the address of an actual
    // va_list object, not that of a parameter whose type may have decayed.
    va_list args;
    va_copy(args, args0);

    fp = (getenv("HTS_S3_V2") == NULL)
             ? s3_open_v4(url, mode_colon, &args)
             : s3_rewrite(url, mode_colon, &args);

    va_end(args);
    return fp;
}
1389
1390
int PLUGIN_GLOBAL(hfile_plugin_init,_s3)(struct hFILE_plugin *self)
1391
1
{
1392
1
    static const struct hFILE_scheme_handler handler =
1393
1
        { s3_open, hfile_always_remote, "Amazon S3", 2000 + 50, s3_vopen
1394
1
        };
1395
1396
#ifdef ENABLE_PLUGINS
1397
    // Embed version string for examination via strings(1) or what(1)
1398
    static const char id[] = "@(#)hfile_s3 plugin (htslib)\t" HTS_VERSION_TEXT;
1399
    if (hts_verbose >= 9)
1400
        fprintf(stderr, "[M::hfile_s3.init] version %s\n", strchr(id, '\t')+1);
1401
#endif
1402
1403
1
    self->name = "Amazon S3";
1404
1
    hfile_add_scheme_handler("s3", &handler);
1405
1
    hfile_add_scheme_handler("s3+http", &handler);
1406
1
    hfile_add_scheme_handler("s3+https", &handler);
1407
1
    return 0;
1408
1
}