Line | Count | Source |
1 | | /* hfile_s3.c -- Amazon S3 backend for low-level file streams. |
2 | | |
3 | | Copyright (C) 2015-2017, 2019-2024 Genome Research Ltd. |
4 | | |
5 | | Author: John Marshall <jm18@sanger.ac.uk> |
6 | | |
7 | | Permission is hereby granted, free of charge, to any person obtaining a copy |
8 | | of this software and associated documentation files (the "Software"), to deal |
9 | | in the Software without restriction, including without limitation the rights |
10 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
11 | | copies of the Software, and to permit persons to whom the Software is |
12 | | furnished to do so, subject to the following conditions: |
13 | | |
14 | | The above copyright notice and this permission notice shall be included in |
15 | | all copies or substantial portions of the Software. |
16 | | |
17 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
18 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
19 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
20 | | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
21 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
22 | | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
23 | | DEALINGS IN THE SOFTWARE. */ |
24 | | |
25 | | #define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h |
26 | | #include <config.h> |
27 | | |
28 | | #include <stdarg.h> |
29 | | #include <stdio.h> |
30 | | #include <stdlib.h> |
31 | | #include <string.h> |
32 | | #include <strings.h> |
33 | | #include <time.h> |
34 | | |
35 | | #include <errno.h> |
36 | | |
37 | | #include "hfile_internal.h" |
38 | | #ifdef ENABLE_PLUGINS |
39 | | #include "version.h" |
40 | | #endif |
41 | | #include "htslib/hts.h" // for hts_version() and hts_verbose |
42 | | #include "htslib/kstring.h" |
43 | | #include "hts_time_funcs.h" |
44 | | |
/* Authorisation and addressing state for one S3 connection.  Allocated
   zero-filled by setup_auth_data() and released by free_auth_data(). */
typedef struct s3_auth_data {
    kstring_t id;                     // Access key id
    kstring_t token;                  // Session token; empty when not in use
    kstring_t secret;                 // Secret access key
    kstring_t region;                 // Region name; setup defaults it to "us-east-1"
    kstring_t canonical_query_string; // NOTE(review): not written by the code visible here
    kstring_t user_query_string;      // Text that followed '?' in the request path
    kstring_t host;                   // Endpoint host; setup defaults it to "s3.amazonaws.com"
    kstring_t profile;                // Section name looked up in the credentials files
    enum {s3_auto, s3_virtual, s3_path} url_style; // Addressing style; s3_auto decides per bucket name
    time_t creds_expiry_time;         // Parsed "expiry_time" setting; 0 when there is none
    char *bucket;                     // Resource path used for signing, query string removed
    kstring_t auth_hdr;               // Most recent "Authorization: ..." header text
    time_t auth_time;                 // When the headers were last regenerated
    char date[40];                    // "Date: ..." header text
    char date_long[17];               // NOTE(review): presumably the long-form signing timestamp - confirm
    char date_short[9];               // Date string fed into the signing-key derivation
    kstring_t date_html;              // NOTE(review): not used by the code visible here
    char mode;                        // 'r' or 'w', derived from the open mode
    char *headers[5];                 // NULL-terminated header list filled by copy_auth_headers()
    int refcount;                     // free_auth_data() decrements this and only frees at zero
} s3_auth_data;

#define AUTH_LIFETIME 60 // Regenerate auth headers if older than this
#define CREDENTIAL_LIFETIME 60 // Seconds before expiry to reread credentials
70 | | |
71 | | #if defined HAVE_COMMONCRYPTO |
72 | | |
73 | | #include <CommonCrypto/CommonHMAC.h> |
74 | | |
75 | | #define DIGEST_BUFSIZ CC_SHA1_DIGEST_LENGTH |
76 | | #define SHA256_DIGEST_BUFSIZE CC_SHA256_DIGEST_LENGTH |
77 | | #define HASH_LENGTH_SHA256 (SHA256_DIGEST_BUFSIZE * 2) + 1 |
78 | | |
79 | | static size_t |
80 | | s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message) |
81 | | { |
82 | | CCHmac(kCCHmacAlgSHA1, key->s, key->l, message->s, message->l, digest); |
83 | | return CC_SHA1_DIGEST_LENGTH; |
84 | | } |
85 | | |
86 | | |
87 | | static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) { |
88 | | CC_SHA256(in, length, out); |
89 | | } |
90 | | |
91 | | |
92 | | static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) { |
93 | | CCHmac(kCCHmacAlgSHA256, key, key_len, d, n, md); |
94 | | *md_len = CC_SHA256_DIGEST_LENGTH; |
95 | | } |
96 | | |
97 | | |
98 | | #elif defined HAVE_HMAC |
99 | | |
100 | | #include <openssl/hmac.h> |
101 | | #include <openssl/sha.h> |
102 | | |
103 | | #define DIGEST_BUFSIZ EVP_MAX_MD_SIZE |
104 | 15.0k | #define SHA256_DIGEST_BUFSIZE SHA256_DIGEST_LENGTH |
105 | 0 | #define HASH_LENGTH_SHA256 (SHA256_DIGEST_BUFSIZE * 2) + 1 |
106 | | |
107 | | static size_t |
108 | | s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message) |
109 | 0 | { |
110 | 0 | unsigned int len; |
111 | 0 | HMAC(EVP_sha1(), key->s, key->l, |
112 | 0 | (unsigned char *) message->s, message->l, digest, &len); |
113 | 0 | return len; |
114 | 0 | } |
115 | | |
116 | | |
117 | 456 | static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) { |
118 | 456 | SHA256(in, length, out); |
119 | 456 | } |
120 | | |
121 | | |
122 | 1.14k | static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) { |
123 | 1.14k | HMAC(EVP_sha256(), key, key_len, d, n, md, md_len); |
124 | 1.14k | } |
125 | | |
126 | | #else |
127 | | #error No HMAC() routine found by configure |
128 | | #endif |
129 | | |
130 | | static void |
131 | | urldecode_kput(const char *s, int len, kstring_t *str) |
132 | 678 | { |
133 | 678 | char buf[3]; |
134 | 678 | int i = 0; |
135 | | |
136 | 85.0k | while (i < len) |
137 | 84.3k | if (s[i] == '%' && i+2 < len) { |
138 | 766 | buf[0] = s[i+1], buf[1] = s[i+2], buf[2] = '\0'; |
139 | 766 | kputc(strtol(buf, NULL, 16), str); |
140 | 766 | i += 3; |
141 | 766 | } |
142 | 83.5k | else kputc(s[i++], str); |
143 | 678 | } |
144 | | |
145 | | static void base64_kput(const unsigned char *data, size_t len, kstring_t *str) |
146 | 0 | { |
147 | 0 | static const char base64[] = |
148 | 0 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; |
149 | |
|
150 | 0 | size_t i = 0; |
151 | 0 | unsigned x = 0; |
152 | 0 | int bits = 0, pad = 0; |
153 | |
|
154 | 0 | while (bits || i < len) { |
155 | 0 | if (bits < 6) { |
156 | 0 | x <<= 8, bits += 8; |
157 | 0 | if (i < len) x |= data[i++]; |
158 | 0 | else pad++; |
159 | 0 | } |
160 | |
|
161 | 0 | bits -= 6; |
162 | 0 | kputc(base64[(x >> bits) & 63], str); |
163 | 0 | } |
164 | |
|
165 | 0 | str->l -= pad; |
166 | 0 | kputsn("==", pad, str); |
167 | 0 | } |
168 | | |
/* Decide whether the bucket name s0[0..slim) can be used as a host name
   label (virtual-hosted addressing).

   Accepts lower-case letters, digits, '-' (not first or last) and, for
   non-https only, '.' with an alphanumeric on either side.  The name must
   contain at least one non-digit and be 3 to 63 characters long.
   Returns non-zero when the name qualifies. */
static int is_dns_compliant(const char *s0, const char *slim, int is_https)
{
    int seen_nondigit = 0;
    const char *p;

    for (p = s0; p < slim; p++) {
        if (islower_c(*p)) {
            seen_nondigit = 1;
            continue;
        }
        if (*p == '-') {
            if (p == s0 || p + 1 == slim) return 0;
            seen_nondigit = 1;
            continue;
        }
        if (isdigit_c(*p))
            continue;

        // Anything left must be a dot, and dots are only usable over http
        if (*p != '.') return 0;
        if (is_https) return 0;
        if (p == s0 || ! isalnum_c(p[-1])) return 0;
        if (p + 1 == slim || ! isalnum_c(p[1])) return 0;
    }

    // p stopped at slim (or never moved), so p - s0 is the character count
    if (!seen_nondigit) return 0;
    return (p - s0) >= 3 && (p - s0) <= 63;
}
192 | | |
193 | | static FILE *expand_tilde_open(const char *fname, const char *mode) |
194 | 621 | { |
195 | 621 | FILE *fp; |
196 | | |
197 | 621 | if (strncmp(fname, "~/", 2) == 0) { |
198 | 621 | kstring_t full_fname = { 0, 0, NULL }; |
199 | 621 | const char *home = getenv("HOME"); |
200 | 621 | if (! home) return NULL; |
201 | | |
202 | 621 | kputs(home, &full_fname); |
203 | 621 | kputs(&fname[1], &full_fname); |
204 | | |
205 | 621 | fp = fopen(full_fname.s, mode); |
206 | 621 | free(full_fname.s); |
207 | 621 | } |
208 | 0 | else |
209 | 0 | fp = fopen(fname, mode); |
210 | | |
211 | 621 | return fp; |
212 | 621 | } |
213 | | |
214 | | static void parse_ini(const char *fname, const char *section, ...) |
215 | 414 | { |
216 | 414 | kstring_t line = { 0, 0, NULL }; |
217 | 414 | int active = 1; // Start active, so global properties are accepted |
218 | 414 | char *s; |
219 | | |
220 | 414 | FILE *fp = expand_tilde_open(fname, "r"); |
221 | 414 | if (fp == NULL) return; |
222 | | |
223 | 0 | while (line.l = 0, kgetline(&line, (kgets_func *) fgets, fp) >= 0) |
224 | 0 | if (line.s[0] == '[' && (s = strchr(line.s, ']')) != NULL) { |
225 | 0 | *s = '\0'; |
226 | 0 | active = (strcmp(&line.s[1], section) == 0); |
227 | 0 | } |
228 | 0 | else if (active && (s = strpbrk(line.s, ":=")) != NULL) { |
229 | 0 | const char *key = line.s, *value = &s[1], *akey; |
230 | 0 | va_list args; |
231 | |
|
232 | 0 | while (isspace_c(*key)) key++; |
233 | 0 | while (s > key && isspace_c(s[-1])) s--; |
234 | 0 | *s = '\0'; |
235 | |
|
236 | 0 | while (isspace_c(*value)) value++; |
237 | 0 | while (line.l > 0 && isspace_c(line.s[line.l-1])) |
238 | 0 | line.s[--line.l] = '\0'; |
239 | |
|
240 | 0 | va_start(args, section); |
241 | 0 | while ((akey = va_arg(args, const char *)) != NULL) { |
242 | 0 | kstring_t *avar = va_arg(args, kstring_t *); |
243 | 0 | if (strcmp(key, akey) == 0) { |
244 | 0 | avar->l = 0; |
245 | 0 | kputs(value, avar); |
246 | 0 | break; } |
247 | 0 | } |
248 | 0 | va_end(args); |
249 | 0 | } |
250 | |
|
251 | 0 | fclose(fp); |
252 | 0 | free(line.s); |
253 | 0 | } |
254 | | |
255 | | static void parse_simple(const char *fname, kstring_t *id, kstring_t *secret) |
256 | 207 | { |
257 | 207 | kstring_t text = { 0, 0, NULL }; |
258 | 207 | char *s; |
259 | 207 | size_t len; |
260 | | |
261 | 207 | FILE *fp = expand_tilde_open(fname, "r"); |
262 | 207 | if (fp == NULL) return; |
263 | | |
264 | 0 | while (kgetline(&text, (kgets_func *) fgets, fp) >= 0) |
265 | 0 | kputc(' ', &text); |
266 | 0 | fclose(fp); |
267 | |
|
268 | 0 | s = text.s; |
269 | 0 | while (isspace_c(*s)) s++; |
270 | 0 | kputsn(s, len = strcspn(s, " \t"), id); |
271 | |
|
272 | 0 | s += len; |
273 | 0 | while (isspace_c(*s)) s++; |
274 | 0 | kputsn(s, strcspn(s, " \t"), secret); |
275 | |
|
276 | 0 | free(text.s); |
277 | 0 | } |
278 | | |
279 | 231 | static int copy_auth_headers(s3_auth_data *ad, char ***hdrs) { |
280 | 231 | char **hdr = &ad->headers[0]; |
281 | 231 | int idx = 0; |
282 | 231 | *hdrs = hdr; |
283 | | |
284 | 231 | hdr[idx] = strdup(ad->date); |
285 | 231 | if (!hdr[idx]) return -1; |
286 | 231 | idx++; |
287 | | |
288 | 231 | if (ad->token.l) { |
289 | 27 | kstring_t token_hdr = KS_INITIALIZE; |
290 | 27 | kputs("X-Amz-Security-Token: ", &token_hdr); |
291 | 27 | kputs(ad->token.s, &token_hdr); |
292 | 27 | if (token_hdr.s) { |
293 | 27 | hdr[idx++] = token_hdr.s; |
294 | 27 | } else { |
295 | 0 | goto fail; |
296 | 0 | } |
297 | 27 | } |
298 | | |
299 | 231 | if (ad->auth_hdr.l) { |
300 | 0 | hdr[idx] = strdup(ad->auth_hdr.s); |
301 | 0 | if (!hdr[idx]) goto fail; |
302 | 0 | idx++; |
303 | 0 | } |
304 | | |
305 | 231 | hdr[idx] = NULL; |
306 | 231 | return 0; |
307 | | |
308 | 0 | fail: |
309 | 0 | for (--idx; idx >= 0; --idx) |
310 | 0 | free(hdr[idx]); |
311 | 0 | return -1; |
312 | 231 | } |
313 | | |
314 | 464 | static void free_auth_data(s3_auth_data *ad) { |
315 | 464 | if (ad->refcount > 0) { |
316 | 5 | --ad->refcount; |
317 | 5 | return; |
318 | 5 | } |
319 | 459 | free(ad->profile.s); |
320 | 459 | free(ad->id.s); |
321 | 459 | free(ad->token.s); |
322 | 459 | free(ad->secret.s); |
323 | 459 | free(ad->region.s); |
324 | 459 | free(ad->canonical_query_string.s); |
325 | 459 | free(ad->user_query_string.s); |
326 | 459 | free(ad->host.s); |
327 | 459 | free(ad->bucket); |
328 | 459 | free(ad->auth_hdr.s); |
329 | 459 | free(ad->date_html.s); |
330 | 459 | free(ad); |
331 | 459 | } |
332 | | |
333 | | static time_t parse_rfc3339_date(kstring_t *datetime) |
334 | 0 | { |
335 | 0 | int offset = 0; |
336 | 0 | time_t when; |
337 | 0 | int num; |
338 | 0 | char should_be_t = '\0', timezone[10] = { '\0' }; |
339 | 0 | unsigned int year, mon, day, hour, min, sec; |
340 | |
|
341 | 0 | if (!datetime->s) |
342 | 0 | return 0; |
343 | | |
344 | | // It should be possible to do this with strptime(), but it seems |
345 | | // to not get on with our feature definitions. |
346 | 0 | num = sscanf(datetime->s, "%4u-%2u-%2u%c%2u:%2u:%2u%9s", |
347 | 0 | &year, &mon, &day, &should_be_t, &hour, &min, &sec, timezone); |
348 | 0 | if (num < 8) |
349 | 0 | return 0; |
350 | 0 | if (should_be_t != 'T' && should_be_t != 't' && should_be_t != ' ') |
351 | 0 | return 0; |
352 | 0 | struct tm parsed = { sec, min, hour, day, mon - 1, year - 1900, 0, 0, 0 }; |
353 | |
|
354 | 0 | switch (timezone[0]) { |
355 | 0 | case 'Z': |
356 | 0 | case 'z': |
357 | 0 | case '\0': |
358 | 0 | break; |
359 | 0 | case '+': |
360 | 0 | case '-': { |
361 | 0 | unsigned hr_off, min_off; |
362 | 0 | if (sscanf(timezone + 1, "%2u:%2u", &hr_off, &min_off)) { |
363 | 0 | if (hr_off < 24 && min_off <= 60) { |
364 | 0 | offset = ((hr_off * 60 + min_off) |
365 | 0 | * (timezone[0] == '+' ? -60 : 60)); |
366 | 0 | } |
367 | 0 | } |
368 | 0 | break; |
369 | 0 | } |
370 | 0 | default: |
371 | 0 | return 0; |
372 | 0 | } |
373 | | |
374 | 0 | when = hts_time_gm(&parsed); |
375 | 0 | return when >= 0 ? when + offset : 0; |
376 | 0 | } |
377 | | |
378 | 0 | static void refresh_auth_data(s3_auth_data *ad) { |
379 | | // Basically a copy of the AWS_SHARED_CREDENTIALS_FILE part of |
380 | | // setup_auth_data(), but this only reads the authorisation parts. |
381 | 0 | const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE"); |
382 | 0 | kstring_t expiry_time = KS_INITIALIZE; |
383 | 0 | parse_ini(v? v : "~/.aws/credentials", ad->profile.s, |
384 | 0 | "aws_access_key_id", &ad->id, |
385 | 0 | "aws_secret_access_key", &ad->secret, |
386 | 0 | "aws_session_token", &ad->token, |
387 | 0 | "expiry_time", &expiry_time); |
388 | 0 | if (expiry_time.l) { |
389 | 0 | ad->creds_expiry_time = parse_rfc3339_date(&expiry_time); |
390 | 0 | } |
391 | 0 | ks_free(&expiry_time); |
392 | 0 | } |
393 | | |
394 | 0 | static int auth_header_callback(void *ctx, char ***hdrs) { |
395 | 0 | s3_auth_data *ad = (s3_auth_data *) ctx; |
396 | |
|
397 | 0 | time_t now = time(NULL); |
398 | 0 | #ifdef HAVE_GMTIME_R |
399 | 0 | struct tm tm_buffer; |
400 | 0 | struct tm *tm = gmtime_r(&now, &tm_buffer); |
401 | | #else |
402 | | struct tm *tm = gmtime(&now); |
403 | | #endif |
404 | 0 | kstring_t message = { 0, 0, NULL }; |
405 | 0 | unsigned char digest[DIGEST_BUFSIZ]; |
406 | 0 | size_t digest_len; |
407 | |
|
408 | 0 | if (!hdrs) { // Closing connection |
409 | 0 | free_auth_data(ad); |
410 | 0 | return 0; |
411 | 0 | } |
412 | | |
413 | 0 | if (ad->creds_expiry_time > 0 |
414 | 0 | && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) { |
415 | 0 | refresh_auth_data(ad); |
416 | 0 | } else if (now - ad->auth_time < AUTH_LIFETIME) { |
417 | | // Last auth string should still be valid |
418 | 0 | *hdrs = NULL; |
419 | 0 | return 0; |
420 | 0 | } |
421 | | |
422 | 0 | strftime(ad->date, sizeof(ad->date), "Date: %a, %d %b %Y %H:%M:%S GMT", tm); |
423 | 0 | if (!ad->id.l || !ad->secret.l) { |
424 | 0 | ad->auth_time = now; |
425 | 0 | return copy_auth_headers(ad, hdrs); |
426 | 0 | } |
427 | | |
428 | 0 | if (ksprintf(&message, "%s\n\n\n%s\n%s%s%s%s", |
429 | 0 | ad->mode == 'r' ? "GET" : "PUT", ad->date + 6, |
430 | 0 | ad->token.l ? "x-amz-security-token:" : "", |
431 | 0 | ad->token.l ? ad->token.s : "", |
432 | 0 | ad->token.l ? "\n" : "", |
433 | 0 | ad->bucket) < 0) { |
434 | 0 | return -1; |
435 | 0 | } |
436 | | |
437 | 0 | digest_len = s3_sign(digest, &ad->secret, &message); |
438 | 0 | ad->auth_hdr.l = 0; |
439 | 0 | if (ksprintf(&ad->auth_hdr, "Authorization: AWS %s:", ad->id.s) < 0) |
440 | 0 | goto fail; |
441 | 0 | base64_kput(digest, digest_len, &ad->auth_hdr); |
442 | |
|
443 | 0 | free(message.s); |
444 | 0 | ad->auth_time = now; |
445 | 0 | return copy_auth_headers(ad, hdrs); |
446 | | |
447 | 0 | fail: |
448 | 0 | free(message.s); |
449 | 0 | return -1; |
450 | 0 | } |
451 | | |
452 | | |
/* Percent-encode a query string.  Like escape_path(), except that '='
   and '&' are also left untouched.

   Letters, digits and "_-~./=&" are copied as they are; every other byte
   becomes "%XX" with upper-case hex digits.  Bytes are read as unsigned,
   so values of 0x80 and above encode to their own value rather than to a
   sign-extended one.

   Returns a newly allocated string that the caller must free(), or NULL
   if memory could not be allocated. */
static char *escape_query(const char *qs) {
    static const char hex[] = "0123456789ABCDEF";
    size_t i, j = 0, length;
    char *escaped;

    length = strlen(qs);
    // Worst case: every byte expands to three characters, plus the NUL
    if ((escaped = malloc(length * 3 + 1)) == NULL) {
        return NULL;
    }

    for (i = 0; i < length; i++) {
        unsigned char c = (unsigned char) qs[i];

        if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
            c == '_' || c == '-' || c == '~' || c == '.' || c == '/' || c == '=' || c == '&') {
            escaped[j++] = (char) c;
        } else {
            escaped[j++] = '%';
            escaped[j++] = hex[c >> 4];
            escaped[j++] = hex[c & 0x0F];
        }
    }

    escaped[j] = '\0';

    return escaped;
}
480 | | |
481 | | |
/* Percent-encode the path part of a URL.

   Letters, digits and "_-~./" are copied as they are; every other byte
   becomes "%XX" with upper-case hex digits.  Encoding stops at the first
   '?', and the '?' together with everything after it is copied across
   unchanged.  Bytes are read as unsigned, so values of 0x80 and above
   encode to their own value rather than to a sign-extended one.

   Returns a newly allocated string that the caller must free(), or NULL
   if memory could not be allocated. */
static char *escape_path(const char *path) {
    static const char hex[] = "0123456789ABCDEF";
    size_t i, j = 0, length;
    char *escaped;

    length = strlen(path);

    // Worst case: every byte expands to three characters, plus the NUL
    if ((escaped = malloc(length * 3 + 1)) == NULL) {
        return NULL;
    }

    for (i = 0; i < length; i++) {
        unsigned char c = (unsigned char) path[i];

        if (c == '?') break; // don't escape ? or beyond

        if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
            c == '_' || c == '-' || c == '~' || c == '.' || c == '/') {
            escaped[j++] = (char) c;
        } else {
            escaped[j++] = '%';
            escaped[j++] = hex[c >> 4];
            escaped[j++] = hex[c & 0x0F];
        }
    }

    if (i != length) {
        // in the case of a '?' copy the rest of the path across unchanged
        strcpy(escaped + j, path + i);
    } else {
        escaped[j] = '\0';
    }

    return escaped;
}
516 | | |
517 | | |
/* Guess whether str is already percent-encoded.

   Returns non-zero when the string can be used as it is: either it holds
   no character that needs encoding, or it holds at least one "%XX" escape
   and every '%' in it starts such an escape.  A single '%' that is not
   followed by two hex digits (including one cut short by the end of the
   string) means the text is treated as unescaped, wherever it occurs. */
static int is_escaped(const char *str) {
    const char *c = str;
    int escaped = 0;       // saw at least one valid %XX
    int stray_percent = 0; // saw a '%' that is not a valid escape
    int needs_escape = 0;

    while (*c != '\0') {
        if (*c == '%') {
            if (c[1] != '\0' && c[2] != '\0'
                && isxdigit_c(c[1]) && isxdigit_c(c[2])) {
                escaped = 1;
                c += 3;
                continue;
            }
            // only escaped if all % signs are escaped
            stray_percent = 1;
        }
        if (!((*c >= '0' && *c <= '9') || (*c >= 'A' && *c <= 'Z')
              || (*c >= 'a' && *c <= 'z') ||
              *c == '_' || *c == '-' || *c == '~' || *c == '.' || *c == '/')) {
            needs_escape = 1;
        }
        c++;
    }

    return (escaped && !stray_percent) || !needs_escape;
}
544 | | |
545 | | static int redirect_endpoint_callback(void *auth, long response, |
546 | 0 | kstring_t *header, kstring_t *url) { |
547 | 0 | s3_auth_data *ad = (s3_auth_data *)auth; |
548 | 0 | char *new_region; |
549 | 0 | char *end; |
550 | 0 | int ret = -1; |
551 | | |
552 | | // get the new region from the reply header |
553 | 0 | if ((new_region = strstr(header->s, "x-amz-bucket-region: "))) { |
554 | |
|
555 | 0 | new_region += strlen("x-amz-bucket-region: "); |
556 | 0 | end = new_region; |
557 | |
|
558 | 0 | while (isalnum_c(*end) || ispunct_c(*end)) end++; |
559 | |
|
560 | 0 | *end = 0; |
561 | |
|
562 | 0 | if (strstr(ad->host.s, "amazonaws.com")) { |
563 | 0 | ad->region.l = 0; |
564 | 0 | kputs(new_region, &ad->region); |
565 | |
|
566 | 0 | ad->host.l = 0; |
567 | |
|
568 | 0 | if (ad->url_style == s3_path) { |
569 | | // Path style https://s3.{region-code}.amazonaws.com/{bucket-name}/{key-name} |
570 | 0 | ksprintf(&ad->host, "s3.%s.amazonaws.com", new_region); |
571 | 0 | } else { |
572 | | // Virtual https://{bucket-name}.s3.{region-code}.amazonaws.com/{key-name} |
573 | | // Extract the {bucket-name} from {ad->host} to include in subdomain |
574 | 0 | kstring_t url_prefix = KS_INITIALIZE; |
575 | 0 | kputsn(ad->host.s, strcspn(ad->host.s, "."), &url_prefix); |
576 | |
|
577 | 0 | ksprintf(&ad->host, "%s.s3.%s.amazonaws.com", url_prefix.s, new_region); |
578 | 0 | free(url_prefix.s); |
579 | 0 | } |
580 | 0 | if (ad->region.l && ad->host.l) { |
581 | 0 | int e = 0; |
582 | 0 | url->l = 0; |
583 | 0 | e |= kputs("https://", url) < 0; |
584 | 0 | e |= kputs(ad->host.s, url) < 0; |
585 | 0 | e |= kputsn(ad->bucket, strlen(ad->bucket), url) < 0; |
586 | |
|
587 | 0 | if (!e) |
588 | 0 | ret = 0; |
589 | 0 | } |
590 | 0 | if (ad->user_query_string.l) { |
591 | 0 | kputc('?', url); |
592 | 0 | kputsn(ad->user_query_string.s, ad->user_query_string.l, url); |
593 | 0 | } |
594 | 0 | } |
595 | 0 | } |
596 | |
|
597 | 0 | return ret; |
598 | 0 | } |
599 | | |
600 | | static s3_auth_data * setup_auth_data(const char *s3url, const char *mode, |
601 | | int sigver, kstring_t *url) |
602 | 459 | { |
603 | 459 | s3_auth_data *ad = calloc(1, sizeof(*ad)); |
604 | 459 | const char *bucket, *path; |
605 | 459 | char *escaped = NULL; |
606 | 459 | size_t url_path_pos; |
607 | 459 | ptrdiff_t bucket_len; |
608 | 459 | int is_https = 1, dns_compliant; |
609 | 459 | char *query_start; |
610 | | |
611 | 459 | if (!ad) |
612 | 0 | return NULL; |
613 | 459 | ad->mode = strchr(mode, 'r') ? 'r' : 'w'; |
614 | 459 | ad->url_style = s3_auto; |
615 | | |
616 | | // Our S3 URL format is s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH |
617 | | |
618 | 459 | if (s3url[2] == '+') { |
619 | 0 | bucket = strchr(s3url, ':') + 1; |
620 | 0 | if (bucket == NULL) { |
621 | 0 | free(ad); |
622 | 0 | return NULL; |
623 | 0 | } |
624 | 0 | kputsn(&s3url[3], bucket - &s3url[3], url); |
625 | 0 | is_https = strncmp(url->s, "https:", 6) == 0; |
626 | 0 | } |
627 | 459 | else { |
628 | 459 | kputs("https:", url); |
629 | 459 | bucket = &s3url[3]; |
630 | 459 | } |
631 | 1.34k | while (*bucket == '/') kputc(*bucket++, url); |
632 | | |
633 | 459 | path = bucket + strcspn(bucket, "/?#@"); |
634 | | |
635 | 459 | if (*path == '@') { |
636 | 273 | const char *colon = strpbrk(bucket, ":@"); |
637 | 273 | if (*colon != ':') { |
638 | 18 | urldecode_kput(bucket, colon - bucket, &ad->profile); |
639 | 18 | } |
640 | 255 | else { |
641 | 255 | const char *colon2 = strpbrk(&colon[1], ":@"); |
642 | 255 | urldecode_kput(bucket, colon - bucket, &ad->id); |
643 | 255 | urldecode_kput(&colon[1], colon2 - &colon[1], &ad->secret); |
644 | 255 | if (*colon2 == ':') |
645 | 150 | urldecode_kput(&colon2[1], path - &colon2[1], &ad->token); |
646 | 255 | } |
647 | | |
648 | 273 | bucket = &path[1]; |
649 | 273 | path = bucket + strcspn(bucket, "/?#"); |
650 | 273 | } |
651 | 186 | else { |
652 | | // If the URL has no ID[:SECRET]@, consider environment variables. |
653 | 186 | const char *v; |
654 | 186 | if ((v = getenv("AWS_ACCESS_KEY_ID")) != NULL) kputs(v, &ad->id); |
655 | 186 | if ((v = getenv("AWS_SECRET_ACCESS_KEY")) != NULL) kputs(v, &ad->secret); |
656 | 186 | if ((v = getenv("AWS_SESSION_TOKEN")) != NULL) kputs(v, &ad->token); |
657 | 186 | if ((v = getenv("AWS_DEFAULT_REGION")) != NULL) kputs(v, &ad->region); |
658 | 186 | if ((v = getenv("HTS_S3_HOST")) != NULL) kputs(v, &ad->host); |
659 | | |
660 | 186 | if ((v = getenv("AWS_DEFAULT_PROFILE")) != NULL) kputs(v, &ad->profile); |
661 | 186 | else if ((v = getenv("AWS_PROFILE")) != NULL) kputs(v, &ad->profile); |
662 | 186 | else kputs("default", &ad->profile); |
663 | | |
664 | 186 | if ((v = getenv("HTS_S3_ADDRESS_STYLE")) != NULL) { |
665 | 0 | if (strcasecmp(v, "virtual") == 0) { |
666 | 0 | ad->url_style = s3_virtual; |
667 | 0 | } else if (strcasecmp(v, "path") == 0) { |
668 | 0 | ad->url_style = s3_path; |
669 | 0 | } |
670 | 0 | } |
671 | 186 | } |
672 | | |
673 | 459 | if (ad->id.l == 0) { |
674 | 207 | kstring_t url_style = KS_INITIALIZE; |
675 | 207 | kstring_t expiry_time = KS_INITIALIZE; |
676 | 207 | const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE"); |
677 | 207 | parse_ini(v? v : "~/.aws/credentials", ad->profile.s, |
678 | 207 | "aws_access_key_id", &ad->id, |
679 | 207 | "aws_secret_access_key", &ad->secret, |
680 | 207 | "aws_session_token", &ad->token, |
681 | 207 | "region", &ad->region, |
682 | 207 | "addressing_style", &url_style, |
683 | 207 | "expiry_time", &expiry_time, |
684 | 207 | NULL); |
685 | | |
686 | 207 | if (url_style.l) { |
687 | 0 | if (strcmp(url_style.s, "virtual") == 0) { |
688 | 0 | ad->url_style = s3_virtual; |
689 | 0 | } else if (strcmp(url_style.s, "path") == 0) { |
690 | 0 | ad->url_style = s3_path; |
691 | 0 | } else { |
692 | 0 | ad->url_style = s3_auto; |
693 | 0 | } |
694 | 0 | } |
695 | 207 | if (expiry_time.l) { |
696 | | // Not a real part of the AWS configuration file, but it allows |
697 | | // support for short-term credentials like those for the IAM |
698 | | // service. The botocore library uses the key "expiry_time" |
699 | | // internally for this purpose. |
700 | | // See https://github.com/boto/botocore/blob/develop/botocore/credentials.py |
701 | 0 | ad->creds_expiry_time = parse_rfc3339_date(&expiry_time); |
702 | 0 | } |
703 | | |
704 | 207 | ks_free(&url_style); |
705 | 207 | ks_free(&expiry_time); |
706 | 207 | } |
707 | | |
708 | 459 | if (ad->id.l == 0) { |
709 | 207 | kstring_t url_style = KS_INITIALIZE; |
710 | 207 | const char *v = getenv("HTS_S3_S3CFG"); |
711 | 207 | parse_ini(v? v : "~/.s3cfg", ad->profile.s, "access_key", &ad->id, |
712 | 207 | "secret_key", &ad->secret, "access_token", &ad->token, |
713 | 207 | "host_base", &ad->host, |
714 | 207 | "bucket_location", &ad->region, |
715 | 207 | "host_bucket", &url_style, |
716 | 207 | NULL); |
717 | | |
718 | 207 | if (url_style.l) { |
719 | | // Conforming to s3cmd's GitHub PR#416, host_bucket without the "%(bucket)s" string |
720 | | // indicates use of path style adressing. |
721 | 0 | if (strstr(url_style.s, "%(bucket)s") == NULL) { |
722 | 0 | ad->url_style = s3_path; |
723 | 0 | } else { |
724 | 0 | ad->url_style = s3_auto; |
725 | 0 | } |
726 | 0 | } |
727 | | |
728 | 207 | ks_free(&url_style); |
729 | 207 | } |
730 | | |
731 | 459 | if (ad->id.l == 0) |
732 | 207 | parse_simple("~/.awssecret", &ad->id, &ad->secret); |
733 | | |
734 | | |
735 | | // if address_style is set, force the dns_compliant setting |
736 | 459 | if (ad->url_style == s3_virtual) { |
737 | 0 | dns_compliant = 1; |
738 | 459 | } else if (ad->url_style == s3_path) { |
739 | 0 | dns_compliant = 0; |
740 | 459 | } else { |
741 | 459 | dns_compliant = is_dns_compliant(bucket, path, is_https); |
742 | 459 | } |
743 | | |
744 | 459 | if (ad->host.l == 0) |
745 | 459 | kputs("s3.amazonaws.com", &ad->host); |
746 | | |
747 | 459 | if (!dns_compliant && ad->region.l > 0 |
748 | 0 | && strcmp(ad->host.s, "s3.amazonaws.com") == 0) { |
749 | | // Can avoid a redirection by including the region in the host name |
750 | | // (assuming the right one has been specified) |
751 | 0 | ad->host.l = 0; |
752 | 0 | ksprintf(&ad->host, "s3.%s.amazonaws.com", ad->region.s); |
753 | 0 | } |
754 | | |
755 | 459 | if (ad->region.l == 0) |
756 | 459 | kputs("us-east-1", &ad->region); |
757 | | |
758 | 459 | if (!is_escaped(path)) { |
759 | 174 | escaped = escape_path(path); |
760 | 174 | if (escaped == NULL) { |
761 | 0 | goto error; |
762 | 0 | } |
763 | 174 | } |
764 | | |
765 | 459 | bucket_len = path - bucket; |
766 | | |
767 | | // Use virtual hosted-style access if possible, otherwise path-style. |
768 | 459 | if (dns_compliant) { |
769 | 8 | size_t url_host_pos = url->l; |
770 | | // Append "bucket.host" to url |
771 | 8 | kputsn_(bucket, bucket_len, url); |
772 | 8 | kputc('.', url); |
773 | 8 | kputsn(ad->host.s, ad->host.l, url); |
774 | 8 | url_path_pos = url->l; |
775 | | |
776 | 8 | if (sigver == 4) { |
777 | | // Copy back to ad->host to use when making the signature |
778 | 8 | ad->host.l = 0; |
779 | 8 | kputsn(url->s + url_host_pos, url->l - url_host_pos, &ad->host); |
780 | 8 | } |
781 | 8 | } |
782 | 451 | else { |
783 | | // Append "host/bucket" to url |
784 | 451 | kputsn(ad->host.s, ad->host.l, url); |
785 | 451 | url_path_pos = url->l; |
786 | 451 | kputc('/', url); |
787 | 451 | kputsn(bucket, bucket_len, url); |
788 | 451 | } |
789 | | |
790 | 459 | kputs(escaped == NULL ? path : escaped, url); |
791 | | |
792 | 459 | if (sigver == 4 || !dns_compliant) { |
793 | 459 | ad->bucket = malloc(url->l - url_path_pos + 1); |
794 | 459 | if (ad->bucket == NULL) { |
795 | 0 | goto error; |
796 | 0 | } |
797 | 459 | memcpy(ad->bucket, url->s + url_path_pos, url->l - url_path_pos + 1); |
798 | 459 | } |
799 | 0 | else { |
800 | 0 | ad->bucket = malloc(url->l - url_path_pos + bucket_len + 2); |
801 | 0 | if (ad->bucket == NULL) { |
802 | 0 | goto error; |
803 | 0 | } |
804 | 0 | ad->bucket[0] = '/'; |
805 | 0 | memcpy(ad->bucket + 1, bucket, bucket_len); |
806 | 0 | memcpy(ad->bucket + bucket_len + 1, |
807 | 0 | url->s + url_path_pos, url->l - url_path_pos + 1); |
808 | 0 | } |
809 | | |
810 | | // write any query strings to its own place to use later |
811 | 459 | if ((query_start = strchr(ad->bucket, '?'))) { |
812 | 132 | kputs(query_start + 1, &ad->user_query_string); |
813 | 132 | *query_start = 0; |
814 | 132 | } |
815 | | |
816 | 459 | free(escaped); |
817 | | |
818 | 459 | return ad; |
819 | | |
820 | 0 | error: |
821 | 0 | free(escaped); |
822 | 0 | free_auth_data(ad); |
823 | 0 | return NULL; |
824 | 459 | } |
825 | | |
826 | | static hFILE * s3_rewrite(const char *s3url, const char *mode, va_list *argsp) |
827 | 0 | { |
828 | 0 | kstring_t url = { 0, 0, NULL }; |
829 | 0 | s3_auth_data *ad = setup_auth_data(s3url, mode, 2, &url); |
830 | |
|
831 | 0 | if (!ad) |
832 | 0 | return NULL; |
833 | | |
834 | 0 | hFILE *fp = hopen(url.s, mode, "va_list", argsp, |
835 | 0 | "httphdr_callback", auth_header_callback, |
836 | 0 | "httphdr_callback_data", ad, |
837 | 0 | "redirect_callback", redirect_endpoint_callback, |
838 | 0 | "redirect_callback_data", ad, |
839 | 0 | NULL); |
840 | 0 | if (!fp) goto fail; |
841 | | |
842 | 0 | free(url.s); |
843 | 0 | return fp; |
844 | | |
845 | 0 | fail: |
846 | 0 | free(url.s); |
847 | 0 | free_auth_data(ad); |
848 | 0 | return NULL; |
849 | 0 | } |
850 | | |
851 | | /*************************************************************** |
852 | | |
853 | | AWS S3 sig version 4 writing code |
854 | | |
855 | | ****************************************************************/ |
856 | | |
857 | 456 | static void hash_string(char *in, size_t length, char *out, size_t out_len) { |
858 | 456 | unsigned char hashed[SHA256_DIGEST_BUFSIZE]; |
859 | 456 | int i, j; |
860 | | |
861 | 456 | s3_sha256((const unsigned char *)in, length, hashed); |
862 | | |
863 | 15.0k | for (i = 0, j = 0; i < SHA256_DIGEST_BUFSIZE; i++, j+= 2) { |
864 | 14.5k | snprintf(out + j, out_len - j, "%02x", hashed[i]); |
865 | 14.5k | } |
866 | 456 | } |
867 | | |
868 | 1.36k | static void ksinit(kstring_t *s) { |
869 | 1.36k | s->l = 0; |
870 | 1.36k | s->m = 0; |
871 | 1.36k | s->s = NULL; |
872 | 1.36k | } |
873 | | |
874 | | |
875 | 1.36k | static void ksfree(kstring_t *s) { |
876 | 1.36k | free(s->s); |
877 | 1.36k | ksinit(s); |
878 | 1.36k | } |
879 | | |
880 | | |
881 | 228 | static int make_signature(s3_auth_data *ad, kstring_t *string_to_sign, char *signature_string, size_t sig_string_len) { |
882 | 228 | unsigned char date_key[SHA256_DIGEST_BUFSIZE]; |
883 | 228 | unsigned char date_region_key[SHA256_DIGEST_BUFSIZE]; |
884 | 228 | unsigned char date_region_service_key[SHA256_DIGEST_BUFSIZE]; |
885 | 228 | unsigned char signing_key[SHA256_DIGEST_BUFSIZE]; |
886 | 228 | unsigned char signature[SHA256_DIGEST_BUFSIZE]; |
887 | | |
888 | 228 | const unsigned char service[] = "s3"; |
889 | 228 | const unsigned char request[] = "aws4_request"; |
890 | | |
891 | 228 | kstring_t secret_access_key = KS_INITIALIZE; |
892 | 228 | unsigned int len; |
893 | 228 | unsigned int i, j; |
894 | | |
895 | 228 | ksprintf(&secret_access_key, "AWS4%s", ad->secret.s); |
896 | | |
897 | 228 | if (secret_access_key.l == 0) { |
898 | 0 | return -1; |
899 | 0 | } |
900 | | |
901 | 228 | s3_sign_sha256(secret_access_key.s, secret_access_key.l, (const unsigned char *)ad->date_short, strlen(ad->date_short), date_key, &len); |
902 | 228 | s3_sign_sha256(date_key, len, (const unsigned char *)ad->region.s, ad->region.l, date_region_key, &len); |
903 | 228 | s3_sign_sha256(date_region_key, len, service, 2, date_region_service_key, &len); |
904 | 228 | s3_sign_sha256(date_region_service_key, len, request, 12, signing_key, &len); |
905 | 228 | s3_sign_sha256(signing_key, len, (const unsigned char *)string_to_sign->s, string_to_sign->l, signature, &len); |
906 | | |
907 | 7.52k | for (i = 0, j = 0; i < len; i++, j+= 2) { |
908 | 7.29k | snprintf(signature_string + j, sig_string_len - j, "%02x", signature[i]); |
909 | 7.29k | } |
910 | | |
911 | 228 | ksfree(&secret_access_key); |
912 | | |
913 | 228 | return 0; |
914 | 228 | } |
915 | | |
916 | | |
917 | 228 | static int make_authorisation(s3_auth_data *ad, char *http_request, char *content, kstring_t *auth) { |
918 | 228 | kstring_t signed_headers = KS_INITIALIZE; |
919 | 228 | kstring_t canonical_headers = KS_INITIALIZE; |
920 | 228 | kstring_t canonical_request = KS_INITIALIZE; |
921 | 228 | kstring_t scope = KS_INITIALIZE; |
922 | 228 | kstring_t string_to_sign = KS_INITIALIZE; |
923 | 228 | char cr_hash[HASH_LENGTH_SHA256]; |
924 | 228 | char signature_string[HASH_LENGTH_SHA256]; |
925 | 228 | int ret = -1; |
926 | | |
927 | | |
928 | 228 | if (!ad->token.l) { |
929 | 120 | kputs("host;x-amz-content-sha256;x-amz-date", &signed_headers); |
930 | 120 | } else { |
931 | 108 | kputs("host;x-amz-content-sha256;x-amz-date;x-amz-security-token", &signed_headers); |
932 | 108 | } |
933 | | |
934 | 228 | if (signed_headers.l == 0) { |
935 | 0 | return -1; |
936 | 0 | } |
937 | | |
938 | | |
939 | 228 | if (!ad->token.l) { |
940 | 120 | ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\n", |
941 | 120 | ad->host.s, content, ad->date_long); |
942 | 120 | } else { |
943 | 108 | ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\nx-amz-security-token:%s\n", |
944 | 108 | ad->host.s, content, ad->date_long, ad->token.s); |
945 | 108 | } |
946 | | |
947 | 228 | if (canonical_headers.l == 0) { |
948 | 0 | goto cleanup; |
949 | 0 | } |
950 | | |
951 | | // bucket == canonical_uri |
952 | 228 | ksprintf(&canonical_request, "%s\n%s\n%s\n%s\n%s\n%s", |
953 | 228 | http_request, ad->bucket, ad->canonical_query_string.s, |
954 | 228 | canonical_headers.s, signed_headers.s, content); |
955 | | |
956 | 228 | if (canonical_request.l == 0) { |
957 | 0 | goto cleanup; |
958 | 0 | } |
959 | | |
960 | 228 | hash_string(canonical_request.s, canonical_request.l, cr_hash, sizeof(cr_hash)); |
961 | | |
962 | 228 | ksprintf(&scope, "%s/%s/s3/aws4_request", ad->date_short, ad->region.s); |
963 | | |
964 | 228 | if (scope.l == 0) { |
965 | 0 | goto cleanup; |
966 | 0 | } |
967 | | |
968 | 228 | ksprintf(&string_to_sign, "AWS4-HMAC-SHA256\n%s\n%s\n%s", ad->date_long, scope.s, cr_hash); |
969 | | |
970 | 228 | if (string_to_sign.l == 0) { |
971 | 0 | goto cleanup; |
972 | 0 | } |
973 | | |
974 | 228 | if (make_signature(ad, &string_to_sign, signature_string, sizeof(signature_string))) { |
975 | 0 | goto cleanup; |
976 | 0 | } |
977 | | |
978 | 228 | ksprintf(auth, "Authorization: AWS4-HMAC-SHA256 Credential=%s/%s/%s/s3/aws4_request,SignedHeaders=%s,Signature=%s", |
979 | 228 | ad->id.s, ad->date_short, ad->region.s, signed_headers.s, signature_string); |
980 | | |
981 | 228 | if (auth->l == 0) { |
982 | 0 | goto cleanup; |
983 | 0 | } |
984 | | |
985 | 228 | ret = 0; |
986 | | |
987 | 228 | cleanup: |
988 | 228 | ksfree(&signed_headers); |
989 | 228 | ksfree(&canonical_headers); |
990 | 228 | ksfree(&canonical_request); |
991 | 228 | ksfree(&scope); |
992 | 228 | ksfree(&string_to_sign); |
993 | | |
994 | 228 | return ret; |
995 | 228 | } |
996 | | |
997 | | |
998 | 459 | static int update_time(s3_auth_data *ad, time_t now) { |
999 | 459 | int ret = -1; |
1000 | 459 | #ifdef HAVE_GMTIME_R |
1001 | 459 | struct tm tm_buffer; |
1002 | 459 | struct tm *tm = gmtime_r(&now, &tm_buffer); |
1003 | | #else |
1004 | | struct tm *tm = gmtime(&now); |
1005 | | #endif |
1006 | | |
1007 | 459 | if (now - ad->auth_time > AUTH_LIFETIME) { |
1008 | | // update timestamp |
1009 | 459 | ad->auth_time = now; |
1010 | | |
1011 | 459 | if (strftime(ad->date_long, 17, "%Y%m%dT%H%M%SZ", tm) != 16) { |
1012 | 0 | return -1; |
1013 | 0 | } |
1014 | | |
1015 | 459 | if (strftime(ad->date_short, 9, "%Y%m%d", tm) != 8) { |
1016 | 0 | return -1;; |
1017 | 0 | } |
1018 | | |
1019 | 459 | ad->date_html.l = 0; |
1020 | 459 | ksprintf(&ad->date_html, "x-amz-date: %s", ad->date_long); |
1021 | 459 | } |
1022 | | |
1023 | 459 | if (ad->date_html.l) ret = 0; |
1024 | | |
1025 | 459 | return ret; |
1026 | 459 | } |
1027 | | |
1028 | | |
/* qsort() comparator: each argument points at a `char *` element; the
   pointed-to strings are ordered with strcmp(). */
static int query_cmp(const void *p1, const void *p2) {
    const char *const *lhs = p1;
    const char *const *rhs = p2;

    return strcmp(*lhs, *rhs);
}
1035 | | |
1036 | | |
1037 | | /* Query strings must be in alphabetical order for authorisation */ |
1038 | | |
1039 | 122 | static int order_query_string(kstring_t *qs) { |
1040 | 122 | int *query_offset = NULL; |
1041 | 122 | int num_queries, i; |
1042 | 122 | char **queries = NULL; |
1043 | 122 | kstring_t ordered = KS_INITIALIZE; |
1044 | 122 | char *escaped = NULL; |
1045 | 122 | int ret = -1; |
1046 | | |
1047 | 122 | if ((query_offset = ksplit(qs, '&', &num_queries)) == NULL) { |
1048 | 0 | return -1; |
1049 | 0 | } |
1050 | | |
1051 | 122 | if ((queries = malloc(num_queries * sizeof(char*))) == NULL) |
1052 | 0 | goto err; |
1053 | | |
1054 | 46.2k | for (i = 0; i < num_queries; i++) { |
1055 | 46.1k | queries[i] = qs->s + query_offset[i]; |
1056 | 46.1k | } |
1057 | | |
1058 | 122 | qsort(queries, num_queries, sizeof(char *), query_cmp); |
1059 | | |
1060 | 46.2k | for (i = 0; i < num_queries; i++) { |
1061 | 46.1k | if (i) { |
1062 | 46.0k | kputs("&", &ordered); |
1063 | 46.0k | } |
1064 | | |
1065 | 46.1k | kputs(queries[i], &ordered); |
1066 | 46.1k | } |
1067 | | |
1068 | 122 | if ((escaped = escape_query(ordered.s)) == NULL) |
1069 | 0 | goto err; |
1070 | | |
1071 | 122 | qs->l = 0; |
1072 | 122 | kputs(escaped, qs); |
1073 | | |
1074 | 122 | ret = 0; |
1075 | 122 | err: |
1076 | 122 | free(ordered.s); |
1077 | 122 | free(queries); |
1078 | 122 | free(query_offset); |
1079 | 122 | free(escaped); |
1080 | | |
1081 | 122 | return ret; |
1082 | 122 | } |
1083 | | |
1084 | | |
1085 | | static int write_authorisation_callback(void *auth, char *request, kstring_t *content, char *cqs, |
1086 | | kstring_t *hash, kstring_t *auth_str, kstring_t *date, |
1087 | 0 | kstring_t *token, int uqs) { |
1088 | 0 | s3_auth_data *ad = (s3_auth_data *)auth; |
1089 | 0 | char content_hash[HASH_LENGTH_SHA256]; |
1090 | 0 | time_t now; |
1091 | |
|
1092 | 0 | if (request == NULL) { |
1093 | | // signal to free auth data |
1094 | 0 | free_auth_data(ad); |
1095 | 0 | return 0; |
1096 | 0 | } |
1097 | | |
1098 | 0 | now = time(NULL); |
1099 | |
|
1100 | 0 | if (update_time(ad, now)) { |
1101 | 0 | return -1; |
1102 | 0 | } |
1103 | 0 | if (ad->creds_expiry_time > 0 |
1104 | 0 | && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) { |
1105 | 0 | refresh_auth_data(ad); |
1106 | 0 | } |
1107 | |
|
1108 | 0 | if (content) { |
1109 | 0 | hash_string(content->s, content->l, content_hash, sizeof(content_hash)); |
1110 | 0 | } else { |
1111 | | // empty hash |
1112 | 0 | hash_string("", 0, content_hash, sizeof(content_hash)); |
1113 | 0 | } |
1114 | |
|
1115 | 0 | ad->canonical_query_string.l = 0; |
1116 | 0 | kputs(cqs, &ad->canonical_query_string); |
1117 | |
|
1118 | 0 | if (ad->canonical_query_string.l == 0) { |
1119 | 0 | return -1; |
1120 | 0 | } |
1121 | | |
1122 | | /* add a user provided query string, normally only useful on upload initiation */ |
1123 | 0 | if (uqs) { |
1124 | 0 | kputs("&", &ad->canonical_query_string); |
1125 | 0 | kputs(ad->user_query_string.s, &ad->canonical_query_string); |
1126 | |
|
1127 | 0 | if (order_query_string(&ad->canonical_query_string)) { |
1128 | 0 | return -1; |
1129 | 0 | } |
1130 | 0 | } |
1131 | | |
1132 | 0 | if (make_authorisation(ad, request, content_hash, auth_str)) { |
1133 | 0 | return -1; |
1134 | 0 | } |
1135 | | |
1136 | 0 | kputs(ad->date_html.s, date); |
1137 | 0 | kputsn(content_hash, HASH_LENGTH_SHA256, hash); |
1138 | |
|
1139 | 0 | if (date->l == 0 || hash->l == 0) { |
1140 | 0 | return -1; |
1141 | 0 | } |
1142 | | |
1143 | 0 | if (ad->token.l) { |
1144 | 0 | ksprintf(token, "x-amz-security-token: %s", ad->token.s); |
1145 | 0 | } |
1146 | |
|
1147 | 0 | return 0; |
1148 | 0 | } |
1149 | | |
1150 | | |
1151 | 464 | static int v4_auth_header_callback(void *ctx, char ***hdrs) { |
1152 | 464 | s3_auth_data *ad = (s3_auth_data *) ctx; |
1153 | 464 | char content_hash[HASH_LENGTH_SHA256]; |
1154 | 464 | kstring_t content = KS_INITIALIZE; |
1155 | 464 | kstring_t authorisation = KS_INITIALIZE; |
1156 | 464 | kstring_t token_hdr = KS_INITIALIZE; |
1157 | 464 | char *date_html = NULL; |
1158 | 464 | time_t now; |
1159 | 464 | int idx; |
1160 | | |
1161 | 464 | if (!hdrs) { // Closing connection |
1162 | 5 | free_auth_data(ad); |
1163 | 5 | return 0; |
1164 | 5 | } |
1165 | | |
1166 | 459 | now = time(NULL); |
1167 | | |
1168 | 459 | if (update_time(ad, now)) { |
1169 | 0 | return -1; |
1170 | 0 | } |
1171 | | |
1172 | 459 | if (ad->creds_expiry_time > 0 |
1173 | 0 | && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) { |
1174 | 0 | refresh_auth_data(ad); |
1175 | 0 | } |
1176 | | |
1177 | 459 | if (!ad->id.l || !ad->secret.l) { |
1178 | 231 | return copy_auth_headers(ad, hdrs); |
1179 | 231 | } |
1180 | | |
1181 | 228 | hash_string("", 0, content_hash, sizeof(content_hash)); // empty hash |
1182 | | |
1183 | 228 | ad->canonical_query_string.l = 0; |
1184 | | |
1185 | 228 | if (ad->user_query_string.l > 0) { |
1186 | 122 | kputs(ad->user_query_string.s, &ad->canonical_query_string); |
1187 | | |
1188 | 122 | if (order_query_string(&ad->canonical_query_string)) { |
1189 | 0 | return -1; |
1190 | 0 | } |
1191 | 122 | } else { |
1192 | 106 | kputs("", &ad->canonical_query_string); |
1193 | 106 | } |
1194 | | |
1195 | 228 | if (make_authorisation(ad, "GET", content_hash, &authorisation)) { |
1196 | 0 | return -1; |
1197 | 0 | } |
1198 | | |
1199 | 228 | ksprintf(&content, "x-amz-content-sha256: %s", content_hash); |
1200 | 228 | date_html = strdup(ad->date_html.s); |
1201 | | |
1202 | 228 | if (ad->token.l > 0) { |
1203 | 108 | kputs("X-Amz-Security-Token: ", &token_hdr); |
1204 | 108 | kputs(ad->token.s, &token_hdr); |
1205 | 108 | } |
1206 | | |
1207 | 228 | if (content.l == 0 || date_html == NULL) { |
1208 | 0 | ksfree(&authorisation); |
1209 | 0 | ksfree(&content); |
1210 | 0 | ksfree(&token_hdr); |
1211 | 0 | free(date_html); |
1212 | 0 | return -1; |
1213 | 0 | } |
1214 | | |
1215 | 228 | *hdrs = &ad->headers[0]; |
1216 | 228 | idx = 0; |
1217 | 228 | ad->headers[idx++] = ks_release(&authorisation); |
1218 | 228 | ad->headers[idx++] = date_html; |
1219 | 228 | ad->headers[idx++] = ks_release(&content); |
1220 | 228 | if (token_hdr.s) |
1221 | 108 | ad->headers[idx++] = ks_release(&token_hdr); |
1222 | 228 | ad->headers[idx++] = NULL; |
1223 | | |
1224 | 228 | return 0; |
1225 | 228 | } |
1226 | | |
1227 | 5 | static int handle_400_response(hFILE *fp, s3_auth_data *ad) { |
1228 | | // v4 signatures in virtual hosted mode return 400 Bad Request if the |
1229 | | // wrong region is used to make the signature. The response is an xml |
1230 | | // document which includes the name of the correct region. This can |
1231 | | // be extracted and used to generate a corrected signature. |
1232 | | // As the xml is fairly simple, go with something "good enough" instead |
1233 | | // of trying to parse it properly. |
1234 | | |
1235 | 5 | char buffer[1024], *region, *reg_end; |
1236 | 5 | ssize_t bytes; |
1237 | | |
1238 | 5 | bytes = hread(fp, buffer, sizeof(buffer) - 1); |
1239 | 5 | if (bytes < 0) { |
1240 | 0 | return -1; |
1241 | 0 | } |
1242 | 5 | buffer[bytes] = '\0'; |
1243 | 5 | region = strstr(buffer, "<Region>"); |
1244 | 5 | if (region == NULL) { |
1245 | 5 | return -1; |
1246 | 5 | } |
1247 | 0 | region += 8; |
1248 | 0 | while (isspace((unsigned char) *region)) ++region; |
1249 | 0 | reg_end = strchr(region, '<'); |
1250 | 0 | if (reg_end == NULL || strncmp(reg_end + 1, "/Region>", 8) != 0) { |
1251 | 0 | return -1; |
1252 | 0 | } |
1253 | 0 | while (reg_end > region && isspace((unsigned char) reg_end[-1])) --reg_end; |
1254 | 0 | ad->region.l = 0; |
1255 | 0 | kputsn(region, reg_end - region, &ad->region); |
1256 | 0 | if (ad->region.l == 0) { |
1257 | 0 | return -1; |
1258 | 0 | } |
1259 | | |
1260 | 0 | return 0; |
1261 | 0 | } |
1262 | | |
1263 | 0 | static int set_region(void *adv, kstring_t *region) { |
1264 | 0 | s3_auth_data *ad = (s3_auth_data *) adv; |
1265 | |
|
1266 | 0 | ad->region.l = 0; |
1267 | 0 | return kputsn(region->s, region->l, &ad->region) < 0; |
1268 | 0 | } |
1269 | | |
/*
 * Translate an HTTP status code into an errno value.
 *
 * Codes below 400 map to 0.  Selected 4xx and 5xx codes have specific
 * mappings; any other 4xx code gives EINVAL and any other code of 500 or
 * above gives EIO.
 */
static int http_status_errno(int status)
{
    if (status < 400)
        return 0;

    switch (status) {
    case 401:
    case 407:
        return EPERM;
    case 403:
        return EACCES;
    case 404:
    case 410:
        return ENOENT;
    case 405:
        return EROFS;
    case 408:
    case 504:
        return ETIMEDOUT;
    case 501:
        return ENOSYS;
    case 503:
        return EBUSY;
    default:
        return status >= 500 ? EIO : EINVAL;
    }
}
1292 | | |
1293 | 459 | static hFILE *s3_open_v4(const char *s3url, const char *mode, va_list *argsp) { |
1294 | 459 | kstring_t url = { 0, 0, NULL }; |
1295 | | |
1296 | 459 | s3_auth_data *ad = setup_auth_data(s3url, mode, 4, &url); |
1297 | 459 | hFILE *fp = NULL; |
1298 | | |
1299 | 459 | if (ad == NULL) { |
1300 | 0 | return NULL; |
1301 | 0 | } |
1302 | | |
1303 | 459 | if (ad->mode == 'r') { |
1304 | 459 | long http_response = 0; |
1305 | | |
1306 | 459 | fp = hopen(url.s, mode, "va_list", argsp, |
1307 | 459 | "httphdr_callback", v4_auth_header_callback, |
1308 | 459 | "httphdr_callback_data", ad, |
1309 | 459 | "redirect_callback", redirect_endpoint_callback, |
1310 | 459 | "redirect_callback_data", ad, |
1311 | 459 | "http_response_ptr", &http_response, |
1312 | 459 | "fail_on_error", 0, |
1313 | 459 | NULL); |
1314 | | |
1315 | 459 | if (fp == NULL) goto error; |
1316 | | |
1317 | 5 | if (http_response == 307) { |
1318 | | // Follow additional redirect. |
1319 | 0 | ad->refcount = 1; |
1320 | 0 | hclose_abruptly(fp); |
1321 | |
|
1322 | 0 | url.l = 0; |
1323 | 0 | ksprintf(&url, "https://%s%s", ad->host.s, ad->bucket); |
1324 | |
|
1325 | 0 | fp = hopen(url.s, mode, "va_list", argsp, |
1326 | 0 | "httphdr_callback", v4_auth_header_callback, |
1327 | 0 | "httphdr_callback_data", ad, |
1328 | 0 | "redirect_callback", redirect_endpoint_callback, |
1329 | 0 | "redirect_callback_data", ad, |
1330 | 0 | "http_response_ptr", &http_response, |
1331 | 0 | "fail_on_error", 0, |
1332 | 0 | NULL); |
1333 | 0 | } |
1334 | | |
1335 | 5 | if (http_response == 400) { |
1336 | 5 | ad->refcount = 1; |
1337 | 5 | if (handle_400_response(fp, ad) != 0) { |
1338 | 5 | goto error; |
1339 | 5 | } |
1340 | 0 | hclose_abruptly(fp); |
1341 | 0 | fp = hopen(url.s, mode, "va_list", argsp, |
1342 | 0 | "httphdr_callback", v4_auth_header_callback, |
1343 | 0 | "httphdr_callback_data", ad, |
1344 | 0 | "redirect_callback", redirect_endpoint_callback, |
1345 | 0 | "redirect_callback_data", ad, |
1346 | 0 | NULL); |
1347 | 0 | } else if (http_response > 400) { |
1348 | 0 | ad->refcount = 1; |
1349 | 0 | errno = http_status_errno(http_response); |
1350 | 0 | goto error; |
1351 | 0 | } |
1352 | | |
1353 | 0 | if (fp == NULL) goto error; |
1354 | 0 | } else { |
1355 | 0 | kstring_t final_url = KS_INITIALIZE; |
1356 | | |
1357 | | // add the scheme marker |
1358 | 0 | ksprintf(&final_url, "s3w+%s", url.s); |
1359 | |
|
1360 | 0 | if(final_url.l == 0) goto error; |
1361 | | |
1362 | 0 | fp = hopen(final_url.s, mode, "va_list", argsp, |
1363 | 0 | "s3_auth_callback", write_authorisation_callback, |
1364 | 0 | "s3_auth_callback_data", ad, |
1365 | 0 | "redirect_callback", redirect_endpoint_callback, |
1366 | 0 | "set_region_callback", set_region, |
1367 | 0 | NULL); |
1368 | 0 | free(final_url.s); |
1369 | |
|
1370 | 0 | if (fp == NULL) goto error; |
1371 | 0 | } |
1372 | | |
1373 | 0 | free(url.s); |
1374 | |
|
1375 | 0 | return fp; |
1376 | | |
1377 | 459 | error: |
1378 | | |
1379 | 459 | if (fp) hclose_abruptly(fp); |
1380 | 459 | free(url.s); |
1381 | 459 | free_auth_data(ad); |
1382 | | |
1383 | 459 | return NULL; |
1384 | 459 | } |
1385 | | |
1386 | | |
1387 | | static hFILE *s3_open(const char *url, const char *mode) |
1388 | 459 | { |
1389 | 459 | hFILE *fp; |
1390 | | |
1391 | 459 | kstring_t mode_colon = { 0, 0, NULL }; |
1392 | 459 | kputs(mode, &mode_colon); |
1393 | 459 | kputc(':', &mode_colon); |
1394 | | |
1395 | 459 | if (getenv("HTS_S3_V2") == NULL) { // Force the v2 signature code |
1396 | 459 | fp = s3_open_v4(url, mode_colon.s, NULL); |
1397 | 459 | } else { |
1398 | 0 | fp = s3_rewrite(url, mode_colon.s, NULL); |
1399 | 0 | } |
1400 | | |
1401 | 459 | free(mode_colon.s); |
1402 | | |
1403 | 459 | return fp; |
1404 | 459 | } |
1405 | | |
1406 | | static hFILE *s3_vopen(const char *url, const char *mode_colon, va_list args0) |
1407 | 0 | { |
1408 | 0 | hFILE *fp; |
1409 | | // Need to use va_copy() as we can only take the address of an actual |
1410 | | // va_list object, not that of a parameter whose type may have decayed. |
1411 | 0 | va_list args; |
1412 | 0 | va_copy(args, args0); |
1413 | |
|
1414 | 0 | if (getenv("HTS_S3_V2") == NULL) { // Force the v2 signature code |
1415 | 0 | fp = s3_open_v4(url, mode_colon, &args); |
1416 | 0 | } else { |
1417 | 0 | fp = s3_rewrite(url, mode_colon, &args); |
1418 | 0 | } |
1419 | |
|
1420 | 0 | va_end(args); |
1421 | 0 | return fp; |
1422 | 0 | } |
1423 | | |
1424 | | int PLUGIN_GLOBAL(hfile_plugin_init,_s3)(struct hFILE_plugin *self) |
1425 | 1 | { |
1426 | 1 | static const struct hFILE_scheme_handler handler = |
1427 | 1 | { s3_open, hfile_always_remote, "Amazon S3", 2000 + 50, s3_vopen |
1428 | 1 | }; |
1429 | | |
1430 | | #ifdef ENABLE_PLUGINS |
1431 | | // Embed version string for examination via strings(1) or what(1) |
1432 | | static const char id[] = "@(#)hfile_s3 plugin (htslib)\t" HTS_VERSION_TEXT; |
1433 | | if (hts_verbose >= 9) |
1434 | | fprintf(stderr, "[M::hfile_s3.init] version %s\n", strchr(id, '\t')+1); |
1435 | | #endif |
1436 | | |
1437 | 1 | self->name = "Amazon S3"; |
1438 | 1 | hfile_add_scheme_handler("s3", &handler); |
1439 | 1 | hfile_add_scheme_handler("s3+http", &handler); |
1440 | 1 | hfile_add_scheme_handler("s3+https", &handler); |
1441 | 1 | return 0; |
1442 | 1 | } |