Line | Count | Source (jump to first uncovered line) |
1 | | /* hfile_s3.c -- Amazon S3 backend for low-level file streams. |
2 | | |
3 | | Copyright (C) 2015-2017, 2019-2022 Genome Research Ltd. |
4 | | |
5 | | Author: John Marshall <jm18@sanger.ac.uk> |
6 | | |
7 | | Permission is hereby granted, free of charge, to any person obtaining a copy |
8 | | of this software and associated documentation files (the "Software"), to deal |
9 | | in the Software without restriction, including without limitation the rights |
10 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
11 | | copies of the Software, and to permit persons to whom the Software is |
12 | | furnished to do so, subject to the following conditions: |
13 | | |
14 | | The above copyright notice and this permission notice shall be included in |
15 | | all copies or substantial portions of the Software. |
16 | | |
17 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
18 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
19 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
20 | | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
21 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
22 | | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
23 | | DEALINGS IN THE SOFTWARE. */ |
24 | | |
25 | | #define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h |
26 | | #include <config.h> |
27 | | |
28 | | #include <stdarg.h> |
29 | | #include <stdio.h> |
30 | | #include <stdlib.h> |
31 | | #include <string.h> |
32 | | #include <strings.h> |
33 | | #include <time.h> |
34 | | |
35 | | #include <errno.h> |
36 | | |
37 | | #include "hfile_internal.h" |
38 | | #ifdef ENABLE_PLUGINS |
39 | | #include "version.h" |
40 | | #endif |
41 | | #include "htslib/hts.h" // for hts_version() and hts_verbose |
42 | | #include "htslib/kstring.h" |
43 | | #include "hts_time_funcs.h" |
44 | | |
45 | | typedef struct s3_auth_data { |
46 | | kstring_t id; |
47 | | kstring_t token; |
48 | | kstring_t secret; |
49 | | kstring_t region; |
50 | | kstring_t canonical_query_string; |
51 | | kstring_t user_query_string; |
52 | | kstring_t host; |
53 | | kstring_t profile; |
54 | | time_t creds_expiry_time; |
55 | | char *bucket; |
56 | | kstring_t auth_hdr; |
57 | | time_t auth_time; |
58 | | char date[40]; |
59 | | char date_long[17]; |
60 | | char date_short[9]; |
61 | | kstring_t date_html; |
62 | | char mode; |
63 | | char *headers[5]; |
64 | | int refcount; |
65 | | } s3_auth_data; |
66 | | |
67 | 0 | #define AUTH_LIFETIME 60 // Regenerate auth headers if older than this |
68 | 0 | #define CREDENTIAL_LIFETIME 60 // Seconds before expiry to reread credentials |
69 | | |
70 | | #if defined HAVE_COMMONCRYPTO |
71 | | |
72 | | #include <CommonCrypto/CommonHMAC.h> |
73 | | |
74 | | #define DIGEST_BUFSIZ CC_SHA1_DIGEST_LENGTH |
75 | | #define SHA256_DIGEST_BUFSIZE CC_SHA256_DIGEST_LENGTH |
76 | | #define HASH_LENGTH_SHA256 (SHA256_DIGEST_BUFSIZE * 2) + 1 |
77 | | |
78 | | static size_t |
79 | | s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message) |
80 | | { |
81 | | CCHmac(kCCHmacAlgSHA1, key->s, key->l, message->s, message->l, digest); |
82 | | return CC_SHA1_DIGEST_LENGTH; |
83 | | } |
84 | | |
85 | | |
86 | | static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) { |
87 | | CC_SHA256(in, length, out); |
88 | | } |
89 | | |
90 | | |
91 | | static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) { |
92 | | CCHmac(kCCHmacAlgSHA256, key, key_len, d, n, md); |
93 | | *md_len = CC_SHA256_DIGEST_LENGTH; |
94 | | } |
95 | | |
96 | | |
97 | | #elif defined HAVE_HMAC |
98 | | |
99 | | #include <openssl/hmac.h> |
100 | | #include <openssl/sha.h> |
101 | | |
102 | | #define DIGEST_BUFSIZ EVP_MAX_MD_SIZE |
103 | 0 | #define SHA256_DIGEST_BUFSIZE SHA256_DIGEST_LENGTH |
104 | 0 | #define HASH_LENGTH_SHA256 (SHA256_DIGEST_BUFSIZE * 2) + 1 |
105 | | |
106 | | static size_t |
107 | | s3_sign(unsigned char *digest, kstring_t *key, kstring_t *message) |
108 | 0 | { |
109 | 0 | unsigned int len; |
110 | 0 | HMAC(EVP_sha1(), key->s, key->l, |
111 | 0 | (unsigned char *) message->s, message->l, digest, &len); |
112 | 0 | return len; |
113 | 0 | } |
114 | | |
115 | | |
116 | 0 | static void s3_sha256(const unsigned char *in, size_t length, unsigned char *out) { |
117 | 0 | SHA256(in, length, out); |
118 | 0 | } |
119 | | |
120 | | |
121 | 0 | static void s3_sign_sha256(const void *key, int key_len, const unsigned char *d, int n, unsigned char *md, unsigned int *md_len) { |
122 | 0 | HMAC(EVP_sha256(), key, key_len, d, n, md, md_len); |
123 | 0 | } |
124 | | |
125 | | #else |
126 | | #error No HMAC() routine found by configure |
127 | | #endif |
128 | | |
129 | | static void |
130 | | urldecode_kput(const char *s, int len, kstring_t *str) |
131 | 0 | { |
132 | 0 | char buf[3]; |
133 | 0 | int i = 0; |
134 | |
|
135 | 0 | while (i < len) |
136 | 0 | if (s[i] == '%' && i+2 < len) { |
137 | 0 | buf[0] = s[i+1], buf[1] = s[i+2], buf[2] = '\0'; |
138 | 0 | kputc(strtol(buf, NULL, 16), str); |
139 | 0 | i += 3; |
140 | 0 | } |
141 | 0 | else kputc(s[i++], str); |
142 | 0 | } |
143 | | |
144 | | static void base64_kput(const unsigned char *data, size_t len, kstring_t *str) |
145 | 0 | { |
146 | 0 | static const char base64[] = |
147 | 0 | "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; |
148 | |
|
149 | 0 | size_t i = 0; |
150 | 0 | unsigned x = 0; |
151 | 0 | int bits = 0, pad = 0; |
152 | |
|
153 | 0 | while (bits || i < len) { |
154 | 0 | if (bits < 6) { |
155 | 0 | x <<= 8, bits += 8; |
156 | 0 | if (i < len) x |= data[i++]; |
157 | 0 | else pad++; |
158 | 0 | } |
159 | |
|
160 | 0 | bits -= 6; |
161 | 0 | kputc(base64[(x >> bits) & 63], str); |
162 | 0 | } |
163 | |
|
164 | 0 | str->l -= pad; |
165 | 0 | kputsn("==", pad, str); |
166 | 0 | } |
167 | | |
/* Check whether the bucket name in [s0, slim) can be used as a DNS label.
   Accepts lower-case letters, digits, '-' (not first or last) and, unless
   is_https is set, '.' placed between two alphanumeric characters.  The name
   must contain at least one non-digit and be 3 to 63 characters long.
   Returns non-zero if all of these hold. */
static int is_dns_compliant(const char *s0, const char *slim, int is_https)
{
    const char *p = s0;
    int seen_nondigit = 0;
    int count = 0;

    while (p < slim) {
        if (islower_c(*p)) {
            seen_nondigit = 1;
        } else if (*p == '-') {
            seen_nondigit = 1;
            if (p == s0 || p + 1 == slim)
                return 0;
        } else if (*p == '.') {
            if (is_https)
                return 0;
            if (p == s0 || !isalnum_c(p[-1]))
                return 0;
            if (p + 1 == slim || !isalnum_c(p[1]))
                return 0;
        } else if (!isdigit_c(*p)) {
            return 0;
        }

        p++;
        count++;
    }

    return seen_nondigit && count >= 3 && count <= 63;
}
191 | | |
192 | | static FILE *expand_tilde_open(const char *fname, const char *mode) |
193 | 0 | { |
194 | 0 | FILE *fp; |
195 | |
|
196 | 0 | if (strncmp(fname, "~/", 2) == 0) { |
197 | 0 | kstring_t full_fname = { 0, 0, NULL }; |
198 | 0 | const char *home = getenv("HOME"); |
199 | 0 | if (! home) return NULL; |
200 | | |
201 | 0 | kputs(home, &full_fname); |
202 | 0 | kputs(&fname[1], &full_fname); |
203 | |
|
204 | 0 | fp = fopen(full_fname.s, mode); |
205 | 0 | free(full_fname.s); |
206 | 0 | } |
207 | 0 | else |
208 | 0 | fp = fopen(fname, mode); |
209 | | |
210 | 0 | return fp; |
211 | 0 | } |
212 | | |
213 | | static void parse_ini(const char *fname, const char *section, ...) |
214 | 0 | { |
215 | 0 | kstring_t line = { 0, 0, NULL }; |
216 | 0 | int active = 1; // Start active, so global properties are accepted |
217 | 0 | char *s; |
218 | |
|
219 | 0 | FILE *fp = expand_tilde_open(fname, "r"); |
220 | 0 | if (fp == NULL) return; |
221 | | |
222 | 0 | while (line.l = 0, kgetline(&line, (kgets_func *) fgets, fp) >= 0) |
223 | 0 | if (line.s[0] == '[' && (s = strchr(line.s, ']')) != NULL) { |
224 | 0 | *s = '\0'; |
225 | 0 | active = (strcmp(&line.s[1], section) == 0); |
226 | 0 | } |
227 | 0 | else if (active && (s = strpbrk(line.s, ":=")) != NULL) { |
228 | 0 | const char *key = line.s, *value = &s[1], *akey; |
229 | 0 | va_list args; |
230 | |
|
231 | 0 | while (isspace_c(*key)) key++; |
232 | 0 | while (s > key && isspace_c(s[-1])) s--; |
233 | 0 | *s = '\0'; |
234 | |
|
235 | 0 | while (isspace_c(*value)) value++; |
236 | 0 | while (line.l > 0 && isspace_c(line.s[line.l-1])) |
237 | 0 | line.s[--line.l] = '\0'; |
238 | |
|
239 | 0 | va_start(args, section); |
240 | 0 | while ((akey = va_arg(args, const char *)) != NULL) { |
241 | 0 | kstring_t *avar = va_arg(args, kstring_t *); |
242 | 0 | if (strcmp(key, akey) == 0) { |
243 | 0 | avar->l = 0; |
244 | 0 | kputs(value, avar); |
245 | 0 | break; } |
246 | 0 | } |
247 | 0 | va_end(args); |
248 | 0 | } |
249 | |
|
250 | 0 | fclose(fp); |
251 | 0 | free(line.s); |
252 | 0 | } |
253 | | |
254 | | static void parse_simple(const char *fname, kstring_t *id, kstring_t *secret) |
255 | 0 | { |
256 | 0 | kstring_t text = { 0, 0, NULL }; |
257 | 0 | char *s; |
258 | 0 | size_t len; |
259 | |
|
260 | 0 | FILE *fp = expand_tilde_open(fname, "r"); |
261 | 0 | if (fp == NULL) return; |
262 | | |
263 | 0 | while (kgetline(&text, (kgets_func *) fgets, fp) >= 0) |
264 | 0 | kputc(' ', &text); |
265 | 0 | fclose(fp); |
266 | |
|
267 | 0 | s = text.s; |
268 | 0 | while (isspace_c(*s)) s++; |
269 | 0 | kputsn(s, len = strcspn(s, " \t"), id); |
270 | |
|
271 | 0 | s += len; |
272 | 0 | while (isspace_c(*s)) s++; |
273 | 0 | kputsn(s, strcspn(s, " \t"), secret); |
274 | |
|
275 | 0 | free(text.s); |
276 | 0 | } |
277 | | |
278 | 0 | static int copy_auth_headers(s3_auth_data *ad, char ***hdrs) { |
279 | 0 | char **hdr = &ad->headers[0]; |
280 | 0 | int idx = 0; |
281 | 0 | *hdrs = hdr; |
282 | |
|
283 | 0 | hdr[idx] = strdup(ad->date); |
284 | 0 | if (!hdr[idx]) return -1; |
285 | 0 | idx++; |
286 | |
|
287 | 0 | if (ad->token.l) { |
288 | 0 | kstring_t token_hdr = KS_INITIALIZE; |
289 | 0 | kputs("X-Amz-Security-Token: ", &token_hdr); |
290 | 0 | kputs(ad->token.s, &token_hdr); |
291 | 0 | if (token_hdr.s) { |
292 | 0 | hdr[idx++] = token_hdr.s; |
293 | 0 | } else { |
294 | 0 | goto fail; |
295 | 0 | } |
296 | 0 | } |
297 | | |
298 | 0 | if (ad->auth_hdr.l) { |
299 | 0 | hdr[idx] = strdup(ad->auth_hdr.s); |
300 | 0 | if (!hdr[idx]) goto fail; |
301 | 0 | idx++; |
302 | 0 | } |
303 | | |
304 | 0 | hdr[idx] = NULL; |
305 | 0 | return 0; |
306 | | |
307 | 0 | fail: |
308 | 0 | for (--idx; idx >= 0; --idx) |
309 | 0 | free(hdr[idx]); |
310 | 0 | return -1; |
311 | 0 | } |
312 | | |
313 | 0 | static void free_auth_data(s3_auth_data *ad) { |
314 | 0 | if (ad->refcount > 0) { |
315 | 0 | --ad->refcount; |
316 | 0 | return; |
317 | 0 | } |
318 | 0 | free(ad->profile.s); |
319 | 0 | free(ad->id.s); |
320 | 0 | free(ad->token.s); |
321 | 0 | free(ad->secret.s); |
322 | 0 | free(ad->region.s); |
323 | 0 | free(ad->canonical_query_string.s); |
324 | 0 | free(ad->user_query_string.s); |
325 | 0 | free(ad->host.s); |
326 | 0 | free(ad->bucket); |
327 | 0 | free(ad->auth_hdr.s); |
328 | 0 | free(ad->date_html.s); |
329 | 0 | free(ad); |
330 | 0 | } |
331 | | |
332 | | static time_t parse_rfc3339_date(kstring_t *datetime) |
333 | 0 | { |
334 | 0 | int offset = 0; |
335 | 0 | time_t when; |
336 | 0 | int num; |
337 | 0 | char should_be_t = '\0', timezone[10] = { '\0' }; |
338 | 0 | unsigned int year, mon, day, hour, min, sec; |
339 | |
|
340 | 0 | if (!datetime->s) |
341 | 0 | return 0; |
342 | | |
343 | | // It should be possible to do this with strptime(), but it seems |
344 | | // to not get on with our feature definitions. |
345 | 0 | num = sscanf(datetime->s, "%4u-%2u-%2u%c%2u:%2u:%2u%9s", |
346 | 0 | &year, &mon, &day, &should_be_t, &hour, &min, &sec, timezone); |
347 | 0 | if (num < 8) |
348 | 0 | return 0; |
349 | 0 | if (should_be_t != 'T' && should_be_t != 't' && should_be_t != ' ') |
350 | 0 | return 0; |
351 | 0 | struct tm parsed = { sec, min, hour, day, mon - 1, year - 1900, 0, 0, 0 }; |
352 | |
|
353 | 0 | switch (timezone[0]) { |
354 | 0 | case 'Z': |
355 | 0 | case 'z': |
356 | 0 | case '\0': |
357 | 0 | break; |
358 | 0 | case '+': |
359 | 0 | case '-': { |
360 | 0 | unsigned hr_off, min_off; |
361 | 0 | if (sscanf(timezone + 1, "%2u:%2u", &hr_off, &min_off)) { |
362 | 0 | if (hr_off < 24 && min_off <= 60) { |
363 | 0 | offset = ((hr_off * 60 + min_off) |
364 | 0 | * (timezone[0] == '+' ? -60 : 60)); |
365 | 0 | } |
366 | 0 | } |
367 | 0 | break; |
368 | 0 | } |
369 | 0 | default: |
370 | 0 | return 0; |
371 | 0 | } |
372 | | |
373 | 0 | when = hts_time_gm(&parsed); |
374 | 0 | return when >= 0 ? when + offset : 0; |
375 | 0 | } |
376 | | |
377 | 0 | static void refresh_auth_data(s3_auth_data *ad) { |
378 | | // Basically a copy of the AWS_SHARED_CREDENTIALS_FILE part of |
379 | | // setup_auth_data(), but this only reads the authorisation parts. |
380 | 0 | const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE"); |
381 | 0 | kstring_t expiry_time = KS_INITIALIZE; |
382 | 0 | parse_ini(v? v : "~/.aws/credentials", ad->profile.s, |
383 | 0 | "aws_access_key_id", &ad->id, |
384 | 0 | "aws_secret_access_key", &ad->secret, |
385 | 0 | "aws_session_token", &ad->token, |
386 | 0 | "expiry_time", &expiry_time); |
387 | 0 | if (expiry_time.l) { |
388 | 0 | ad->creds_expiry_time = parse_rfc3339_date(&expiry_time); |
389 | 0 | } |
390 | 0 | ks_free(&expiry_time); |
391 | 0 | } |
392 | | |
393 | 0 | static int auth_header_callback(void *ctx, char ***hdrs) { |
394 | 0 | s3_auth_data *ad = (s3_auth_data *) ctx; |
395 | |
|
396 | 0 | time_t now = time(NULL); |
397 | 0 | #ifdef HAVE_GMTIME_R |
398 | 0 | struct tm tm_buffer; |
399 | 0 | struct tm *tm = gmtime_r(&now, &tm_buffer); |
400 | | #else |
401 | | struct tm *tm = gmtime(&now); |
402 | | #endif |
403 | 0 | kstring_t message = { 0, 0, NULL }; |
404 | 0 | unsigned char digest[DIGEST_BUFSIZ]; |
405 | 0 | size_t digest_len; |
406 | |
|
407 | 0 | if (!hdrs) { // Closing connection |
408 | 0 | free_auth_data(ad); |
409 | 0 | return 0; |
410 | 0 | } |
411 | | |
412 | 0 | if (ad->creds_expiry_time > 0 |
413 | 0 | && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) { |
414 | 0 | refresh_auth_data(ad); |
415 | 0 | } else if (now - ad->auth_time < AUTH_LIFETIME) { |
416 | | // Last auth string should still be valid |
417 | 0 | *hdrs = NULL; |
418 | 0 | return 0; |
419 | 0 | } |
420 | | |
421 | 0 | strftime(ad->date, sizeof(ad->date), "Date: %a, %d %b %Y %H:%M:%S GMT", tm); |
422 | 0 | if (!ad->id.l || !ad->secret.l) { |
423 | 0 | ad->auth_time = now; |
424 | 0 | return copy_auth_headers(ad, hdrs); |
425 | 0 | } |
426 | | |
427 | 0 | if (ksprintf(&message, "%s\n\n\n%s\n%s%s%s%s", |
428 | 0 | ad->mode == 'r' ? "GET" : "PUT", ad->date + 6, |
429 | 0 | ad->token.l ? "x-amz-security-token:" : "", |
430 | 0 | ad->token.l ? ad->token.s : "", |
431 | 0 | ad->token.l ? "\n" : "", |
432 | 0 | ad->bucket) < 0) { |
433 | 0 | return -1; |
434 | 0 | } |
435 | | |
436 | 0 | digest_len = s3_sign(digest, &ad->secret, &message); |
437 | 0 | ad->auth_hdr.l = 0; |
438 | 0 | if (ksprintf(&ad->auth_hdr, "Authorization: AWS %s:", ad->id.s) < 0) |
439 | 0 | goto fail; |
440 | 0 | base64_kput(digest, digest_len, &ad->auth_hdr); |
441 | |
|
442 | 0 | free(message.s); |
443 | 0 | ad->auth_time = now; |
444 | 0 | return copy_auth_headers(ad, hdrs); |
445 | | |
446 | 0 | fail: |
447 | 0 | free(message.s); |
448 | 0 | return -1; |
449 | 0 | } |
450 | | |
451 | | |
/* Percent-encode a query string.  Works like escape_path(), except that
   '=' and '&' are also left untouched and '?' is not treated specially.
   Returns a newly allocated string that the caller must free(), or NULL
   if memory could not be allocated. */
static char *escape_query(const char *qs) {
    size_t i, j = 0, length, alloced;
    char *escaped;

    length = strlen(qs);
    alloced = length * 3 + 1;   // worst case: every byte becomes "%XX"
    if ((escaped = malloc(alloced)) == NULL) {
        return NULL;
    }

    for (i = 0; i < length; i++) {
        // Read as unsigned so bytes >= 0x80 are not sign-extended, which
        // would make %02X print more than two digits.
        unsigned char c = (unsigned char) qs[i];

        if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
             c == '_' || c == '-' || c == '~' || c == '.' || c == '/' || c == '=' || c == '&') {
            escaped[j++] = (char) c;
        } else {
            snprintf(escaped + j, alloced - j, "%%%02X", (unsigned) c);
            j += 3;
        }
    }

    escaped[j] = '\0';

    return escaped;
}
479 | | |
480 | | |
/* Percent-encode the path part of a URL.  Letters, digits and the
   characters "_-~./" are copied as they are; every other byte becomes
   "%XX".  Encoding stops at the first '?': it and everything after it is
   copied unchanged.
   Returns a newly allocated string that the caller must free(), or NULL
   if memory could not be allocated. */
static char *escape_path(const char *path) {
    size_t i, j = 0, length, alloced;
    char *escaped;

    length = strlen(path);
    alloced = length * 3 + 1;   // worst case: every byte becomes "%XX"

    if ((escaped = malloc(alloced)) == NULL) {
        return NULL;
    }

    for (i = 0; i < length; i++) {
        // Read as unsigned so bytes >= 0x80 are not sign-extended, which
        // would make %02X print more than two digits.
        unsigned char c = (unsigned char) path[i];

        if (c == '?') break; // don't escape ? or beyond

        if ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') ||
             c == '_' || c == '-' || c == '~' || c == '.' || c == '/') {
            escaped[j++] = (char) c;
        } else {
            snprintf(escaped + j, alloced - j, "%%%02X", (unsigned) c);
            j += 3;
        }
    }

    if (i != length) {
        // in the case of a '?' copy the rest of the path across unchanged
        strcpy(escaped + j, path + i);
    } else {
        escaped[j] = '\0';
    }

    return escaped;
}
515 | | |
516 | | |
/* Decide whether str can be used as it is, without percent-encoding.
   Returns non-zero if it contains nothing that would need escaping, or if
   the escape state is set when the end of the string is reached.  That
   state is set by each "%XX" sequence with two hex digits and cleared by a
   '%' followed by two characters that are not both hex digits. */
static int is_escaped(const char *str) {
    int saw_escape = 0;
    int saw_unsafe = 0;
    const char *p;

    for (p = str; *p != '\0'; p++) {
        if (*p == '%' && p[1] != '\0' && p[2] != '\0') {
            if (isxdigit_c(p[1]) && isxdigit_c(p[2])) {
                saw_escape = 1;
                p += 2;     // the loop increment steps over the third char
                continue;
            }
            // only escaped if all % signs are escaped
            saw_escape = 0;
        }

        int is_safe = (*p >= '0' && *p <= '9')
                   || (*p >= 'A' && *p <= 'Z')
                   || (*p >= 'a' && *p <= 'z')
                   || *p == '_' || *p == '-' || *p == '~'
                   || *p == '.' || *p == '/';
        if (!is_safe)
            saw_unsafe = 1;
    }

    return saw_escape || !saw_unsafe;
}
543 | | |
544 | | static int redirect_endpoint_callback(void *auth, long response, |
545 | 0 | kstring_t *header, kstring_t *url) { |
546 | 0 | s3_auth_data *ad = (s3_auth_data *)auth; |
547 | 0 | char *new_region; |
548 | 0 | char *end; |
549 | 0 | int ret = -1; |
550 | | |
551 | | // get the new region from the reply header |
552 | 0 | if ((new_region = strstr(header->s, "x-amz-bucket-region: "))) { |
553 | |
|
554 | 0 | new_region += strlen("x-amz-bucket-region: "); |
555 | 0 | end = new_region; |
556 | |
|
557 | 0 | while (isalnum_c(*end) || ispunct_c(*end)) end++; |
558 | |
|
559 | 0 | *end = 0; |
560 | |
|
561 | 0 | if (strstr(ad->host.s, "amazonaws.com")) { |
562 | 0 | ad->region.l = 0; |
563 | 0 | kputs(new_region, &ad->region); |
564 | |
|
565 | 0 | ad->host.l = 0; |
566 | 0 | ksprintf(&ad->host, "s3.%s.amazonaws.com", new_region); |
567 | |
|
568 | 0 | if (ad->region.l && ad->host.l) { |
569 | 0 | url->l = 0; |
570 | 0 | kputs(ad->host.s, url); |
571 | 0 | kputsn(ad->bucket, strlen(ad->bucket), url); |
572 | 0 | if (ad->user_query_string.l) { |
573 | 0 | kputc('?', url); |
574 | 0 | kputsn(ad->user_query_string.s, ad->user_query_string.l, url); |
575 | 0 | } |
576 | 0 | ret = 0; |
577 | 0 | } |
578 | 0 | } |
579 | 0 | } |
580 | |
|
581 | 0 | return ret; |
582 | 0 | } |
583 | | |
584 | | static s3_auth_data * setup_auth_data(const char *s3url, const char *mode, |
585 | | int sigver, kstring_t *url) |
586 | 0 | { |
587 | 0 | s3_auth_data *ad = calloc(1, sizeof(*ad)); |
588 | 0 | const char *bucket, *path; |
589 | 0 | char *escaped = NULL; |
590 | 0 | size_t url_path_pos; |
591 | 0 | ptrdiff_t bucket_len; |
592 | 0 | int is_https = 1, dns_compliant; |
593 | 0 | char *query_start; |
594 | 0 | enum {s3_auto, s3_virtual, s3_path} address_style = s3_auto; |
595 | |
|
596 | 0 | if (!ad) |
597 | 0 | return NULL; |
598 | 0 | ad->mode = strchr(mode, 'r') ? 'r' : 'w'; |
599 | | |
600 | | // Our S3 URL format is s3[+SCHEME]://[ID[:SECRET[:TOKEN]]@]BUCKET/PATH |
601 | |
|
602 | 0 | if (s3url[2] == '+') { |
603 | 0 | bucket = strchr(s3url, ':') + 1; |
604 | 0 | if (bucket == NULL) { |
605 | 0 | free(ad); |
606 | 0 | return NULL; |
607 | 0 | } |
608 | 0 | kputsn(&s3url[3], bucket - &s3url[3], url); |
609 | 0 | is_https = strncmp(url->s, "https:", 6) == 0; |
610 | 0 | } |
611 | 0 | else { |
612 | 0 | kputs("https:", url); |
613 | 0 | bucket = &s3url[3]; |
614 | 0 | } |
615 | 0 | while (*bucket == '/') kputc(*bucket++, url); |
616 | |
|
617 | 0 | path = bucket + strcspn(bucket, "/?#@"); |
618 | |
|
619 | 0 | if (*path == '@') { |
620 | 0 | const char *colon = strpbrk(bucket, ":@"); |
621 | 0 | if (*colon != ':') { |
622 | 0 | urldecode_kput(bucket, colon - bucket, &ad->profile); |
623 | 0 | } |
624 | 0 | else { |
625 | 0 | const char *colon2 = strpbrk(&colon[1], ":@"); |
626 | 0 | urldecode_kput(bucket, colon - bucket, &ad->id); |
627 | 0 | urldecode_kput(&colon[1], colon2 - &colon[1], &ad->secret); |
628 | 0 | if (*colon2 == ':') |
629 | 0 | urldecode_kput(&colon2[1], path - &colon2[1], &ad->token); |
630 | 0 | } |
631 | |
|
632 | 0 | bucket = &path[1]; |
633 | 0 | path = bucket + strcspn(bucket, "/?#"); |
634 | 0 | } |
635 | 0 | else { |
636 | | // If the URL has no ID[:SECRET]@, consider environment variables. |
637 | 0 | const char *v; |
638 | 0 | if ((v = getenv("AWS_ACCESS_KEY_ID")) != NULL) kputs(v, &ad->id); |
639 | 0 | if ((v = getenv("AWS_SECRET_ACCESS_KEY")) != NULL) kputs(v, &ad->secret); |
640 | 0 | if ((v = getenv("AWS_SESSION_TOKEN")) != NULL) kputs(v, &ad->token); |
641 | 0 | if ((v = getenv("AWS_DEFAULT_REGION")) != NULL) kputs(v, &ad->region); |
642 | 0 | if ((v = getenv("HTS_S3_HOST")) != NULL) kputs(v, &ad->host); |
643 | |
|
644 | 0 | if ((v = getenv("AWS_DEFAULT_PROFILE")) != NULL) kputs(v, &ad->profile); |
645 | 0 | else if ((v = getenv("AWS_PROFILE")) != NULL) kputs(v, &ad->profile); |
646 | 0 | else kputs("default", &ad->profile); |
647 | |
|
648 | 0 | if ((v = getenv("HTS_S3_ADDRESS_STYLE")) != NULL) { |
649 | 0 | if (strcasecmp(v, "virtual") == 0) { |
650 | 0 | address_style = s3_virtual; |
651 | 0 | } else if (strcasecmp(v, "path") == 0) { |
652 | 0 | address_style = s3_path; |
653 | 0 | } |
654 | 0 | } |
655 | 0 | } |
656 | |
|
657 | 0 | if (ad->id.l == 0) { |
658 | 0 | kstring_t url_style = KS_INITIALIZE; |
659 | 0 | kstring_t expiry_time = KS_INITIALIZE; |
660 | 0 | const char *v = getenv("AWS_SHARED_CREDENTIALS_FILE"); |
661 | 0 | parse_ini(v? v : "~/.aws/credentials", ad->profile.s, |
662 | 0 | "aws_access_key_id", &ad->id, |
663 | 0 | "aws_secret_access_key", &ad->secret, |
664 | 0 | "aws_session_token", &ad->token, |
665 | 0 | "region", &ad->region, |
666 | 0 | "addressing_style", &url_style, |
667 | 0 | "expiry_time", &expiry_time, |
668 | 0 | NULL); |
669 | |
|
670 | 0 | if (url_style.l) { |
671 | 0 | if (strcmp(url_style.s, "virtual") == 0) { |
672 | 0 | address_style = s3_virtual; |
673 | 0 | } else if (strcmp(url_style.s, "path") == 0) { |
674 | 0 | address_style = s3_path; |
675 | 0 | } else { |
676 | 0 | address_style = s3_auto; |
677 | 0 | } |
678 | 0 | } |
679 | 0 | if (expiry_time.l) { |
680 | | // Not a real part of the AWS configuration file, but it allows |
681 | | // support for short-term credentials like those for the IAM |
682 | | // service. The botocore library uses the key "expiry_time" |
683 | | // internally for this purpose. |
684 | | // See https://github.com/boto/botocore/blob/develop/botocore/credentials.py |
685 | 0 | ad->creds_expiry_time = parse_rfc3339_date(&expiry_time); |
686 | 0 | } |
687 | |
|
688 | 0 | ks_free(&url_style); |
689 | 0 | ks_free(&expiry_time); |
690 | 0 | } |
691 | |
|
692 | 0 | if (ad->id.l == 0) { |
693 | 0 | kstring_t url_style = KS_INITIALIZE; |
694 | 0 | const char *v = getenv("HTS_S3_S3CFG"); |
695 | 0 | parse_ini(v? v : "~/.s3cfg", ad->profile.s, "access_key", &ad->id, |
696 | 0 | "secret_key", &ad->secret, "access_token", &ad->token, |
697 | 0 | "host_base", &ad->host, |
698 | 0 | "bucket_location", &ad->region, |
699 | 0 | "host_bucket", &url_style, |
700 | 0 | NULL); |
701 | |
|
702 | 0 | if (url_style.l) { |
703 | | // Conforming to s3cmd's GitHub PR#416, host_bucket without the "%(bucket)s" string |
704 | | // indicates use of path style adressing. |
705 | 0 | if (strstr(url_style.s, "%(bucket)s") == NULL) { |
706 | 0 | address_style = s3_path; |
707 | 0 | } else { |
708 | 0 | address_style = s3_auto; |
709 | 0 | } |
710 | 0 | } |
711 | |
|
712 | 0 | ks_free(&url_style); |
713 | 0 | } |
714 | |
|
715 | 0 | if (ad->id.l == 0) |
716 | 0 | parse_simple("~/.awssecret", &ad->id, &ad->secret); |
717 | | |
718 | | |
719 | | // if address_style is set, force the dns_compliant setting |
720 | 0 | if (address_style == s3_virtual) { |
721 | 0 | dns_compliant = 1; |
722 | 0 | } else if (address_style == s3_path) { |
723 | 0 | dns_compliant = 0; |
724 | 0 | } else { |
725 | 0 | dns_compliant = is_dns_compliant(bucket, path, is_https); |
726 | 0 | } |
727 | |
|
728 | 0 | if (ad->host.l == 0) |
729 | 0 | kputs("s3.amazonaws.com", &ad->host); |
730 | |
|
731 | 0 | if (!dns_compliant && ad->region.l > 0 |
732 | 0 | && strcmp(ad->host.s, "s3.amazonaws.com") == 0) { |
733 | | // Can avoid a redirection by including the region in the host name |
734 | | // (assuming the right one has been specified) |
735 | 0 | ad->host.l = 0; |
736 | 0 | ksprintf(&ad->host, "s3.%s.amazonaws.com", ad->region.s); |
737 | 0 | } |
738 | |
|
739 | 0 | if (ad->region.l == 0) |
740 | 0 | kputs("us-east-1", &ad->region); |
741 | |
|
742 | 0 | if (!is_escaped(path)) { |
743 | 0 | escaped = escape_path(path); |
744 | 0 | if (escaped == NULL) { |
745 | 0 | goto error; |
746 | 0 | } |
747 | 0 | } |
748 | | |
749 | 0 | bucket_len = path - bucket; |
750 | | |
751 | | // Use virtual hosted-style access if possible, otherwise path-style. |
752 | 0 | if (dns_compliant) { |
753 | 0 | size_t url_host_pos = url->l; |
754 | | // Append "bucket.host" to url |
755 | 0 | kputsn_(bucket, bucket_len, url); |
756 | 0 | kputc('.', url); |
757 | 0 | kputsn(ad->host.s, ad->host.l, url); |
758 | 0 | url_path_pos = url->l; |
759 | |
|
760 | 0 | if (sigver == 4) { |
761 | | // Copy back to ad->host to use when making the signature |
762 | 0 | ad->host.l = 0; |
763 | 0 | kputsn(url->s + url_host_pos, url->l - url_host_pos, &ad->host); |
764 | 0 | } |
765 | 0 | } |
766 | 0 | else { |
767 | | // Append "host/bucket" to url |
768 | 0 | kputsn(ad->host.s, ad->host.l, url); |
769 | 0 | url_path_pos = url->l; |
770 | 0 | kputc('/', url); |
771 | 0 | kputsn(bucket, bucket_len, url); |
772 | 0 | } |
773 | |
|
774 | 0 | kputs(escaped == NULL ? path : escaped, url); |
775 | |
|
776 | 0 | if (sigver == 4 || !dns_compliant) { |
777 | 0 | ad->bucket = malloc(url->l - url_path_pos + 1); |
778 | 0 | if (ad->bucket == NULL) { |
779 | 0 | goto error; |
780 | 0 | } |
781 | 0 | memcpy(ad->bucket, url->s + url_path_pos, url->l - url_path_pos + 1); |
782 | 0 | } |
783 | 0 | else { |
784 | 0 | ad->bucket = malloc(url->l - url_path_pos + bucket_len + 2); |
785 | 0 | if (ad->bucket == NULL) { |
786 | 0 | goto error; |
787 | 0 | } |
788 | 0 | ad->bucket[0] = '/'; |
789 | 0 | memcpy(ad->bucket + 1, bucket, bucket_len); |
790 | 0 | memcpy(ad->bucket + bucket_len + 1, |
791 | 0 | url->s + url_path_pos, url->l - url_path_pos + 1); |
792 | 0 | } |
793 | | |
794 | | // write any query strings to its own place to use later |
795 | 0 | if ((query_start = strchr(ad->bucket, '?'))) { |
796 | 0 | kputs(query_start + 1, &ad->user_query_string); |
797 | 0 | *query_start = 0; |
798 | 0 | } |
799 | |
|
800 | 0 | free(escaped); |
801 | |
|
802 | 0 | return ad; |
803 | | |
804 | 0 | error: |
805 | 0 | free(escaped); |
806 | 0 | free_auth_data(ad); |
807 | 0 | return NULL; |
808 | 0 | } |
809 | | |
810 | | static hFILE * s3_rewrite(const char *s3url, const char *mode, va_list *argsp) |
811 | 0 | { |
812 | 0 | kstring_t url = { 0, 0, NULL }; |
813 | 0 | s3_auth_data *ad = setup_auth_data(s3url, mode, 2, &url); |
814 | |
|
815 | 0 | if (!ad) |
816 | 0 | return NULL; |
817 | | |
818 | 0 | hFILE *fp = hopen(url.s, mode, "va_list", argsp, |
819 | 0 | "httphdr_callback", auth_header_callback, |
820 | 0 | "httphdr_callback_data", ad, |
821 | 0 | "redirect_callback", redirect_endpoint_callback, |
822 | 0 | "redirect_callback_data", ad, |
823 | 0 | NULL); |
824 | 0 | if (!fp) goto fail; |
825 | | |
826 | 0 | free(url.s); |
827 | 0 | return fp; |
828 | | |
829 | 0 | fail: |
830 | 0 | free(url.s); |
831 | 0 | free_auth_data(ad); |
832 | 0 | return NULL; |
833 | 0 | } |
834 | | |
835 | | /*************************************************************** |
836 | | |
837 | | AWS S3 sig version 4 writing code |
838 | | |
839 | | ****************************************************************/ |
840 | | |
841 | 0 | static void hash_string(char *in, size_t length, char *out, size_t out_len) { |
842 | 0 | unsigned char hashed[SHA256_DIGEST_BUFSIZE]; |
843 | 0 | int i, j; |
844 | |
|
845 | 0 | s3_sha256((const unsigned char *)in, length, hashed); |
846 | |
|
847 | 0 | for (i = 0, j = 0; i < SHA256_DIGEST_BUFSIZE; i++, j+= 2) { |
848 | 0 | snprintf(out + j, out_len - j, "%02x", hashed[i]); |
849 | 0 | } |
850 | 0 | } |
851 | | |
852 | 0 | static void ksinit(kstring_t *s) { |
853 | 0 | s->l = 0; |
854 | 0 | s->m = 0; |
855 | 0 | s->s = NULL; |
856 | 0 | } |
857 | | |
858 | | |
859 | 0 | static void ksfree(kstring_t *s) { |
860 | 0 | free(s->s); |
861 | 0 | ksinit(s); |
862 | 0 | } |
863 | | |
864 | | |
865 | 0 | static int make_signature(s3_auth_data *ad, kstring_t *string_to_sign, char *signature_string, size_t sig_string_len) { |
866 | 0 | unsigned char date_key[SHA256_DIGEST_BUFSIZE]; |
867 | 0 | unsigned char date_region_key[SHA256_DIGEST_BUFSIZE]; |
868 | 0 | unsigned char date_region_service_key[SHA256_DIGEST_BUFSIZE]; |
869 | 0 | unsigned char signing_key[SHA256_DIGEST_BUFSIZE]; |
870 | 0 | unsigned char signature[SHA256_DIGEST_BUFSIZE]; |
871 | |
|
872 | 0 | const unsigned char service[] = "s3"; |
873 | 0 | const unsigned char request[] = "aws4_request"; |
874 | |
|
875 | 0 | kstring_t secret_access_key = {0, 0, NULL}; |
876 | 0 | unsigned int len; |
877 | 0 | unsigned int i, j; |
878 | |
|
879 | 0 | ksprintf(&secret_access_key, "AWS4%s", ad->secret.s); |
880 | |
|
881 | 0 | if (secret_access_key.l == 0) { |
882 | 0 | return -1; |
883 | 0 | } |
884 | | |
885 | 0 | s3_sign_sha256(secret_access_key.s, secret_access_key.l, (const unsigned char *)ad->date_short, strlen(ad->date_short), date_key, &len); |
886 | 0 | s3_sign_sha256(date_key, len, (const unsigned char *)ad->region.s, ad->region.l, date_region_key, &len); |
887 | 0 | s3_sign_sha256(date_region_key, len, service, 2, date_region_service_key, &len); |
888 | 0 | s3_sign_sha256(date_region_service_key, len, request, 12, signing_key, &len); |
889 | 0 | s3_sign_sha256(signing_key, len, (const unsigned char *)string_to_sign->s, string_to_sign->l, signature, &len); |
890 | |
|
891 | 0 | for (i = 0, j = 0; i < len; i++, j+= 2) { |
892 | 0 | snprintf(signature_string + j, sig_string_len - j, "%02x", signature[i]); |
893 | 0 | } |
894 | |
|
895 | 0 | ksfree(&secret_access_key); |
896 | |
|
897 | 0 | return 0; |
898 | 0 | } |
899 | | |
900 | | |
901 | 0 | static int make_authorisation(s3_auth_data *ad, char *http_request, char *content, kstring_t *auth) { |
902 | 0 | kstring_t signed_headers = {0, 0, NULL}; |
903 | 0 | kstring_t canonical_headers = {0, 0, NULL}; |
904 | 0 | kstring_t canonical_request = {0, 0, NULL}; |
905 | 0 | kstring_t scope = {0, 0, NULL}; |
906 | 0 | kstring_t string_to_sign = {0, 0, NULL}; |
907 | 0 | char cr_hash[HASH_LENGTH_SHA256]; |
908 | 0 | char signature_string[HASH_LENGTH_SHA256]; |
909 | 0 | int ret = -1; |
910 | | |
911 | |
|
912 | 0 | if (!ad->token.l) { |
913 | 0 | kputs("host;x-amz-content-sha256;x-amz-date", &signed_headers); |
914 | 0 | } else { |
915 | 0 | kputs("host;x-amz-content-sha256;x-amz-date;x-amz-security-token", &signed_headers); |
916 | 0 | } |
917 | |
|
918 | 0 | if (signed_headers.l == 0) { |
919 | 0 | return -1; |
920 | 0 | } |
921 | | |
922 | | |
923 | 0 | if (!ad->token.l) { |
924 | 0 | ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\n", |
925 | 0 | ad->host.s, content, ad->date_long); |
926 | 0 | } else { |
927 | 0 | ksprintf(&canonical_headers, "host:%s\nx-amz-content-sha256:%s\nx-amz-date:%s\nx-amz-security-token:%s\n", |
928 | 0 | ad->host.s, content, ad->date_long, ad->token.s); |
929 | 0 | } |
930 | |
|
931 | 0 | if (canonical_headers.l == 0) { |
932 | 0 | goto cleanup; |
933 | 0 | } |
934 | | |
935 | | // bucket == canonical_uri |
936 | 0 | ksprintf(&canonical_request, "%s\n%s\n%s\n%s\n%s\n%s", |
937 | 0 | http_request, ad->bucket, ad->canonical_query_string.s, |
938 | 0 | canonical_headers.s, signed_headers.s, content); |
939 | |
|
940 | 0 | if (canonical_request.l == 0) { |
941 | 0 | goto cleanup; |
942 | 0 | } |
943 | | |
944 | 0 | hash_string(canonical_request.s, canonical_request.l, cr_hash, sizeof(cr_hash)); |
945 | |
|
946 | 0 | ksprintf(&scope, "%s/%s/s3/aws4_request", ad->date_short, ad->region.s); |
947 | |
|
948 | 0 | if (scope.l == 0) { |
949 | 0 | goto cleanup; |
950 | 0 | } |
951 | | |
952 | 0 | ksprintf(&string_to_sign, "AWS4-HMAC-SHA256\n%s\n%s\n%s", ad->date_long, scope.s, cr_hash); |
953 | |
|
954 | 0 | if (string_to_sign.l == 0) { |
955 | 0 | goto cleanup; |
956 | 0 | } |
957 | | |
958 | 0 | if (make_signature(ad, &string_to_sign, signature_string, sizeof(signature_string))) { |
959 | 0 | goto cleanup; |
960 | 0 | } |
961 | | |
962 | 0 | ksprintf(auth, "Authorization: AWS4-HMAC-SHA256 Credential=%s/%s/%s/s3/aws4_request,SignedHeaders=%s,Signature=%s", |
963 | 0 | ad->id.s, ad->date_short, ad->region.s, signed_headers.s, signature_string); |
964 | |
|
965 | 0 | if (auth->l == 0) { |
966 | 0 | goto cleanup; |
967 | 0 | } |
968 | | |
969 | 0 | ret = 0; |
970 | |
|
971 | 0 | cleanup: |
972 | 0 | ksfree(&signed_headers); |
973 | 0 | ksfree(&canonical_headers); |
974 | 0 | ksfree(&canonical_request); |
975 | 0 | ksfree(&scope); |
976 | 0 | ksfree(&string_to_sign); |
977 | |
|
978 | 0 | return ret; |
979 | 0 | } |
980 | | |
981 | | |
982 | 0 | static int update_time(s3_auth_data *ad, time_t now) { |
983 | 0 | int ret = -1; |
984 | 0 | #ifdef HAVE_GMTIME_R |
985 | 0 | struct tm tm_buffer; |
986 | 0 | struct tm *tm = gmtime_r(&now, &tm_buffer); |
987 | | #else |
988 | | struct tm *tm = gmtime(&now); |
989 | | #endif |
990 | |
|
991 | 0 | if (now - ad->auth_time > AUTH_LIFETIME) { |
992 | | // update timestamp |
993 | 0 | ad->auth_time = now; |
994 | |
|
995 | 0 | if (strftime(ad->date_long, 17, "%Y%m%dT%H%M%SZ", tm) != 16) { |
996 | 0 | return -1; |
997 | 0 | } |
998 | | |
999 | 0 | if (strftime(ad->date_short, 9, "%Y%m%d", tm) != 8) { |
1000 | 0 | return -1;; |
1001 | 0 | } |
1002 | | |
1003 | 0 | ad->date_html.l = 0; |
1004 | 0 | ksprintf(&ad->date_html, "x-amz-date: %s", ad->date_long); |
1005 | 0 | } |
1006 | | |
1007 | 0 | if (ad->date_html.l) ret = 0; |
1008 | |
|
1009 | 0 | return ret; |
1010 | 0 | } |
1011 | | |
1012 | | |
/*
 * qsort() comparison function for an array of C strings.
 *
 * p1 and p2 each point at an array element (a char *); the strings they
 * refer to are compared with strcmp().  The elements are only read, so
 * the const qualifier of the arguments is kept rather than cast away.
 */
static int query_cmp(const void *p1, const void *p2) {
    char *const *q1 = p1;
    char *const *q2 = p2;

    return strcmp(*q1, *q2);
}
1019 | | |
1020 | | |
1021 | | /* Query strings must be in alphabetical order for authorisation */ |
1022 | | |
1023 | 0 | static int order_query_string(kstring_t *qs) { |
1024 | 0 | int *query_offset = NULL; |
1025 | 0 | int num_queries, i; |
1026 | 0 | char **queries = NULL; |
1027 | 0 | kstring_t ordered = {0, 0, NULL}; |
1028 | 0 | char *escaped = NULL; |
1029 | 0 | int ret = -1; |
1030 | |
|
1031 | 0 | if ((query_offset = ksplit(qs, '&', &num_queries)) == NULL) { |
1032 | 0 | return -1; |
1033 | 0 | } |
1034 | | |
1035 | 0 | if ((queries = malloc(num_queries * sizeof(char*))) == NULL) |
1036 | 0 | goto err; |
1037 | | |
1038 | 0 | for (i = 0; i < num_queries; i++) { |
1039 | 0 | queries[i] = qs->s + query_offset[i]; |
1040 | 0 | } |
1041 | |
|
1042 | 0 | qsort(queries, num_queries, sizeof(char *), query_cmp); |
1043 | |
|
1044 | 0 | for (i = 0; i < num_queries; i++) { |
1045 | 0 | if (i) { |
1046 | 0 | kputs("&", &ordered); |
1047 | 0 | } |
1048 | |
|
1049 | 0 | kputs(queries[i], &ordered); |
1050 | 0 | } |
1051 | |
|
1052 | 0 | if ((escaped = escape_query(ordered.s)) == NULL) |
1053 | 0 | goto err; |
1054 | | |
1055 | 0 | qs->l = 0; |
1056 | 0 | kputs(escaped, qs); |
1057 | |
|
1058 | 0 | ret = 0; |
1059 | 0 | err: |
1060 | 0 | free(ordered.s); |
1061 | 0 | free(queries); |
1062 | 0 | free(query_offset); |
1063 | 0 | free(escaped); |
1064 | |
|
1065 | 0 | return ret; |
1066 | 0 | } |
1067 | | |
1068 | | |
1069 | | static int write_authorisation_callback(void *auth, char *request, kstring_t *content, char *cqs, |
1070 | | kstring_t *hash, kstring_t *auth_str, kstring_t *date, |
1071 | 0 | kstring_t *token, int uqs) { |
1072 | 0 | s3_auth_data *ad = (s3_auth_data *)auth; |
1073 | 0 | char content_hash[HASH_LENGTH_SHA256]; |
1074 | 0 | time_t now; |
1075 | |
|
1076 | 0 | if (request == NULL) { |
1077 | | // signal to free auth data |
1078 | 0 | free_auth_data(ad); |
1079 | 0 | return 0; |
1080 | 0 | } |
1081 | | |
1082 | 0 | now = time(NULL); |
1083 | |
|
1084 | 0 | if (update_time(ad, now)) { |
1085 | 0 | return -1; |
1086 | 0 | } |
1087 | 0 | if (ad->creds_expiry_time > 0 |
1088 | 0 | && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) { |
1089 | 0 | refresh_auth_data(ad); |
1090 | 0 | } |
1091 | |
|
1092 | 0 | if (content) { |
1093 | 0 | hash_string(content->s, content->l, content_hash, sizeof(content_hash)); |
1094 | 0 | } else { |
1095 | | // empty hash |
1096 | 0 | hash_string("", 0, content_hash, sizeof(content_hash)); |
1097 | 0 | } |
1098 | |
|
1099 | 0 | ad->canonical_query_string.l = 0; |
1100 | 0 | kputs(cqs, &ad->canonical_query_string); |
1101 | |
|
1102 | 0 | if (ad->canonical_query_string.l == 0) { |
1103 | 0 | return -1; |
1104 | 0 | } |
1105 | | |
1106 | | /* add a user provided query string, normally only useful on upload initiation */ |
1107 | 0 | if (uqs) { |
1108 | 0 | kputs("&", &ad->canonical_query_string); |
1109 | 0 | kputs(ad->user_query_string.s, &ad->canonical_query_string); |
1110 | |
|
1111 | 0 | if (order_query_string(&ad->canonical_query_string)) { |
1112 | 0 | return -1; |
1113 | 0 | } |
1114 | 0 | } |
1115 | | |
1116 | 0 | if (make_authorisation(ad, request, content_hash, auth_str)) { |
1117 | 0 | return -1; |
1118 | 0 | } |
1119 | | |
1120 | 0 | kputs(ad->date_html.s, date); |
1121 | 0 | kputsn(content_hash, HASH_LENGTH_SHA256, hash); |
1122 | |
|
1123 | 0 | if (date->l == 0 || hash->l == 0) { |
1124 | 0 | return -1; |
1125 | 0 | } |
1126 | | |
1127 | 0 | if (ad->token.l) { |
1128 | 0 | ksprintf(token, "x-amz-security-token: %s", ad->token.s); |
1129 | 0 | } |
1130 | |
|
1131 | 0 | return 0; |
1132 | 0 | } |
1133 | | |
1134 | | |
1135 | 0 | static int v4_auth_header_callback(void *ctx, char ***hdrs) { |
1136 | 0 | s3_auth_data *ad = (s3_auth_data *) ctx; |
1137 | 0 | char content_hash[HASH_LENGTH_SHA256]; |
1138 | 0 | kstring_t content = KS_INITIALIZE; |
1139 | 0 | kstring_t authorisation = KS_INITIALIZE; |
1140 | 0 | kstring_t token_hdr = KS_INITIALIZE; |
1141 | 0 | char *date_html = NULL; |
1142 | 0 | time_t now; |
1143 | 0 | int idx; |
1144 | |
|
1145 | 0 | if (!hdrs) { // Closing connection |
1146 | 0 | free_auth_data(ad); |
1147 | 0 | return 0; |
1148 | 0 | } |
1149 | | |
1150 | 0 | now = time(NULL); |
1151 | |
|
1152 | 0 | if (update_time(ad, now)) { |
1153 | 0 | return -1; |
1154 | 0 | } |
1155 | | |
1156 | 0 | if (ad->creds_expiry_time > 0 |
1157 | 0 | && ad->creds_expiry_time - now < CREDENTIAL_LIFETIME) { |
1158 | 0 | refresh_auth_data(ad); |
1159 | 0 | } |
1160 | |
|
1161 | 0 | if (!ad->id.l || !ad->secret.l) { |
1162 | 0 | return copy_auth_headers(ad, hdrs); |
1163 | 0 | } |
1164 | | |
1165 | 0 | hash_string("", 0, content_hash, sizeof(content_hash)); // empty hash |
1166 | |
|
1167 | 0 | ad->canonical_query_string.l = 0; |
1168 | |
|
1169 | 0 | if (ad->user_query_string.l > 0) { |
1170 | 0 | kputs(ad->user_query_string.s, &ad->canonical_query_string); |
1171 | |
|
1172 | 0 | if (order_query_string(&ad->canonical_query_string)) { |
1173 | 0 | return -1; |
1174 | 0 | } |
1175 | 0 | } else { |
1176 | 0 | kputs("", &ad->canonical_query_string); |
1177 | 0 | } |
1178 | | |
1179 | 0 | if (make_authorisation(ad, "GET", content_hash, &authorisation)) { |
1180 | 0 | return -1; |
1181 | 0 | } |
1182 | | |
1183 | 0 | ksprintf(&content, "x-amz-content-sha256: %s", content_hash); |
1184 | 0 | date_html = strdup(ad->date_html.s); |
1185 | |
|
1186 | 0 | if (ad->token.l > 0) { |
1187 | 0 | kputs("X-Amz-Security-Token: ", &token_hdr); |
1188 | 0 | kputs(ad->token.s, &token_hdr); |
1189 | 0 | } |
1190 | |
|
1191 | 0 | if (content.l == 0 || date_html == NULL) { |
1192 | 0 | ksfree(&authorisation); |
1193 | 0 | ksfree(&content); |
1194 | 0 | ksfree(&token_hdr); |
1195 | 0 | free(date_html); |
1196 | 0 | return -1; |
1197 | 0 | } |
1198 | | |
1199 | 0 | *hdrs = &ad->headers[0]; |
1200 | 0 | idx = 0; |
1201 | 0 | ad->headers[idx++] = ks_release(&authorisation); |
1202 | 0 | ad->headers[idx++] = date_html; |
1203 | 0 | ad->headers[idx++] = ks_release(&content); |
1204 | 0 | if (token_hdr.s) |
1205 | 0 | ad->headers[idx++] = ks_release(&token_hdr); |
1206 | 0 | ad->headers[idx++] = NULL; |
1207 | |
|
1208 | 0 | return 0; |
1209 | 0 | } |
1210 | | |
1211 | 0 | static int handle_400_response(hFILE *fp, s3_auth_data *ad) { |
1212 | | // v4 signatures in virtual hosted mode return 400 Bad Request if the |
1213 | | // wrong region is used to make the signature. The response is an xml |
1214 | | // document which includes the name of the correct region. This can |
1215 | | // be extracted and used to generate a corrected signature. |
1216 | | // As the xml is fairly simple, go with something "good enough" instead |
1217 | | // of trying to parse it properly. |
1218 | |
|
1219 | 0 | char buffer[1024], *region, *reg_end; |
1220 | 0 | ssize_t bytes; |
1221 | |
|
1222 | 0 | bytes = hread(fp, buffer, sizeof(buffer) - 1); |
1223 | 0 | if (bytes < 0) { |
1224 | 0 | return -1; |
1225 | 0 | } |
1226 | 0 | buffer[bytes] = '\0'; |
1227 | 0 | region = strstr(buffer, "<Region>"); |
1228 | 0 | if (region == NULL) { |
1229 | 0 | return -1; |
1230 | 0 | } |
1231 | 0 | region += 8; |
1232 | 0 | while (isspace((unsigned char) *region)) ++region; |
1233 | 0 | reg_end = strchr(region, '<'); |
1234 | 0 | if (reg_end == NULL || strncmp(reg_end + 1, "/Region>", 8) != 0) { |
1235 | 0 | return -1; |
1236 | 0 | } |
1237 | 0 | while (reg_end > region && isspace((unsigned char) reg_end[-1])) --reg_end; |
1238 | 0 | ad->region.l = 0; |
1239 | 0 | kputsn(region, reg_end - region, &ad->region); |
1240 | 0 | if (ad->region.l == 0) { |
1241 | 0 | return -1; |
1242 | 0 | } |
1243 | | |
1244 | 0 | return 0; |
1245 | 0 | } |
1246 | | |
1247 | 0 | static int set_region(void *adv, kstring_t *region) { |
1248 | 0 | s3_auth_data *ad = (s3_auth_data *) adv; |
1249 | |
|
1250 | 0 | ad->region.l = 0; |
1251 | 0 | return kputsn(region->s, region->l, &ad->region) < 0; |
1252 | 0 | } |
1253 | | |
/*
 * Map an HTTP status code onto an errno value.
 *
 * Codes below 400 give 0.  Selected 4xx and 5xx codes have specific
 * mappings; any other 4xx code gives EINVAL and any other code of 500 or
 * above gives EIO.
 */
static int http_status_errno(int status)
{
    if (status < 400)
        return 0;

    switch (status) {
    case 401:
    case 407:
        return EPERM;
    case 403:
        return EACCES;
    case 404:
    case 410:
        return ENOENT;
    case 405:
        return EROFS;
    case 408:
    case 504:
        return ETIMEDOUT;
    case 501:
        return ENOSYS;
    case 503:
        return EBUSY;
    default:
        // Generic fallback: server-side codes vs. client-side codes
        return (status >= 500) ? EIO : EINVAL;
    }
}
1276 | | |
1277 | 0 | static hFILE *s3_open_v4(const char *s3url, const char *mode, va_list *argsp) { |
1278 | 0 | kstring_t url = { 0, 0, NULL }; |
1279 | |
|
1280 | 0 | s3_auth_data *ad = setup_auth_data(s3url, mode, 4, &url); |
1281 | 0 | hFILE *fp = NULL; |
1282 | |
|
1283 | 0 | if (ad == NULL) { |
1284 | 0 | return NULL; |
1285 | 0 | } |
1286 | | |
1287 | 0 | if (ad->mode == 'r') { |
1288 | 0 | long http_response = 0; |
1289 | |
|
1290 | 0 | fp = hopen(url.s, mode, "va_list", argsp, |
1291 | 0 | "httphdr_callback", v4_auth_header_callback, |
1292 | 0 | "httphdr_callback_data", ad, |
1293 | 0 | "redirect_callback", redirect_endpoint_callback, |
1294 | 0 | "redirect_callback_data", ad, |
1295 | 0 | "http_response_ptr", &http_response, |
1296 | 0 | "fail_on_error", 0, |
1297 | 0 | NULL); |
1298 | |
|
1299 | 0 | if (fp == NULL) goto error; |
1300 | | |
1301 | 0 | if (http_response == 400) { |
1302 | 0 | ad->refcount = 1; |
1303 | 0 | if (handle_400_response(fp, ad) != 0) { |
1304 | 0 | goto error; |
1305 | 0 | } |
1306 | 0 | hclose_abruptly(fp); |
1307 | 0 | fp = hopen(url.s, mode, "va_list", argsp, |
1308 | 0 | "httphdr_callback", v4_auth_header_callback, |
1309 | 0 | "httphdr_callback_data", ad, |
1310 | 0 | "redirect_callback", redirect_endpoint_callback, |
1311 | 0 | "redirect_callback_data", ad, |
1312 | 0 | NULL); |
1313 | 0 | } else if (http_response > 400) { |
1314 | 0 | ad->refcount = 1; |
1315 | 0 | errno = http_status_errno(http_response); |
1316 | 0 | goto error; |
1317 | 0 | } |
1318 | | |
1319 | 0 | if (fp == NULL) goto error; |
1320 | 0 | } else { |
1321 | 0 | kstring_t final_url = {0, 0, NULL}; |
1322 | | |
1323 | | // add the scheme marker |
1324 | 0 | ksprintf(&final_url, "s3w+%s", url.s); |
1325 | |
|
1326 | 0 | if(final_url.l == 0) goto error; |
1327 | | |
1328 | 0 | fp = hopen(final_url.s, mode, "va_list", argsp, |
1329 | 0 | "s3_auth_callback", write_authorisation_callback, |
1330 | 0 | "s3_auth_callback_data", ad, |
1331 | 0 | "redirect_callback", redirect_endpoint_callback, |
1332 | 0 | "set_region_callback", set_region, |
1333 | 0 | NULL); |
1334 | 0 | free(final_url.s); |
1335 | |
|
1336 | 0 | if (fp == NULL) goto error; |
1337 | 0 | } |
1338 | | |
1339 | 0 | free(url.s); |
1340 | |
|
1341 | 0 | return fp; |
1342 | | |
1343 | 0 | error: |
1344 | |
|
1345 | 0 | if (fp) hclose_abruptly(fp); |
1346 | 0 | free(url.s); |
1347 | 0 | free_auth_data(ad); |
1348 | |
|
1349 | 0 | return NULL; |
1350 | 0 | } |
1351 | | |
1352 | | |
1353 | | static hFILE *s3_open(const char *url, const char *mode) |
1354 | 0 | { |
1355 | 0 | hFILE *fp; |
1356 | |
|
1357 | 0 | kstring_t mode_colon = { 0, 0, NULL }; |
1358 | 0 | kputs(mode, &mode_colon); |
1359 | 0 | kputc(':', &mode_colon); |
1360 | |
|
1361 | 0 | if (getenv("HTS_S3_V2") == NULL) { // Force the v2 signature code |
1362 | 0 | fp = s3_open_v4(url, mode_colon.s, NULL); |
1363 | 0 | } else { |
1364 | 0 | fp = s3_rewrite(url, mode_colon.s, NULL); |
1365 | 0 | } |
1366 | |
|
1367 | 0 | free(mode_colon.s); |
1368 | |
|
1369 | 0 | return fp; |
1370 | 0 | } |
1371 | | |
1372 | | static hFILE *s3_vopen(const char *url, const char *mode_colon, va_list args0) |
1373 | 0 | { |
1374 | 0 | hFILE *fp; |
1375 | | // Need to use va_copy() as we can only take the address of an actual |
1376 | | // va_list object, not that of a parameter whose type may have decayed. |
1377 | 0 | va_list args; |
1378 | 0 | va_copy(args, args0); |
1379 | |
|
1380 | 0 | if (getenv("HTS_S3_V2") == NULL) { // Force the v2 signature code |
1381 | 0 | fp = s3_open_v4(url, mode_colon, &args); |
1382 | 0 | } else { |
1383 | 0 | fp = s3_rewrite(url, mode_colon, &args); |
1384 | 0 | } |
1385 | |
|
1386 | 0 | va_end(args); |
1387 | 0 | return fp; |
1388 | 0 | } |
1389 | | |
1390 | | int PLUGIN_GLOBAL(hfile_plugin_init,_s3)(struct hFILE_plugin *self) |
1391 | 1 | { |
1392 | 1 | static const struct hFILE_scheme_handler handler = |
1393 | 1 | { s3_open, hfile_always_remote, "Amazon S3", 2000 + 50, s3_vopen |
1394 | 1 | }; |
1395 | | |
1396 | | #ifdef ENABLE_PLUGINS |
1397 | | // Embed version string for examination via strings(1) or what(1) |
1398 | | static const char id[] = "@(#)hfile_s3 plugin (htslib)\t" HTS_VERSION_TEXT; |
1399 | | if (hts_verbose >= 9) |
1400 | | fprintf(stderr, "[M::hfile_s3.init] version %s\n", strchr(id, '\t')+1); |
1401 | | #endif |
1402 | | |
1403 | 1 | self->name = "Amazon S3"; |
1404 | 1 | hfile_add_scheme_handler("s3", &handler); |
1405 | 1 | hfile_add_scheme_handler("s3+http", &handler); |
1406 | 1 | hfile_add_scheme_handler("s3+https", &handler); |
1407 | 1 | return 0; |
1408 | 1 | } |