Coverage Report

Created: 2023-01-17 06:24

/src/htslib/hfile_s3_write.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
    hfile_s3_write.c - Code to handle multipart uploading to S3.
3
4
    Copyright (C) 2019 Genome Research Ltd.
5
6
    Author: Andrew Whitwham <aw7@sanger.ac.uk>
7
8
Permission is hereby granted, free of charge, to any person obtaining a copy
9
of this software and associated documentation files (the "Software"), to deal
10
in the Software without restriction, including without limitation the rights
11
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12
copies of the Software, and to permit persons to whom the Software is
13
furnished to do so, subject to the following conditions:
14
15
The above copyright notice and this permission notice shall be included in
16
all copies or substantial portions of the Software.
17
18
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24
DEALINGS IN THE SOFTWARE
25
26
27
S3 Multipart Upload
28
-------------------
29
30
There are several steps in the Multipart upload.
31
32
33
1) Initiate Upload
34
------------------
35
36
Initiate the upload and get an upload ID.  This ID is used in all other steps.
37
38
39
2) Upload Part
40
--------------
41
42
Upload a part of the data.  5Mb minimum part size (except for the last part).
43
Each part is numbered and a successful upload returns an Etag header value that
44
needs to be used for the completion step.
45
46
Step repeated till all data is uploaded.
47
48
49
3) Completion
50
-------------
51
52
Complete the upload by sending all the part numbers along with their associated
53
Etag values.
54
55
56
Optional - Abort
57
----------------
58
59
If something goes wrong this instructs the server to delete all the partial
60
uploads and abandon the upload process.
61
62
63
Andrew Whitwham, January 2019
64
*/
65
66
#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
67
#include <config.h>
68
69
#include <stdarg.h>
70
#include <stdlib.h>
71
#include <string.h>
72
#ifdef __MSYS__
73
#include <strings.h>
74
#endif
75
#include <errno.h>
76
#include <pthread.h>
77
78
#include "hfile_internal.h"
79
#ifdef ENABLE_PLUGINS
80
#include "version.h"
81
#endif
82
#include "htslib/hts.h"
83
#include "htslib/kstring.h"
84
#include "htslib/khash.h"
85
86
#include <curl/curl.h>
87
88
0
#define MINIMUM_S3_WRITE_SIZE 5242880
89
0
#define S3_MOVED_PERMANENTLY 301
90
0
#define S3_BAD_REQUEST 400
91
92
// Lets the part memory size grow to about 1Gb giving a 2.5Tb max file size.
93
// Max. parts allowed by AWS is 10000, so use ceil(10000.0/9.0)
94
0
#define EXPAND_ON 1112
95
96
static struct {
97
    kstring_t useragent;
98
    CURLSH *share;
99
    pthread_mutex_t share_lock;
100
} curl = { { 0, 0, NULL }, NULL, PTHREAD_MUTEX_INITIALIZER };
101
102
static void share_lock(CURL *handle, curl_lock_data data,
103
1
                       curl_lock_access access, void *userptr) {
104
1
    pthread_mutex_lock(&curl.share_lock);
105
1
}
106
107
1
/*
    Unlock callback registered on the libcurl share handle; releases the
    mutex taken by share_lock().  The arguments are not consulted.
*/
static void share_unlock(CURL *handle, curl_lock_data data, void *userptr) {
    pthread_mutex_t *guard = &curl.share_lock;

    pthread_mutex_unlock(guard);
}
110
111
typedef int (*s3_auth_callback) (void *auth_data, char *, kstring_t*, char*, kstring_t*, kstring_t*, kstring_t*, kstring_t*, int);
112
113
typedef int (*set_region_callback) (void *auth_data, kstring_t *region);
114
115
typedef struct {
116
    s3_auth_callback callback;
117
    redirect_callback redirect_callback;
118
    set_region_callback set_region_callback;
119
    void *callback_data;
120
} s3_authorisation;
121
122
typedef struct {
123
    hFILE base;
124
    CURL *curl;
125
    CURLcode ret;
126
    s3_authorisation *au;
127
    kstring_t buffer;
128
    kstring_t url;
129
    kstring_t upload_id;
130
    kstring_t completion_message;
131
    int part_no;
132
    int aborted;
133
    size_t index;
134
    long verbose;
135
    int part_size;
136
    int expand;
137
} hFILE_s3_write;
138
139
140
0
/* Put a kstring into the empty state without releasing anything it may hold. */
static void ksinit(kstring_t *s) {
    s->s = NULL;
    s->m = 0;
    s->l = 0;
}
145
146
147
0
/* Release a kstring's storage and leave it in the empty state, ready for reuse. */
static void ksfree(kstring_t *s) {
    free(s->s);

    s->s = NULL;
    s->l = s->m = 0;
}
151
152
153
0
static size_t response_callback(void *contents, size_t size, size_t nmemb, void *userp) {
154
0
    size_t realsize = size * nmemb;
155
0
    kstring_t *resp = (kstring_t *)userp;
156
157
0
    if (kputsn((const char *)contents, realsize, resp) == EOF) {
158
0
        return 0;
159
0
    }
160
161
0
    return realsize;
162
0
}
163
164
165
0
/*
    Append to out the text found in `in` between the first start_tag and the
    first end_tag that follows it.  Returns EOF when `in` is NULL or either
    tag is missing; otherwise returns the result of kputsn().
*/
static int get_entry(char *in, char *start_tag, char *end_tag, kstring_t *out) {
    char *value;
    char *value_end;

    if (in == NULL) return EOF;

    value = strstr(in, start_tag);
    if (value == NULL) return EOF;

    // skip over the opening tag itself
    value += strlen(start_tag);

    value_end = strstr(value, end_tag);
    if (value_end == NULL) return EOF;

    return kputsn(value, value_end - value, out);
}
183
184
185
0
/*
    Release everything this file allocated for fp: its strings, the curl
    easy handle and the copied callback table.  The caller's authorisation
    data is not touched.
*/
static void cleanup_local(hFILE_s3_write *fp) {
    kstring_t *owned[] = {
        &fp->buffer, &fp->url, &fp->upload_id, &fp->completion_message
    };
    size_t i;

    for (i = 0; i < sizeof(owned) / sizeof(owned[0]); i++) {
        ksfree(owned[i]);
    }

    curl_easy_cleanup(fp->curl);
    free(fp->au);
}
194
195
196
0
/*
    Full teardown: ask the auth callback to free its authorisation data
    (signalled by passing NULL for every other pointer), then release the
    resources owned by fp itself.
*/
static void cleanup(hFILE_s3_write *fp) {
    s3_authorisation *au = fp->au;

    au->callback(au->callback_data, NULL, NULL, NULL, NULL, NULL, NULL, NULL, 0);

    cleanup_local(fp);
}
201
202
203
0
/*
    Build the request header list and install it on fp->curl.

    auth, date and content hold complete header lines; token is added only
    when it is non-empty.  The empty "Content-Type:" and "Expect:" entries
    are there to suppress those headers.

    Returns the list (the caller frees it with curl_slist_free_all() after
    the transfer), or NULL if the list could not be built.  On failure any
    partial list is freed and CURLOPT_HTTPHEADER is set to NULL, so the
    handle never points at a truncated or freed list.
*/
static struct curl_slist *set_html_headers(hFILE_s3_write *fp, kstring_t *auth, kstring_t *date, kstring_t *content, kstring_t *token) {
    const char *lines[6];
    size_t n_lines = 0;
    size_t i;
    struct curl_slist *headers = NULL;

    lines[n_lines++] = "Content-Type:"; // get rid of this
    lines[n_lines++] = "Expect:";       // and this
    lines[n_lines++] = auth->s;
    lines[n_lines++] = date->s;
    lines[n_lines++] = content->s;

    if (token->l) {
        lines[n_lines++] = token->s;
    }

    for (i = 0; i < n_lines; i++) {
        // curl_slist_append() returns NULL on failure and leaves the old
        // list alone, so keep the old pointer until the append has worked.
        struct curl_slist *grown = curl_slist_append(headers, lines[i]);

        if (grown == NULL) {
            curl_slist_free_all(headers);
            headers = NULL;
            break;
        }

        headers = grown;
    }

    curl_easy_setopt(fp->curl, CURLOPT_HTTPHEADER, headers);

    return headers;
}
220
221
222
/*
223
    The partially uploaded file will hang around unless the delete command is sent.
224
*/
225
0
/*
    Send the DELETE request that abandons the multipart upload, then mark
    fp as aborted and tear it down with cleanup() whether or not the request
    worked.  Returns 0 if the request was performed successfully by curl,
    -1 otherwise.
*/
static int abort_upload(hFILE_s3_write *fp) {
    char method[] = "DELETE";
    kstring_t query = {0, 0, NULL};
    kstring_t hash = {0, 0, NULL};
    kstring_t auth_hdr = {0, 0, NULL};
    kstring_t date_hdr = {0, 0, NULL};
    kstring_t token_hdr = {0, 0, NULL};
    kstring_t hash_hdr = {0, 0, NULL};
    kstring_t target = {0, 0, NULL};
    struct curl_slist *header_list = NULL;
    int status = -1;

    // Each step runs only if the previous one succeeded.
    if (ksprintf(&query, "uploadId=%s", fp->upload_id.s) >= 0
        && fp->au->callback(fp->au->callback_data, method, NULL,
                            query.s, &hash,
                            &auth_hdr, &date_hdr, &token_hdr, 0) == 0
        && ksprintf(&target, "%s?%s", fp->url.s, query.s) >= 0
        && ksprintf(&hash_hdr, "x-amz-content-sha256: %s", hash.s) >= 0) {

        curl_easy_reset(fp->curl);
        curl_easy_setopt(fp->curl, CURLOPT_CUSTOMREQUEST, method);
        curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s);
        curl_easy_setopt(fp->curl, CURLOPT_URL, target.s);

        curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose);

        header_list = set_html_headers(fp, &auth_hdr, &date_hdr, &hash_hdr, &token_hdr);
        fp->ret = curl_easy_perform(fp->curl);

        if (fp->ret == CURLE_OK) {
            status = 0;
        }
    }

    ksfree(&query);
    ksfree(&hash);
    ksfree(&auth_hdr);
    ksfree(&date_hdr);
    ksfree(&token_hdr);
    ksfree(&hash_hdr);
    ksfree(&target);
    curl_slist_free_all(header_list);

    // From here on the handle must not be used for further uploads.
    fp->aborted = 1;
    cleanup(fp);

    return status;
}
284
285
286
0
/*
    Finish the multipart upload: close the <CompleteMultipartUpload> XML
    held in fp->completion_message and POST it to the upload URL.  The
    server's reply body is appended to resp.

    Returns 0 if curl performed the request, -1 on any failure.  The HTTP
    status is not examined here; the caller inspects resp.
*/
static int complete_upload(hFILE_s3_write *fp, kstring_t *resp) {
    kstring_t content_hash = {0, 0, NULL};
    kstring_t authorisation = {0, 0, NULL};
    kstring_t url = {0, 0, NULL};
    kstring_t content = {0, 0, NULL};
    kstring_t canonical_query_string = {0, 0, NULL};
    kstring_t date = {0, 0, NULL};
    kstring_t token = {0, 0, NULL};
    int ret = -1;
    struct curl_slist *headers = NULL;
    char http_request[] = "POST";

    // Every failure leaves through "out" so that whatever has been
    // allocated so far is released in one place.
    if (ksprintf(&canonical_query_string, "uploadId=%s", fp->upload_id.s) < 0) {
        goto out;
    }

    // finish off the completion reply
    if (kputs("</CompleteMultipartUpload>\n", &fp->completion_message) < 0) {
        goto out;
    }

    if (fp->au->callback(fp->au->callback_data,  http_request,
                         &fp->completion_message, canonical_query_string.s,
                         &content_hash, &authorisation, &date, &token, 0) != 0) {
        goto out;
    }

    if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) {
        goto out;
    }

    if (ksprintf(&content, "x-amz-content-sha256: %s", content_hash.s) < 0) {
        goto out;
    }

    curl_easy_reset(fp->curl);
    curl_easy_setopt(fp->curl, CURLOPT_POST, 1L);
    curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDS, fp->completion_message.s);
    curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDSIZE, (long) fp->completion_message.l);
    curl_easy_setopt(fp->curl, CURLOPT_WRITEFUNCTION, response_callback);
    curl_easy_setopt(fp->curl, CURLOPT_WRITEDATA, (void *)resp);
    curl_easy_setopt(fp->curl, CURLOPT_URL, url.s);
    curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s);

    curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose);

    headers = set_html_headers(fp, &authorisation, &date, &content, &token);
    fp->ret = curl_easy_perform(fp->curl);

    if (fp->ret == CURLE_OK) {
        ret = 0;
    }

 out:
    ksfree(&authorisation);
    ksfree(&content);
    ksfree(&content_hash);
    ksfree(&url);
    ksfree(&date);
    ksfree(&token);
    ksfree(&canonical_query_string);
    curl_slist_free_all(headers);

    return ret;
}
351
352
353
0
static size_t upload_callback(void *ptr, size_t size, size_t nmemb, void *stream) {
354
0
    size_t realsize = size * nmemb;
355
0
    hFILE_s3_write *fp = (hFILE_s3_write *)stream;
356
0
    size_t read_length;
357
358
0
    if (realsize > (fp->buffer.l - fp->index)) {
359
0
        read_length = fp->buffer.l - fp->index;
360
0
    } else {
361
0
        read_length = realsize;
362
0
    }
363
364
0
    memcpy(ptr, fp->buffer.s + fp->index, read_length);
365
0
    fp->index += read_length;
366
367
0
    return read_length;
368
0
}
369
370
371
0
/*
    PUT the contents of fp->buffer as part number fp->part_no of the
    upload.  The response headers are appended to resp so that the caller
    can extract the ETag.

    Returns 0 if curl performed the request, -1 on any failure.  The HTTP
    status is not examined here.
*/
static int upload_part(hFILE_s3_write *fp, kstring_t *resp) {
    kstring_t content_hash = {0, 0, NULL};
    kstring_t authorisation = {0, 0, NULL};
    kstring_t url = {0, 0, NULL};
    kstring_t content = {0, 0, NULL};
    kstring_t canonical_query_string = {0, 0, NULL};
    kstring_t date = {0, 0, NULL};
    kstring_t token = {0, 0, NULL};
    int ret = -1;
    struct curl_slist *headers = NULL;
    char http_request[] = "PUT";

    // Every failure leaves through "out" so that whatever has been
    // allocated so far is released in one place.
    if (ksprintf(&canonical_query_string, "partNumber=%d&uploadId=%s", fp->part_no, fp->upload_id.s) < 0) {
        goto out;
    }

    if (fp->au->callback(fp->au->callback_data, http_request, &fp->buffer,
                         canonical_query_string.s, &content_hash,
                         &authorisation, &date, &token, 0) != 0) {
        goto out;
    }

    if (ksprintf(&url, "%s?%s", fp->url.s, canonical_query_string.s) < 0) {
        goto out;
    }

    // upload_callback() reads the buffer from this offset
    fp->index = 0;
    if (ksprintf(&content, "x-amz-content-sha256: %s", content_hash.s) < 0) {
        goto out;
    }

    curl_easy_reset(fp->curl);

    curl_easy_setopt(fp->curl, CURLOPT_UPLOAD, 1L);
    curl_easy_setopt(fp->curl, CURLOPT_READFUNCTION, upload_callback);
    curl_easy_setopt(fp->curl, CURLOPT_READDATA, fp);
    curl_easy_setopt(fp->curl, CURLOPT_INFILESIZE_LARGE, (curl_off_t)fp->buffer.l);
    curl_easy_setopt(fp->curl, CURLOPT_HEADERFUNCTION, response_callback);
    curl_easy_setopt(fp->curl, CURLOPT_HEADERDATA, (void *)resp);
    curl_easy_setopt(fp->curl, CURLOPT_URL, url.s);
    curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s);

    curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose);

    headers = set_html_headers(fp, &authorisation, &date, &content, &token);
    fp->ret = curl_easy_perform(fp->curl);

    if (fp->ret == CURLE_OK) {
        ret = 0;
    }

 out:
    ksfree(&authorisation);
    ksfree(&content);
    ksfree(&content_hash);
    ksfree(&url);
    ksfree(&date);
    ksfree(&token);
    ksfree(&canonical_query_string);
    curl_slist_free_all(headers);

    return ret;
}
434
435
436
0
/*
    hFILE write backend.  Appends the caller's data to fp->buffer and, once
    the buffer has grown beyond fp->part_size, uploads it as the next part
    and records the part's ETag in fp->completion_message.

    Returns nbytes on success.  Returns -1 if the data could not be
    buffered, or if the part upload failed; in the latter case the whole
    upload is aborted and fp is torn down by abort_upload().
*/
static ssize_t s3_write(hFILE *fpv, const void *bufferv, size_t nbytes) {
    hFILE_s3_write *fp = (hFILE_s3_write *)fpv;
    const char *buffer  = (const char *)bufferv;
    kstring_t response = {0, 0, NULL};
    kstring_t etag = {0, 0, NULL};
    long response_code = 0;
    int ret;

    if (kputsn(buffer, nbytes, &fp->buffer) == EOF) {
        return -1;
    }

    if (fp->buffer.l <= (size_t) fp->part_size) {
        // not enough data for a part yet
        return nbytes;
    }

    // time to write out our data
    ret = upload_part(fp, &response);

    if (!ret) {
        curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);

        // A part only counts once the server accepted it, its ETag was
        // found AND it has been added to the completion message; a part
        // missing from that message would be dropped from the final object.
        if (response_code > 200
            || get_entry(response.s, "ETag: \"", "\"", &etag) == EOF
            || ksprintf(&fp->completion_message, "\t<Part>\n\t\t<PartNumber>%d</PartNumber>\n\t\t<ETag>%s</ETag>\n\t</Part>\n",
                        fp->part_no, etag.s) < 0) {
            ret = -1;
        }
    }

    ksfree(&etag);
    ksfree(&response);

    if (ret) {
        abort_upload(fp);
        return -1;
    }

    fp->part_no++;
    fp->buffer.l = 0;

    if (fp->expand && (fp->part_no % EXPAND_ON == 0)) {
        fp->part_size *= 2;
    }

    return nbytes;
}
488
489
490
0
/*
    hFILE close backend.  Uploads whatever is left in the buffer as the last
    part, then sends the completion request.  If anything fails, or no part
    was ever uploaded, the upload is aborted.

    Does nothing and returns 0 if the upload was already aborted (fp has
    been torn down by then).  Otherwise returns 0 on success, -1 on failure.
*/
static int s3_close(hFILE *fpv) {
    hFILE_s3_write *fp = (hFILE_s3_write *)fpv;
    kstring_t response = {0, 0, NULL};
    int ret = 0;

    if (fp->aborted) {
        return 0;
    }

    if (fp->buffer.l) {
        // write the last part
        kstring_t etag = {0, 0, NULL};
        long response_code = 0;

        ret = upload_part(fp, &response);

        if (!ret) {
            curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);

            // The part must be accepted, have an ETag, and be recorded in
            // the completion message; otherwise it would be silently left
            // out of the final object.
            if (response_code > 200
                || get_entry(response.s, "ETag: \"", "\"", &etag) == EOF
                || ksprintf(&fp->completion_message, "\t<Part>\n\t\t<PartNumber>%d</PartNumber>\n\t\t<ETag>%s</ETag>\n\t</Part>\n",
                            fp->part_no, etag.s) < 0) {
                ret = -1;
            }
        }

        ksfree(&etag);
        ksfree(&response);

        if (ret) {
            abort_upload(fp);
            return -1;
        }

        fp->part_no++;
    }

    if (fp->part_no > 1) {
        ret = complete_upload(fp, &response);

        // response.s stays NULL when the server sent no body at all, so it
        // has to be checked before being searched.
        if (!ret && (response.s == NULL
                     || strstr(response.s, "CompleteMultipartUploadResult") == NULL)) {
            ret = -1;
        }
    } else {
        // nothing was uploaded, so there is nothing to complete
        ret = -1;
    }

    if (ret) {
        abort_upload(fp);
    } else {
        cleanup(fp);
    }

    ksfree(&response);

    return ret;
}
555
556
557
0
/*
    Pass a 301 reply's headers to the redirect callback, which receives
    fp->url so that it can update it.  Returns the callback's result, or -1
    when no redirect callback was supplied.
*/
static int redirect_endpoint(hFILE_s3_write *fp, kstring_t *head) {
    s3_authorisation *au = fp->au;

    if (!au->redirect_callback) {
        return -1;
    }

    return au->redirect_callback(au->callback_data, 301, head, &fp->url);
}
566
567
0
/*
    Pull the <Region> element out of a 400 reply body and pass it to the
    set-region callback.  Returns the callback's result, or -1 when there is
    no callback or no region in the reply.
*/
static int handle_bad_request(hFILE_s3_write *fp, kstring_t *resp) {
    kstring_t region = {0, 0, NULL};
    int status;

    if (!fp->au->set_region_callback) {
        return -1;
    }

    if (get_entry(resp->s, "<Region>", "</Region>", &region) == EOF) {
        return -1;
    }

    status = fp->au->set_region_callback(fp->au->callback_data, &region);
    ksfree(&region);

    return status;
}
583
584
0
/*
    Send the POST that starts a multipart upload.  Response headers are
    appended to head and the response body to resp.  user_query says whether
    fp->url already carries a query string, in which case "uploads" is
    joined with '&' rather than '?'.

    Returns 0 if curl performed the request, -1 otherwise.  The HTTP status
    is left for the caller to inspect.
*/
static int initialise_upload(hFILE_s3_write *fp, kstring_t *head, kstring_t *resp, int user_query) {
    char method[] = "POST";
    char joiner = user_query ? '&' : '?';
    kstring_t hash = {0, 0, NULL};
    kstring_t auth_hdr = {0, 0, NULL};
    kstring_t date_hdr = {0, 0, NULL};
    kstring_t token_hdr = {0, 0, NULL};
    kstring_t hash_hdr = {0, 0, NULL};
    kstring_t target = {0, 0, NULL};
    struct curl_slist *header_list = NULL;
    int status = -1;

    // Each step runs only if the previous one succeeded.
    if (fp->au->callback(fp->au->callback_data, method, NULL, "uploads=",
                         &hash, &auth_hdr, &date_hdr, &token_hdr, user_query) == 0
        && ksprintf(&target, "%s%cuploads", fp->url.s, joiner) >= 0
        && ksprintf(&hash_hdr, "x-amz-content-sha256: %s", hash.s) >= 0) {

        curl_easy_setopt(fp->curl, CURLOPT_URL, target.s);
        curl_easy_setopt(fp->curl, CURLOPT_POST, 1L);
        curl_easy_setopt(fp->curl, CURLOPT_POSTFIELDS, "");  // send no data
        curl_easy_setopt(fp->curl, CURLOPT_WRITEFUNCTION, response_callback);
        curl_easy_setopt(fp->curl, CURLOPT_WRITEDATA, (void *)resp);
        curl_easy_setopt(fp->curl, CURLOPT_HEADERFUNCTION, response_callback);
        curl_easy_setopt(fp->curl, CURLOPT_HEADERDATA, (void *)head);
        curl_easy_setopt(fp->curl, CURLOPT_USERAGENT, curl.useragent.s);

        curl_easy_setopt(fp->curl, CURLOPT_VERBOSE, fp->verbose);

        header_list = set_html_headers(fp, &auth_hdr, &date_hdr, &hash_hdr, &token_hdr);
        fp->ret = curl_easy_perform(fp->curl);

        if (fp->ret == CURLE_OK) {
            status = 0;
        }
    }

    ksfree(&hash);
    ksfree(&auth_hdr);
    ksfree(&date_hdr);
    ksfree(&token_hdr);
    ksfree(&hash_hdr);
    ksfree(&target);
    curl_slist_free_all(header_list);

    return status;
}
642
643
644
0
/*
    Reset fp->upload_id and fill it with the <UploadId> element of the
    initiate-upload reply.  Returns 0 on success, -1 if the element is
    missing or could not be stored.
*/
static int get_upload_id(hFILE_s3_write *fp, kstring_t *resp) {
    ksinit(&fp->upload_id);

    return (get_entry(resp->s, "<UploadId>", "</UploadId>", &fp->upload_id) == EOF) ? -1 : 0;
}
655
656
657
/* Backend table for S3 uploads: only the write and close hooks are set. */
static const struct hFILE_backend s3_write_backend = {
    NULL,
    s3_write,
    NULL,
    NULL,
    s3_close
};
660
661
662
0
/*
    Open an S3 multipart upload.

    url  - the s3w URL; its first four characters (the scheme prefix) are
           skipped to obtain the address that is actually contacted.
    auth - callbacks and opaque data used to sign requests; the table is
           copied, so the caller's struct need not outlive this call.

    Starts the upload, retrying once if the server answers 301 (via the
    redirect callback) or 400 (via the set-region callback), and stores the
    returned upload ID.  The HTS_S3_PART_SIZE environment variable, given in
    megabytes, raises the part size above the 5Mb minimum and switches off
    automatic part-size growth.

    Returns the new hFILE, or NULL on failure.  On failure the caller's
    authorisation data is not released.
*/
static hFILE *s3_write_open(const char *url, s3_authorisation *auth) {
    hFILE_s3_write *fp;
    kstring_t response = {0, 0, NULL};
    kstring_t header   = {0, 0, NULL};
    int ret, has_user_query = 0;
    char *query_start;
    const char *env;


    if (!auth || !auth->callback || !auth->callback_data) {
        return NULL;
    }

    fp = (hFILE_s3_write *)hfile_init(sizeof(hFILE_s3_write), "w", 0);

    if (fp == NULL) {
        return NULL;
    }

    // Give every member that cleanup_local() releases a defined value
    // before the first "goto error".  hfile_init() is not visible from
    // here, so do not rely on it zeroing the structure.
    fp->curl = NULL;
    fp->au = NULL;
    ksinit(&fp->buffer);
    ksinit(&fp->url);
    ksinit(&fp->upload_id);
    ksinit(&fp->completion_message);
    fp->aborted = 0;
    fp->index = 0;

    if ((fp->curl = curl_easy_init()) == NULL) {
        errno = ENOMEM;
        goto error;
    }

    if ((fp->au = calloc(1, sizeof(s3_authorisation))) == NULL) {
        goto error;
    }

    memcpy(fp->au, auth, sizeof(s3_authorisation));

    fp->part_size = MINIMUM_S3_WRITE_SIZE;
    fp->expand = 1;

    if ((env = getenv("HTS_S3_PART_SIZE")) != NULL) {
        const long mbyte = 1024L * 1024L;
        // Largest megabyte count whose size in bytes still fits in the int
        // part_size; larger requests are clamped instead of overflowing.
        const long max_mb = (long) (((unsigned int) -1 >> 1) / (1024u * 1024u));
        long requested_mb = strtol(env, NULL, 10);

        if (requested_mb > max_mb)
            requested_mb = max_mb;

        if (requested_mb > 0 && requested_mb * mbyte > fp->part_size)
            fp->part_size = (int) (requested_mb * mbyte);

        fp->expand = 0;
    }

    if (hts_verbose >= 8) {
        fp->verbose = 1L;
    } else {
        fp->verbose = 0L;
    }

    if (kputs(url + 4, &fp->url) == EOF) {
        goto error;
    }

    if (strchr(fp->url.s, '?')) {
        has_user_query = 1;
    }

    ret = initialise_upload(fp, &header, &response, has_user_query);

    if (ret == 0) {
        long response_code;

        curl_easy_getinfo(fp->curl, CURLINFO_RESPONSE_CODE, &response_code);

        if (response_code == S3_MOVED_PERMANENTLY) {
            if (redirect_endpoint(fp, &header) == 0) {
                ksfree(&response);
                ksfree(&header);

                ret = initialise_upload(fp, &header, &response, has_user_query);
            }
        } else if (response_code == S3_BAD_REQUEST) {
            if (handle_bad_request(fp, &response) == 0) {
                ksfree(&response);
                ksfree(&header);

                ret = initialise_upload(fp, &header, &response, has_user_query);
            }
        }
    }

    // No longer needed; freed here so a failed request cannot leak it.
    ksfree(&header);

    if (ret) goto error;

    if (get_upload_id(fp, &response)) goto error;

    // start the completion message (a formatted list of parts)
    ksinit(&fp->completion_message);

    if (kputs("<CompleteMultipartUpload>\n", &fp->completion_message) == EOF) {
        goto error;
    }

    fp->part_no = 1;

    // user query string no longer a useful part of the URL.  It is looked
    // up again here because the redirect callback is handed fp->url and
    // may have replaced its storage since the first look.
    query_start = strchr(fp->url.s, '?');

    if (query_start) {
        *query_start = '\0';
        fp->url.l = (size_t) (query_start - fp->url.s);
    }

    fp->base.backend = &s3_write_backend;
    ksfree(&response);

    return &fp->base;

error:
    ksfree(&response);
    ksfree(&header);
    cleanup_local(fp);
    hfile_destroy((hFILE *)fp);
    return NULL;
}
775
776
777
0
static hFILE *hopen_s3_write(const char *url, const char *mode) {
778
0
    if (hts_verbose >= 1) {
779
0
        fprintf(stderr, "[E::%s] s3w:// URLs should not be used directly; use s3:// instead.\n", __func__);
780
0
    }
781
0
    return NULL;
782
0
}
783
784
785
0
/*
    Fill auth from a NULL-terminated list of (name, value) pairs.  The
    recognised names are "s3_auth_callback", "s3_auth_callback_data",
    "redirect_callback", "set_region_callback" and "va_list" (a pointer to a
    nested argument list, parsed recursively when non-NULL).

    Returns 0 on success.  Returns -1 with errno set to EINVAL on an
    unrecognised name, or -1 if a nested list fails to parse.
*/
static int parse_va_list(s3_authorisation *auth, va_list args) {
    for (;;) {
        const char *key = va_arg(args, const char *);

        if (key == NULL) {
            break;
        }

        if (!strcmp(key, "s3_auth_callback")) {
            auth->callback = va_arg(args, s3_auth_callback);
            continue;
        }

        if (!strcmp(key, "s3_auth_callback_data")) {
            auth->callback_data = va_arg(args, void *);
            continue;
        }

        if (!strcmp(key, "redirect_callback")) {
            auth->redirect_callback = va_arg(args, redirect_callback);
            continue;
        }

        if (!strcmp(key, "set_region_callback")) {
            auth->set_region_callback = va_arg(args, set_region_callback);
            continue;
        }

        if (!strcmp(key, "va_list")) {
            va_list *nested = va_arg(args, va_list *);

            if (nested && parse_va_list(auth, *nested) < 0) {
                return -1;
            }
            continue;
        }

        // unknown argument name
        errno = EINVAL;
        return -1;
    }

    return 0;
}
811
812
813
0
static hFILE *vhopen_s3_write(const char *url, const char *mode, va_list args) {
814
0
    hFILE *fp = NULL;
815
0
    s3_authorisation auth = {NULL, NULL, NULL};
816
817
0
    if (parse_va_list(&auth, args) == 0) {
818
0
        fp =  s3_write_open(url, &auth);
819
0
    }
820
821
0
    return fp;
822
0
}
823
824
825
1
static void s3_write_exit() {
826
1
    if (curl_share_cleanup(curl.share) == CURLSHE_OK)
827
1
        curl.share = NULL;
828
829
1
    free(curl.useragent.s);
830
1
    curl.useragent.l = curl.useragent.m = 0; curl.useragent.s = NULL;
831
1
    curl_global_cleanup();
832
1
}
833
834
835
1
int PLUGIN_GLOBAL(hfile_plugin_init,_s3_write)(struct hFILE_plugin *self) {
836
837
1
    static const struct hFILE_scheme_handler handler =
838
1
        { hopen_s3_write, hfile_always_remote, "S3 Multipart Upload",
839
1
          2000 + 50, vhopen_s3_write
840
1
        };
841
842
#ifdef ENABLE_PLUGINS
843
    // Embed version string for examination via strings(1) or what(1)
844
    static const char id[] =
845
        "@(#)hfile_s3_write plugin (htslib)\t" HTS_VERSION_TEXT;
846
    const char *version = strchr(id, '\t') + 1;
847
848
    if (hts_verbose >= 9)
849
        fprintf(stderr, "[M::hfile_s3_write.init] version %s\n",
850
                version);
851
#else
852
1
    const char *version = hts_version();
853
1
#endif
854
855
1
    const curl_version_info_data *info;
856
1
    CURLcode err;
857
1
    CURLSHcode errsh;
858
859
1
    err = curl_global_init(CURL_GLOBAL_ALL);
860
861
1
    if (err != CURLE_OK) {
862
        // look at putting in an errno here
863
0
        return -1;
864
0
    }
865
866
1
    curl.share = curl_share_init();
867
868
1
    if (curl.share == NULL) {
869
0
        curl_global_cleanup();
870
0
        errno = EIO;
871
0
        return -1;
872
0
    }
873
874
1
    errsh  = curl_share_setopt(curl.share, CURLSHOPT_LOCKFUNC, share_lock);
875
1
    errsh |= curl_share_setopt(curl.share, CURLSHOPT_UNLOCKFUNC, share_unlock);
876
1
    errsh |= curl_share_setopt(curl.share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
877
878
1
    if (errsh != 0) {
879
0
        curl_share_cleanup(curl.share);
880
0
        curl_global_cleanup();
881
0
        errno = EIO;
882
0
        return -1;
883
0
    }
884
885
1
    info = curl_version_info(CURLVERSION_NOW);
886
1
    ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version);
887
888
1
    self->name = "S3 Multipart Upload";
889
1
    self->destroy = s3_write_exit;
890
891
1
    hfile_add_scheme_handler("s3w",       &handler);
892
1
    hfile_add_scheme_handler("s3w+http",  &handler);
893
1
    hfile_add_scheme_handler("s3w+https", &handler);
894
895
1
    return 0;
896
1
}