Coverage Report

Created: 2023-01-17 06:24

/src/htslib/hfile_libcurl.c
Line
Count
Source (jump to first uncovered line)
1
/*  hfile_libcurl.c -- libcurl backend for low-level file streams.
2
3
    Copyright (C) 2015-2017, 2019-2020 Genome Research Ltd.
4
5
    Author: John Marshall <jm18@sanger.ac.uk>
6
7
Permission is hereby granted, free of charge, to any person obtaining a copy
8
of this software and associated documentation files (the "Software"), to deal
9
in the Software without restriction, including without limitation the rights
10
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
copies of the Software, and to permit persons to whom the Software is
12
furnished to do so, subject to the following conditions:
13
14
The above copyright notice and this permission notice shall be included in
15
all copies or substantial portions of the Software.
16
17
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
DEALINGS IN THE SOFTWARE.  */
24
25
#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
26
#include <config.h>
27
28
#include <stdarg.h>
29
#include <stdlib.h>
30
#include <string.h>
31
#include <strings.h>
32
#include <errno.h>
33
#include <pthread.h>
34
#ifndef _WIN32
35
# include <sys/select.h>
36
#endif
37
#include <assert.h>
38
39
#include "hfile_internal.h"
40
#ifdef ENABLE_PLUGINS
41
#include "version.h"
42
#endif
43
#include "htslib/hts.h"  // for hts_version() and hts_verbose
44
#include "htslib/kstring.h"
45
#include "htslib/khash.h"
46
47
#include <curl/curl.h>
48
49
// Number of seconds to take off auth_token expiry, to allow for clock skew
50
// and slow servers
51
0
#define AUTH_REFRESH_EARLY_SECS 60
52
53
// Minimum number of bytes to skip when seeking forward.  Seeks less than
54
// this will just read the data and throw it away.  The optimal value
55
// depends on how long it takes to make a new connection compared
56
// to how fast the data arrives.
57
0
#define MIN_SEEK_FORWARD 1000000
58
59
typedef struct {
    char *path;            // Location passed to hopen() to (re)read the token
    char *token;           // Complete "Authorization: Bearer ..." header line,
                           //    or NULL when no token is available
    time_t expiry;         // Time after which the token is re-read;
                           //    0 means it is never refreshed
    int failed;            // Set once a refresh has failed; later refresh
                           //    attempts then fail immediately
    pthread_mutex_t lock;  // Held while the token is renewed or copied
} auth_token;
66
67
// For the authorization header cache
68
KHASH_MAP_INIT_STR(auth_map, auth_token *)
69
70
// Curl-compatible header linked list
71
typedef struct {
    struct curl_slist *list;  // Array of nodes, chained via their next fields
    unsigned int num;         // Number of nodes currently in use
    unsigned int size;        // Number of nodes allocated in list
} hdrlist;
76
77
typedef struct {
78
    hdrlist fixed;                   // List of headers supplied at hopen()
79
    hdrlist extra;                   // List of headers from callback
80
    hts_httphdr_callback callback;   // Callback to get more headers
81
    void *callback_data;             // Data to pass to httphdr callback
82
    auth_token *auth;                // Authentication token
83
    int auth_hdr_num;                // Location of auth_token in hdrlist extra
84
                                     // If -1, Authorization header is in fixed
85
                                     //    -2, it came from the callback
86
                                     //    -3, "auth_token_enabled", "false"
87
                                     //        passed to hopen()
88
    redirect_callback redirect;      // Callback to handle 3xx redirects
89
    void *redirect_data;             // Data to pass to redirect_callback
90
    long *http_response_ptr;         // Location to store http response code.
91
    int fail_on_error;               // Open fails on >400 response code
92
                                     //    (default true)
93
} http_headers;
94
95
typedef struct {
96
    hFILE base;
97
    CURL *easy;
98
    CURLM *multi;
99
    off_t file_size;
100
    struct {
101
        union { char *rd; const char *wr; } ptr;
102
        size_t len;
103
    } buffer;
104
    CURLcode final_result;  // easy result code for finished transfers
105
    // Flags for communicating with libcurl callbacks:
106
    unsigned paused : 1;    // callback tells us that it has paused transfer
107
    unsigned closing : 1;   // informs callback that hclose() has been invoked
108
    unsigned finished : 1;  // wait_perform() tells us transfer is complete
109
    unsigned perform_again : 1;
110
    unsigned is_read : 1;   // Opened in read mode
111
    unsigned can_seek : 1;  // Can (attempt to) seek on this handle
112
    unsigned is_recursive:1; // Opened by hfile_libcurl itself
113
    unsigned tried_seek : 1; // At least one seek has been attempted
114
    int nrunning;
115
    http_headers headers;
116
117
    off_t delayed_seek;      // Location to seek to before reading
118
    off_t last_offset;       // Location we're seeking from
119
    char *preserved;         // Preserved buffer content on seek
120
    size_t preserved_bytes;  // Number of preserved bytes
121
    size_t preserved_size;   // Size of preserved buffer
122
} hFILE_libcurl;
123
124
static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence);
125
static int restart_from_position(hFILE_libcurl *fp, off_t pos);
126
127
static int http_status_errno(int status)
{
    // Anything below 400 is not an error
    if (status < 400)
        return 0;

    switch (status) {
    case 401:
    case 407:
        return EPERM;
    case 403:
        return EACCES;
    case 404:
    case 410:
        return ENOENT;
    case 405:
        return EROFS;
    case 408:
    case 504:
        return ETIMEDOUT;
    case 501:
        return ENOSYS;
    case 503:
        return EBUSY;
    default:
        // Unlisted 5xx (and above) map to EIO, unlisted 4xx to EINVAL
        return (status >= 500) ? EIO : EINVAL;
    }
}
149
150
/*
 * Translate a libcurl easy-interface result code into an errno value.
 * For connection-level failures the OS errno recorded by libcurl is used
 * when available; for HTTP errors the response code is mapped through
 * http_status_errno().  Codes with no specific mapping are logged and
 * reported as EIO.
 */
static int easy_errno(CURL *easy, CURLcode err)
{
    long info = 0;

    switch (err) {
    case CURLE_OK:
        return 0;

    case CURLE_UNSUPPORTED_PROTOCOL:
    case CURLE_URL_MALFORMAT:
        return EINVAL;

#if LIBCURL_VERSION_NUM >= 0x071505
    case CURLE_NOT_BUILT_IN:
        return ENOSYS;
#endif

    case CURLE_COULDNT_RESOLVE_PROXY:
    case CURLE_COULDNT_RESOLVE_HOST:
    case CURLE_FTP_CANT_GET_HOST:
        return EDESTADDRREQ; // Lookup failure

    case CURLE_COULDNT_CONNECT:
    case CURLE_SEND_ERROR:
    case CURLE_RECV_ERROR:
        if (curl_easy_getinfo(easy, CURLINFO_OS_ERRNO, &info) != CURLE_OK)
            return ECONNABORTED;
        return info;

    case CURLE_REMOTE_ACCESS_DENIED:
    case CURLE_LOGIN_DENIED:
    case CURLE_TFTP_PERM:
        return EACCES;

    case CURLE_PARTIAL_FILE:
        return EPIPE;

    case CURLE_HTTP_RETURNED_ERROR:
        if (curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &info) != CURLE_OK)
            return EIO;
        return http_status_errno(info);

    case CURLE_OUT_OF_MEMORY:
        return ENOMEM;

    case CURLE_OPERATION_TIMEDOUT:
        return ETIMEDOUT;

    case CURLE_RANGE_ERROR:
        return ESPIPE;

    case CURLE_SSL_CONNECT_ERROR:
        // TODO return SSL error buffer messages
        return ECONNABORTED;

    case CURLE_FILE_COULDNT_READ_FILE:
    case CURLE_TFTP_NOTFOUND:
        return ENOENT;

    case CURLE_TOO_MANY_REDIRECTS:
        return ELOOP;

    case CURLE_FILESIZE_EXCEEDED:
        return EFBIG;

    case CURLE_REMOTE_DISK_FULL:
        return ENOSPC;

    case CURLE_REMOTE_FILE_EXISTS:
        return EEXIST;

    default:
        hts_log_error("Libcurl reported error %d (%s)", (int) err,
                      curl_easy_strerror(err));
        return EIO;
    }
}
229
230
/*
 * Translate a libcurl multi-interface result code into an errno value.
 * CURLM_CALL_MULTI_PERFORM is not an error and maps to 0, like CURLM_OK.
 * Codes with no specific mapping are logged and reported as EIO.
 */
static int multi_errno(CURLMcode errm)
{
    int result;

    switch (errm) {
    case CURLM_OK:
    case CURLM_CALL_MULTI_PERFORM:
        result = 0;
        break;

    case CURLM_BAD_HANDLE:
    case CURLM_BAD_EASY_HANDLE:
    case CURLM_BAD_SOCKET:
        result = EBADF;
        break;

    case CURLM_OUT_OF_MEMORY:
        result = ENOMEM;
        break;

    default:
        hts_log_error("Libcurl reported error %d (%s)", (int) errm,
                      curl_multi_strerror(errm));
        result = EIO;
        break;
    }

    return result;
}
251
252
static struct {
253
    kstring_t useragent;
254
    CURLSH *share;
255
    char *auth_path;
256
    khash_t(auth_map) *auth_map;
257
    int allow_unencrypted_auth_header;
258
    pthread_mutex_t auth_lock;
259
    pthread_mutex_t share_lock;
260
} curl = { { 0, 0, NULL }, NULL, NULL, NULL, 0, PTHREAD_MUTEX_INITIALIZER,
261
           PTHREAD_MUTEX_INITIALIZER };
262
263
static void share_lock(CURL *handle, curl_lock_data data,
264
1
                       curl_lock_access access, void *userptr) {
265
1
    pthread_mutex_lock(&curl.share_lock);
266
1
}
267
268
1
// Unlock callback for the libcurl share handle; releases the mutex taken
// by share_lock().
static void share_unlock(CURL *handle, curl_lock_data data, void *userptr) {
    (void) handle;
    (void) data;
    (void) userptr;
    pthread_mutex_unlock(&curl.share_lock);
}
271
272
0
// Destroy an auth_token and everything it owns.  NULL is accepted and
// ignored.  Aborts if the token's mutex cannot be destroyed.
static void free_auth(auth_token *tok) {
    if (tok != NULL) {
        if (pthread_mutex_destroy(&tok->lock) != 0)
            abort();
        free(tok->path);
        free(tok->token);
        free(tok);
    }
}
279
280
static void libcurl_exit()
281
1
{
282
1
    if (curl_share_cleanup(curl.share) == CURLSHE_OK)
283
1
        curl.share = NULL;
284
285
1
    free(curl.useragent.s);
286
1
    curl.useragent.l = curl.useragent.m = 0; curl.useragent.s = NULL;
287
288
1
    free(curl.auth_path);
289
1
    curl.auth_path = NULL;
290
291
1
    if (curl.auth_map) {
292
0
        khiter_t i;
293
0
        for (i = kh_begin(curl.auth_map); i != kh_end(curl.auth_map); ++i) {
294
0
            if (kh_exist(curl.auth_map, i)) {
295
0
                free_auth(kh_value(curl.auth_map, i));
296
0
                kh_key(curl.auth_map, i) = NULL;
297
0
                kh_value(curl.auth_map, i) = NULL;
298
0
            }
299
0
        }
300
0
        kh_destroy(auth_map, curl.auth_map);
301
0
        curl.auth_map = NULL;
302
0
    }
303
304
1
    curl_global_cleanup();
305
1
}
306
307
0
/*
 * Add one header line to the end of hdrs, growing the node array if needed.
 * If dup is non-zero the string is copied with strdup(); otherwise the list
 * stores the caller's pointer directly.
 * Returns 0 on success, -1 if memory could not be allocated (or data is
 * NULL when dup is zero).
 * Note that growing the array may move it, so pointers to nodes held
 * outside hdrs are not kept up to date by this function.
 */
static int append_header(hdrlist *hdrs, const char *data, int dup) {
    struct curl_slist *node;

    if (hdrs->num == hdrs->size) {
        unsigned int capacity = (hdrs->size != 0) ? hdrs->size * 2 : 4;
        unsigned int k;
        struct curl_slist *grown = realloc(hdrs->list,
                                           capacity * sizeof(*grown));
        if (grown == NULL)
            return -1;
        hdrs->list = grown;
        hdrs->size = capacity;
        // The nodes may have moved, so rebuild the chain between them
        for (k = 0; k + 1 < hdrs->num; k++)
            grown[k].next = &grown[k + 1];
    }

    node = &hdrs->list[hdrs->num];
    // Annoyingly, libcurl doesn't declare the char * as const...
    if (dup)
        node->data = strdup(data);
    else
        node->data = (char *) data;
    if (node->data == NULL)
        return -1;

    node->next = NULL;
    if (hdrs->num > 0)
        node[-1].next = node;
    hdrs->num++;
    return 0;
}
325
326
0
/*
 * Free every header string in hdrs and mark the list empty.  If completely
 * is non-zero the node array itself is released as well; otherwise it is
 * kept for reuse.
 */
static void free_headers(hdrlist *hdrs, int completely) {
    struct curl_slist *node = hdrs->list;
    unsigned int left;

    for (left = hdrs->num; left > 0; left--, node++) {
        free(node->data);
        node->data = NULL;
        node->next = NULL;
    }
    hdrs->num = 0;

    if (!completely)
        return;

    free(hdrs->list);
    hdrs->list = NULL;
    hdrs->size = 0;
}
340
341
0
// Return the first node of the header chain: the first fixed header if
// there are any, otherwise the first extra header, otherwise NULL.
static struct curl_slist * get_header_list(hFILE_libcurl *fp) {
    const hdrlist *fixed = &fp->headers.fixed;
    const hdrlist *extra = &fp->headers.extra;

    if (fixed->num > 0)
        return &fixed->list[0];
    return (extra->num > 0) ? &extra->list[0] : NULL;
}
348
349
0
// Return 1 if hdr starts with "authorization:" (compared without regard
// to case), 0 otherwise.
static inline int is_authorization(const char *hdr) {
    static const char prefix[] = "authorization:";

    if (strncasecmp(prefix, hdr, sizeof(prefix) - 1) != 0)
        return 0;
    return 1;
}
352
353
0
/*
 * Ask the header callback (if one is set) for a fresh set of headers and
 * install them as the "extra" header list, replacing any previous ones.
 *
 * The callback returns a NULL-terminated array of strings.  The strings
 * are stored in the extra list without copying, and the array slots are
 * set to NULL once they have been taken (or freed); the array itself is
 * not freed here.  If the callback leaves the array pointer NULL, the
 * existing headers are kept.
 *
 * Returns 0 on success, -1 if the callback fails or memory runs out.
 */
static int add_callback_headers(hFILE_libcurl *fp) {
    char **hdrs = NULL, **hdr;

    if (!fp->headers.callback)
        return 0;

    // Get the headers from the callback
    if (fp->headers.callback(fp->headers.callback_data, &hdrs) != 0) {
        return -1;
    }

    if (!hdrs) // No change
        return 0;

    // Remove any old callback headers
    if (fp->headers.fixed.num > 0) {
        // Unlink lists
        fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL;
    }
    free_headers(&fp->headers.extra, 0);

    if (fp->headers.auth_hdr_num > 0 || fp->headers.auth_hdr_num == -2)
        fp->headers.auth_hdr_num = 0; // Just removed it...

    // Convert to libcurl-suitable form
    for (hdr = hdrs; *hdr; hdr++) {
        if (append_header(&fp->headers.extra, *hdr, 0) < 0) {
            goto cleanup;
        }
        if (is_authorization(*hdr) && !fp->headers.auth_hdr_num)
            fp->headers.auth_hdr_num = -2;
    }
    // The strings now belong to the extra list
    for (hdr = hdrs; *hdr; hdr++) *hdr = NULL;

    if (fp->headers.fixed.num > 0 && fp->headers.extra.num > 0) {
        // Relink lists
        fp->headers.fixed.list[fp->headers.fixed.num - 1].next
            = &fp->headers.extra.list[0];
    }
    return 0;

 cleanup:
    {
        char **taken;

        // Free every string that was not stored, starting with the one
        // that failed.  The pointer must advance, or only one is released.
        for (; *hdr; hdr++) {
            free(*hdr);
            *hdr = NULL;
        }
        // Earlier strings were stored in the extra list (dup == 0), which
        // will free them; clear the array slots as on the success path.
        for (taken = hdrs; taken < hdr && *taken; taken++)
            *taken = NULL;
    }
    return -1;
}
401
402
/*
403
 * Read an OAUTH2-style Bearer access token (see
404
 * https://tools.ietf.org/html/rfc6750#section-4).
405
 * Returns 'v' for valid; 'i' for invalid (token missing or wrong sort);
406
 * '?' for a JSON parse error; 'm' if it runs out of memory.
407
 */
408
0
/*
 * Parse a JSON object from auth_fp and, if it holds a usable access token,
 * store "Authorization: Bearer <token>" in tok->token and set tok->expiry
 * from "expires_in" (0 if absent).  Unknown keys are skipped.
 *
 * Returns 'v' on success.  Any other value is a failure; see the comment
 * preceding this function for the documented codes.  Some failure paths
 * return the unexpected JSON token type reported by hts_json_fnext().
 */
static int read_auth_json(auth_token *tok, hFILE *auth_fp) {
    hts_json_token *t = hts_json_alloc_token();
    kstring_t str = {0, 0, NULL};
    char *token = NULL, *type = NULL, *expiry = NULL;
    int ret = 'm';

    if (!t) goto error;  // Allocation failed, report 'm'

    if ((ret = hts_json_fnext(auth_fp, t, &str)) != '{') goto error;
    while (hts_json_fnext(auth_fp, t, &str) != '}') {
        char *key;
        if (hts_json_token_type(t) != 's') {
            ret = '?';
            goto error;
        }
        key = hts_json_token_str(t);
        if (!key) goto error;
        // For each recognised key, a repeated occurrence replaces the
        // earlier value; free the old copy so it is not leaked.
        if (strcmp(key, "access_token") == 0) {
            if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error;
            free(token);
            token = ks_release(&str);
        } else if (strcmp(key, "token_type") == 0) {
            if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error;
            free(type);
            type = ks_release(&str);
        } else if (strcmp(key, "expires_in") == 0) {
            if ((ret = hts_json_fnext(auth_fp, t, &str)) != 'n') goto error;
            free(expiry);
            expiry = ks_release(&str);
        } else if (hts_json_fskip_value(auth_fp, '\0') != 'v') {
            ret = '?';
            goto error;
        }
    }

    // A token is required; if a type is given it must be "Bearer"
    if (!token || (type && strcmp(type, "Bearer") != 0)) {
        ret = 'i';
        goto error;
    }

    ret = 'm';
    str.l = 0;
    if (kputs("Authorization: Bearer ", &str) < 0) goto error;
    if (kputs(token, &str) < 0) goto error;
    free(tok->token);
    tok->token = ks_release(&str);
    if (expiry) {
        long exp = strtol(expiry, NULL, 10);
        if (exp < 0) exp = 0;
        tok->expiry = time(NULL) + exp;
    } else {
        tok->expiry = 0;
    }
    ret = 'v';

 error:
    // Also reached on success; releases all temporaries
    free(token);
    free(type);
    free(expiry);
    free(str.s);
    hts_json_free_token(t);
    return ret;
}
468
469
0
/*
 * Read a plain-text token: the first whitespace-delimited word on the first
 * line of auth_fp.  On success tok->token becomes
 * "Authorization: Bearer <word>" (or NULL if the line holds no word) and
 * tok->expiry is set to 0.
 * Returns 0 on success, -1 if the line cannot be read or memory runs out.
 */
static int read_auth_plain(auth_token *tok, hFILE *auth_fp) {
    kstring_t line = {0, 0, NULL};
    kstring_t header = {0, 0, NULL};
    const char *word, *word_end;

    if (kgetline(&line, (char * (*)(char *, int, void *)) hgets, auth_fp) < 0)
        goto error;
    if (kputc('\0', &line) < 0)
        goto error;

    // Skip leading white space, then find where the word stops
    word = line.s;
    while (*word && isspace_c(*word))
        word++;
    word_end = word;
    while (*word_end && !isspace_c(*word_end))
        word_end++;

    if (word_end > word) {
        if (kputs("Authorization: Bearer ", &header) < 0) goto error;
        if (kputsn(word, word_end - word, &header) < 0) goto error;
    }

    free(tok->token);
    tok->token = ks_release(&header);
    tok->expiry = 0;
    free(line.s);
    return 0;

 error:
    free(line.s);
    free(header.s);
    return -1;
}
496
497
0
/*
 * Re-read the token from tok->path if it is due for renewal.
 *
 * Nothing is done (and *changed is 0) when tok->expiry is 0 or the expiry
 * time is still more than AUTH_REFRESH_EARLY_SECS away.  Otherwise *changed
 * is set to 1 and the file is read, as JSON if a '{' appears in the first
 * few bytes and as plain text if not.  A missing file (ENOENT) is not an
 * error: the token is dropped and no further refresh is attempted.
 *
 * Returns 0 on success, -1 on failure.  Failures while opening or reading
 * are remembered in tok->failed, so later calls needing a refresh fail
 * immediately.
 */
static int renew_auth_token(auth_token *tok, int *changed) {
    hFILE *auth_fp = NULL;
    char buffer[16];
    ssize_t len;

    *changed = 0;
    if (tok->expiry == 0 || time(NULL) + AUTH_REFRESH_EARLY_SECS < tok->expiry)
        return 0; // Still valid

    if (tok->failed)
        return -1;

    *changed = 1;
    auth_fp = hopen(tok->path, "rR");
    if (!auth_fp) {
        // Not worried about missing files; other errors are bad.
        if (errno != ENOENT)
            goto fail;

        tok->expiry = 0; // Prevent retry
        free(tok->token); // Just in case it was set
        // Clear the pointer: it is read again by add_auth_header() and
        // freed again by free_auth(), so it must not be left dangling.
        tok->token = NULL;
        return 0;
    }

    len = hpeek(auth_fp, buffer, sizeof(buffer));
    if (len < 0)
        goto fail;

    if (memchr(buffer, '{', len) != NULL) {
        if (read_auth_json(tok, auth_fp) != 'v')
            goto fail;
    } else {
        if (read_auth_plain(tok, auth_fp) < 0)
            goto fail;
    }

    return hclose(auth_fp) < 0 ? -1 : 0;

 fail:
    tok->failed = 1;
    if (auth_fp) hclose_abruptly(auth_fp);
    return -1;
}
540
541
0
/*
 * Make the extra header list carry the current auth token, renewing the
 * token first if necessary.
 *
 * Nothing is done if an Authorization header was supplied another way
 * (auth_hdr_num < 0) or there is no token object.  Otherwise, with the
 * token's lock held:
 *   - a previously added header is replaced by a copy of the new token,
 *     or removed if there is no token any more;
 *   - if no header was added before and a token exists, one is appended
 *     and its 1-based position recorded in auth_hdr_num.
 *
 * Returns 0 on success, -1 on failure.
 */
static int add_auth_header(hFILE_libcurl *fp) {
    int changed = 0;

    if (fp->headers.auth_hdr_num < 0)
        return 0; // Have an Authorization header from open or header callback

    if (!fp->headers.auth)
        return 0; // Nothing to add

    pthread_mutex_lock(&fp->headers.auth->lock);
    if (renew_auth_token(fp->headers.auth, &changed) < 0)
        goto unlock_fail;

    if (!changed && fp->headers.auth_hdr_num > 0) {
        pthread_mutex_unlock(&fp->headers.auth->lock);
        return 0;
    }

    if (fp->headers.auth_hdr_num > 0) {
        // Had a previous header, so swap in the new one
        char *header = fp->headers.auth->token;
        char *header_copy = header ? strdup(header) : NULL;
        int idx = fp->headers.auth_hdr_num - 1;
        if (header && !header_copy)
            goto unlock_fail;

        if (header_copy) {
            free(fp->headers.extra.list[idx].data);
            fp->headers.extra.list[idx].data = header_copy;
        } else {
            unsigned int j;
            // More complicated case - need to get rid of the old header
            // and tidy up linked lists
            free(fp->headers.extra.list[idx].data);
            for (j = idx + 1; j < fp->headers.extra.num; j++) {
                fp->headers.extra.list[j - 1] = fp->headers.extra.list[j];
                fp->headers.extra.list[j - 1].next = &fp->headers.extra.list[j];
            }
            fp->headers.extra.num--;
            if (fp->headers.extra.num > 0) {
                fp->headers.extra.list[fp->headers.extra.num-1].next = NULL;
            } else if (fp->headers.fixed.num > 0) {
                fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL;
            }
            fp->headers.auth_hdr_num = 0;
        }
    } else if (fp->headers.auth->token) {
        // Add new header and remember where it is
        int rc = append_header(&fp->headers.extra,
                               fp->headers.auth->token, 1);

        // append_header() may have moved the extra array (realloc), or
        // extra may have been empty until now.  Either way the last fixed
        // header must point at the current first extra node, otherwise the
        // chain from get_header_list() dangles or misses the new header.
        // Do this even on failure, as the array may already have moved.
        if (fp->headers.fixed.num > 0 && fp->headers.extra.num > 0) {
            fp->headers.fixed.list[fp->headers.fixed.num - 1].next
                = &fp->headers.extra.list[0];
        }
        if (rc < 0)
            goto unlock_fail;
        fp->headers.auth_hdr_num = fp->headers.extra.num;
    }

    pthread_mutex_unlock(&fp->headers.auth->lock);
    return 0;

 unlock_fail:
    pthread_mutex_unlock(&fp->headers.auth->lock);
    return -1;
}
603
604
0
/*
 * Find (or create) the cached auth_token that applies to url, attach it to
 * fp, and add its Authorization header.
 *
 * The token location is curl.auth_path with every "%h" replaced by the
 * host part of url.  Tokens are cached in curl.auth_map, keyed by that
 * location, under curl.auth_lock.
 *
 * Returns 0 on success or when there is nothing to do, -1 on failure.
 * If a new cache entry cannot be created, fp is left without a token and
 * the function still returns 0.
 */
static int get_auth_token(hFILE_libcurl *fp, const char *url) {
    const char *host = NULL, *p, *q;
    kstring_t name = {0, 0, NULL};
    size_t host_len = 0;
    khiter_t idx;
    auth_token *tok = NULL;

    // Nothing to do if:
    //   curl.auth_path has not been set
    //   fp was made by hfile_libcurl (e.g. auth_path is a http:// url)
    //   we already have an Authorization header
    if (!curl.auth_path || fp->is_recursive || fp->headers.auth_hdr_num != 0)
        return 0;

    // Insist on having a secure connection unless the user insists harder
    if (!curl.allow_unencrypted_auth_header && strncmp(url, "https://", 8) != 0)
        return 0;

    host = strstr(url, "://");
    if (host) {
        host += 3;
        host_len = strcspn(host, "/");
    }

    // Expand each "%h" in the configured path to the host name
    p = curl.auth_path;
    while ((q = strstr(p, "%h")) != NULL) {
        if (q - p > INT_MAX || host_len > INT_MAX) goto error;
        if (kputsn_(p, q - p, &name) < 0) goto error;
        if (kputsn_(host, host_len, &name) < 0) goto error;
        p = q + 2;
    }
    if (kputs(p, &name) < 0) goto error;

    pthread_mutex_lock(&curl.auth_lock);
    idx = kh_get(auth_map, curl.auth_map, name.s);
    if (idx < kh_end(curl.auth_map)) {
        tok = kh_value(curl.auth_map, idx);
    } else {
        tok = calloc(1, sizeof(*tok));
        if (tok && pthread_mutex_init(&tok->lock, NULL) != 0) {
            free(tok);
            tok = NULL;
        }
        if (tok) {
            int ret = -1;
            tok->path = ks_release(&name);
            tok->token = NULL;
            tok->expiry = 1; // Force refresh
            idx = kh_put(auth_map, curl.auth_map, tok->path, &ret);
            if (ret < 0) {
                // Insertion failed, so idx does not name a usable slot;
                // storing through it would write outside the table.
                free_auth(tok);
                tok = NULL;
            } else {
                kh_value(curl.auth_map, idx) = tok;
            }
        }
    }
    pthread_mutex_unlock(&curl.auth_lock);

    fp->headers.auth = tok;
    free(name.s);

    return add_auth_header(fp);

 error:
    free(name.s);
    return -1;
}
671
672
// Drain libcurl's message queue for fp's multi handle.  A "done" message
// marks the transfer finished and records its result code; other message
// types are ignored.
static void process_messages(hFILE_libcurl *fp)
{
    int pending;
    CURLMsg *m;

    for (m = curl_multi_info_read(fp->multi, &pending);
         m != NULL;
         m = curl_multi_info_read(fp->multi, &pending)) {
        if (m->msg == CURLMSG_DONE) {
            fp->finished = 1;
            fp->final_result = m->data.result;
        }
    }
}
689
690
/*
 * Wait until libcurl has something to do (unless it asked to be called
 * again straight away), then run curl_multi_perform() once.  If the number
 * of running transfers has fallen below fp->nrunning, pending messages are
 * processed.
 * Returns 0 on success; -1 if select() or libcurl fails (errno is set from
 * the libcurl error in the latter case).
 */
static int wait_perform(hFILE_libcurl *fp)
{
    int nrunning;
    CURLMcode errm;

    if (!fp->perform_again) {
        fd_set rd, wr, ex;
        int maxfd;
        long timeout;

        FD_ZERO(&rd);
        FD_ZERO(&wr);
        FD_ZERO(&ex);

        if (curl_multi_fdset(fp->multi, &rd, &wr, &ex, &maxfd) == CURLM_OK) {
            if (curl_multi_timeout(fp->multi, &timeout) != CURLM_OK) {
                timeout = 1000;
            } else if (timeout < 0) {
                timeout = 10000;  // as recommended by curl_multi_timeout(3)
            }
        } else {
            maxfd = -1;
            timeout = 1000;
        }

        if (maxfd < 0) {
            // No descriptors to wait on, so only sleep briefly
            if (timeout > 100)
                timeout = 100; // as recommended by curl_multi_fdset(3)
#ifdef _WIN32
            /* Windows ignores the first argument of select, so calling select
             * with maxfd=-1 does not give the expected result of sleeping for
             * timeout milliseconds in the conditional block below.
             * So sleep here and skip the next block.
             */
            Sleep(timeout);
            timeout = 0;
#endif
        }

        if (timeout > 0) {
            struct timeval tval;

            // timeout is in milliseconds
            tval.tv_sec  = (timeout / 1000);
            tval.tv_usec = (timeout % 1000) * 1000;

            if (select(maxfd + 1, &rd, &wr, &ex, &tval) < 0)
                return -1;
        }
    }

    errm = curl_multi_perform(fp->multi, &nrunning);
    fp->perform_again = (errm == CURLM_CALL_MULTI_PERFORM) ? 1 : 0;
    if (errm != CURLM_OK && errm != CURLM_CALL_MULTI_PERFORM) {
        errno = multi_errno(errm);
        return -1;
    }

    if (nrunning < fp->nrunning)
        process_messages(fp);
    return 0;
}
741
742
743
static size_t recv_callback(char *ptr, size_t size, size_t nmemb, void *fpv)
744
0
{
745
0
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
746
0
    size_t n = size * nmemb;
747
748
0
    if (n > fp->buffer.len) {
749
0
        fp->paused = 1;
750
0
        return CURL_WRITEFUNC_PAUSE;
751
0
    }
752
0
    else if (n == 0) return 0;
753
754
0
    memcpy(fp->buffer.ptr.rd, ptr, n);
755
0
    fp->buffer.ptr.rd += n;
756
0
    fp->buffer.len -= n;
757
0
    return n;
758
0
}
759
760
761
static size_t header_callback(void *contents, size_t size, size_t nmemb,
762
                              void *userp)
763
0
{
764
0
    size_t realsize = size * nmemb;
765
0
    kstring_t *resp = (kstring_t *)userp;
766
767
0
    if (kputsn((const char *)contents, realsize, resp) == EOF) {
768
0
        return 0;
769
0
    }
770
771
0
    return realsize;
772
0
}
773
774
775
/*
 * hFILE read backend: read up to nbytes into bufferv.
 *
 * A seek recorded in fp->delayed_seek is carried out first, in one of
 * three ways: from the buffer contents preserved when the seek was
 * requested; by reading and discarding data for a short forward seek
 * (less than MIN_SEEK_FORWARD); or by restarting the transfer at the new
 * position.
 *
 * Returns the number of bytes stored in bufferv, or -1 with errno set on
 * failure.
 */
static ssize_t libcurl_read(hFILE *fpv, void *bufferv, size_t nbytes)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    char *buffer = (char *) bufferv;
    off_t to_skip = -1;   // Bytes still to discard; -1 when not skipping
    ssize_t got = 0;
    CURLcode err;

    if (fp->delayed_seek >= 0) {
        assert(fp->base.offset == fp->delayed_seek);

        if (fp->preserved
            && fp->last_offset > fp->delayed_seek
            && fp->last_offset - fp->preserved_bytes <= fp->delayed_seek) {
            // Can use buffer contents copied when seeking started, to
            // avoid having to re-read data discarded by hseek().
            // Note fp->last_offset is the offset of the *end* of the
            // preserved buffer.
            size_t n = fp->last_offset - fp->delayed_seek;
            char *start = fp->preserved + (fp->preserved_bytes - n);
            size_t bytes = n <= nbytes ? n : nbytes;
            memcpy(buffer, start, bytes);
            if (bytes < n) { // Part of the preserved buffer still left
                fp->delayed_seek += bytes;
            } else {
                fp->last_offset = fp->delayed_seek = -1;
            }
            return bytes;
        }

        if (fp->last_offset >= 0
            && fp->delayed_seek > fp->last_offset
            && fp->delayed_seek - fp->last_offset < MIN_SEEK_FORWARD) {
            // If not seeking far, just read the data and throw it away.  This
            // is likely to be quicker than opening a new stream
            to_skip = fp->delayed_seek - fp->last_offset;
        } else {
            if (restart_from_position(fp, fp->delayed_seek) < 0) {
                return -1;
            }
        }
        fp->delayed_seek = -1;
        fp->last_offset = -1;
        fp->preserved_bytes = 0;
    }

    do {
        // Hand the caller's buffer to recv_callback() and resume transfer
        fp->buffer.ptr.rd = buffer;
        fp->buffer.len = nbytes;
        fp->paused = 0;
        if (!fp->finished) {
            err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
            if (err != CURLE_OK) {
                errno = easy_errno(fp->easy, err);
                return -1;
            }
        }

        while (! fp->paused && ! fp->finished) {
            if (wait_perform(fp) < 0) return -1;
        }

        got = fp->buffer.ptr.rd - buffer;

        if (to_skip >= 0) { // Skipping over a small seek
            if (got < to_skip) { // Need to skip more data
                to_skip -= got;
                // Everything read so far was discarded; don't report it
                // as data if the transfer ends before the skip completes.
                got = 0;
            } else {
                got -= to_skip;
                if (got > 0) {  // If enough was skipped, return the rest
                    memmove(buffer, buffer + to_skip, got);
                    to_skip = -1;
                } else {
                    // Read ended exactly at the target: nothing more to
                    // discard, but keep reading to get some real data.
                    to_skip = 0;
                }
            }
        }
    } while (to_skip >= 0 && ! fp->finished);
    fp->buffer.ptr.rd = NULL;
    fp->buffer.len = 0;

    if (fp->finished && fp->final_result != CURLE_OK) {
        errno = easy_errno(fp->easy, fp->final_result);
        return -1;
    }

    return got;
}
861
862
static size_t send_callback(char *ptr, size_t size, size_t nmemb, void *fpv)
863
0
{
864
0
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
865
0
    size_t n = size * nmemb;
866
867
0
    if (fp->buffer.len == 0) {
868
        // Send buffer is empty; normally pause, or signal EOF if we're closing
869
0
        if (fp->closing) return 0;
870
0
        else { fp->paused = 1; return CURL_READFUNC_PAUSE; }
871
0
    }
872
873
0
    if (n > fp->buffer.len) n = fp->buffer.len;
874
0
    memcpy(ptr, fp->buffer.ptr.wr, n);
875
0
    fp->buffer.ptr.wr += n;
876
0
    fp->buffer.len -= n;
877
0
    return n;
878
0
}
879
880
/*
 * hFILE write backend: offer nbytes from bufferv to libcurl and run the
 * transfer until the send callback pauses it or it finishes.
 * Returns the number of bytes taken from bufferv, or -1 with errno set on
 * failure.
 */
static ssize_t libcurl_write(hFILE *fpv, const void *bufferv, size_t nbytes)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    const char *data = (const char *) bufferv;
    size_t sent;
    CURLcode err;

    // Expose the caller's data to send_callback() and resume the transfer
    fp->buffer.ptr.wr = data;
    fp->buffer.len = nbytes;
    fp->paused = 0;
    err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
    if (err != CURLE_OK) {
        errno = easy_errno(fp->easy, err);
        return -1;
    }

    while (! fp->paused && ! fp->finished) {
        if (wait_perform(fp) < 0)
            return -1;
    }

    sent = fp->buffer.ptr.wr - data;
    fp->buffer.ptr.wr = NULL;
    fp->buffer.len = 0;

    if (fp->finished && fp->final_result != CURLE_OK) {
        errno = easy_errno(fp->easy, fp->final_result);
        return -1;
    }

    return sent;
}
906
907
/*
 * Stash the unread part of the hFILE buffer (base.begin .. base.end) in
 * fp->preserved, recording its length in fp->preserved_bytes.
 *
 * The stash is (re)allocated to the full size of the hFILE buffer
 * (base.limit - base.buffer) whenever it is missing or too small.  If the
 * hFILE buffer holds no unread data, or the allocation fails, the stash is
 * marked empty (preserved_bytes == 0); failure is otherwise silent as the
 * stash is only an optimisation.
 */
static void preserve_buffer_content(hFILE_libcurl *fp)
{
    if (fp->base.begin == fp->base.end) {
        fp->preserved_bytes = 0;
        return;
    }
    if (!fp->preserved
        || fp->preserved_size < fp->base.limit - fp->base.buffer) {
        // Release any undersized stash first so that it is not leaked
        free(fp->preserved);
        fp->preserved = malloc(fp->base.limit - fp->base.buffer);
        if (!fp->preserved) {
            // Keep the bookkeeping consistent with the now-missing stash
            fp->preserved_size = 0;
            fp->preserved_bytes = 0;
            return;
        }
        fp->preserved_size = fp->base.limit - fp->base.buffer;
    }

    assert(fp->base.end - fp->base.begin <= fp->preserved_size);

    memcpy(fp->preserved, fp->base.begin, fp->base.end - fp->base.begin);
    fp->preserved_bytes = fp->base.end - fp->base.begin;
    return;
}
926
927
/*
 * hFILE backend seek method.
 *
 * Only SEEK_SET and SEEK_END are handled here: SEEK_CUR fails with ENOSYS
 * and any other whence with EINVAL.  Seeking is refused with ESPIPE for
 * files opened for writing, after an earlier seek has failed, or for
 * SEEK_END when the file size is unknown.
 *
 * The first seek is carried out immediately via restart_from_position().
 * Once that has succeeded, later seeks only record the target in
 * fp->delayed_seek (presumably acted on by the next read -- the read path
 * is not part of this block).
 *
 * Returns the new absolute position, or -1 with errno set.
 */
static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence)
{
    hFILE_libcurl *file = (hFILE_libcurl *) fpv;
    off_t base, target;
    int out_of_range;

    if (!(file->is_read && file->can_seek)) {
        // Cowardly refuse to seek when writing or a previous seek failed.
        errno = ESPIPE;
        return -1;
    }

    if (whence == SEEK_SET) {
        base = 0;
    } else if (whence == SEEK_END) {
        if (file->file_size < 0) {
            errno = ESPIPE;
            return -1;
        }
        base = file->file_size;
    } else {
        errno = (whence == SEEK_CUR) ? ENOSYS : EINVAL;
        return -1;
    }

    // Check 0 <= base+offset <= file_size (the upper bound only when the
    // size is known), arranged so that the arithmetic cannot overflow.
    if (offset < 0)
        out_of_range = base + offset < 0;
    else
        out_of_range = file->file_size >= 0
                       && offset > file->file_size - base;

    if (out_of_range) {
        errno = EINVAL;
        return -1;
    }

    target = base + offset;

    if (!file->tried_seek) {
        // First seek on this file: do it for real to find out if it works.
        if (restart_from_position(file, target) < 0) {
            /* This value for errno may not be entirely true, but the caller
               may be able to carry on with the existing handle. */
            errno = ESPIPE;
            return -1;
        }

        file->tried_seek = 1;
        return target;
    }

    /* Seeking has worked at least once, so the actual work can be put off
       until the next read.  This avoids lots of pointless http or ftp
       reconnections if the caller does lots of seeks without any
       intervening reads. */
    if (file->delayed_seek < 0) {
        file->last_offset = file->base.offset
                            + (file->base.end - file->base.buffer);
        // Stash the current hFILE buffer content in case it's useful later
        preserve_buffer_content(file);
    }
    file->delayed_seek = target;
    return target;
}
987
988
0
/*
 * Re-issue the request so that the transfer starts at byte offset pos.
 *
 * A duplicate of fp->easy is configured with CURLOPT_RESUME_FROM_LARGE and
 * run (attached to an on-stack copy of *fp) until it pauses or finishes.
 * If that works, the original easy handle is removed, cleaned up and
 * replaced by the duplicate; otherwise the duplicate is discarded and
 * fp->easy is left in place.
 *
 * Returns 0 on success, -1 on failure.  errno is set on failure whenever an
 * error code is available.  Most failures after the headers have been
 * refreshed also clear fp->can_seek so that no further seeks are attempted.
 */
static int restart_from_position(hFILE_libcurl *fp, off_t pos) {
    hFILE_libcurl temp_fp;
    CURLcode err;
    CURLMcode errm;
    int update_headers = 0;
    int save_errno = 0;

    // TODO If we seem to be doing random access, use CURLOPT_RANGE to do
    // limited reads (e.g. about a BAM block!) so seeking can reuse the
    // existing connection more often.

    // Get new headers from the callback (if defined).  This changes the
    // headers in fp before it gets duplicated, but they should have been
    // sent by now.

    if (fp->headers.callback) {
        if (add_callback_headers(fp) != 0)
            return -1;
        update_headers = 1;
    }
    if (fp->headers.auth_hdr_num > 0 && fp->headers.auth) {
        if (add_auth_header(fp) != 0)
            return -1;
        update_headers = 1;
    }
    if (update_headers) {
        struct curl_slist *list = get_header_list(fp);
        if (list) {
            err = curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list);
            if (err != CURLE_OK) {
                errno = easy_errno(fp->easy, err);
                return -1;
            }
        }
    }

    /*
      Duplicate the easy handle, and use CURLOPT_RESUME_FROM_LARGE to open
      a new request to the server, reading from the location that we want
      to seek to.  If the new request works and returns the correct data,
      the original easy handle in *fp is closed and replaced with the new
      one.  If not, we close the new handle and leave *fp unchanged.
     */

    memcpy(&temp_fp, fp, sizeof(temp_fp));
    temp_fp.buffer.len = 0;  // Ensures we only read the response headers
    temp_fp.buffer.ptr.rd = NULL;
    temp_fp.easy = curl_easy_duphandle(fp->easy);
    if (!temp_fp.easy) {
        // Same errno as used when creating a handle fails in libcurl_open()
        save_errno = ENOMEM;
        goto early_error;
    }

    // Stop at the first failure so that err holds a genuine CURLcode;
    // OR-ing several codes together could produce an unrelated value.
    err = curl_easy_setopt(temp_fp.easy, CURLOPT_RESUME_FROM_LARGE,
                           (curl_off_t) pos);
    if (err == CURLE_OK)
        err = curl_easy_setopt(temp_fp.easy, CURLOPT_PRIVATE, &temp_fp);
    if (err == CURLE_OK)
        err = curl_easy_setopt(temp_fp.easy, CURLOPT_WRITEDATA, &temp_fp);
    if (err != CURLE_OK) {
        save_errno = easy_errno(temp_fp.easy, err);
        goto error;
    }

    temp_fp.paused = temp_fp.finished = 0;

    // fp->multi and temp_fp.multi are the same.
    errm = curl_multi_add_handle(fp->multi, temp_fp.easy);
    if (errm != CURLM_OK) {
        save_errno = multi_errno(errm);
        goto error;
    }
    temp_fp.nrunning = ++fp->nrunning;

    while (! temp_fp.paused && ! temp_fp.finished) {
        if (wait_perform(&temp_fp) < 0) {
            save_errno = errno;
            goto error_remove;
        }
    }

    if (temp_fp.finished && temp_fp.final_result != CURLE_OK) {
        save_errno = easy_errno(temp_fp.easy, temp_fp.final_result);
        goto error_remove;
    }

    // We've got a good response, close the original connection and
    // replace it with the new one.

    errm = curl_multi_remove_handle(fp->multi, fp->easy);
    if (errm != CURLM_OK) {
        // Clean up as much as possible
        curl_easy_reset(temp_fp.easy);
        if (curl_multi_remove_handle(fp->multi, temp_fp.easy) == CURLM_OK) {
            fp->nrunning--;
            curl_easy_cleanup(temp_fp.easy);
        }
        save_errno = multi_errno(errm);
        goto early_error;
    }
    fp->nrunning--;

    curl_easy_cleanup(fp->easy);
    fp->easy = temp_fp.easy;

    // Re-point the adopted handle at fp instead of the on-stack temp_fp
    err = curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
    if (err == CURLE_OK)
        err = curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp);
    if (err != CURLE_OK) {
        save_errno = easy_errno(fp->easy, err);
        // The reset also drops any remaining pointer to temp_fp
        curl_easy_reset(fp->easy);
        errno = save_errno;
        return -1;
    }
    fp->buffer.len = 0;
    fp->paused = temp_fp.paused;
    fp->finished = temp_fp.finished;
    fp->perform_again = temp_fp.perform_again;
    fp->final_result = temp_fp.final_result;

    return 0;

 error_remove:
    curl_easy_reset(temp_fp.easy); // Ensure no pointers to on-stack temp_fp
    errm = curl_multi_remove_handle(fp->multi, temp_fp.easy);
    if (errm != CURLM_OK) {
        errno = multi_errno(errm);
        return -1;
    }
    fp->nrunning--;
 error:
    curl_easy_cleanup(temp_fp.easy);
 early_error:
    fp->can_seek = 0;  // Don't try to seek again
    if (save_errno)
        errno = save_errno;
    return -1;
}
1119
1120
/*
 * hFILE backend close method.
 *
 * Gives an unfinished transfer a last chance to run with the "closing" flag
 * set (so that send_callback() can signal end-of-upload), then detaches and
 * destroys the curl handles and frees the header lists and the preserved
 * buffer.  All of the clean-up is done even if an error has been seen.
 *
 * Returns 0 on success, or -1 with errno set.
 */
static int libcurl_close(hFILE *fpv)
{
    hFILE_libcurl *file = (hFILE_libcurl *) fpv;
    CURLcode rc;
    CURLMcode mrc;
    int failure = 0;

    // Before closing the file, unpause it and perform on it so that uploads
    // have the opportunity to signal EOF to the server -- see send_callback().

    file->buffer.len = 0;
    file->closing = 1;
    file->paused = 0;
    if (!file->finished) {
        rc = curl_easy_pause(file->easy, CURLPAUSE_CONT);
        if (rc != CURLE_OK)
            failure = easy_errno(file->easy, rc);
    }

    while (failure == 0 && !(file->paused || file->finished)) {
        if (wait_perform(file) < 0)
            failure = errno;
    }

    // An error result from the transfer takes precedence over earlier ones
    if (file->finished && file->final_result != CURLE_OK)
        failure = easy_errno(file->easy, file->final_result);

    mrc = curl_multi_remove_handle(file->multi, file->easy);
    if (mrc != CURLM_OK && failure == 0)
        failure = multi_errno(mrc);
    file->nrunning--;

    curl_easy_cleanup(file->easy);
    curl_multi_cleanup(file->multi);

    // Tell callback to free any data it needs to
    if (file->headers.callback)
        file->headers.callback(file->headers.callback_data, NULL);
    free_headers(&file->headers.fixed, 1);
    free_headers(&file->headers.extra, 1);

    free(file->preserved);

    if (failure == 0)
        return 0;

    errno = failure;
    return -1;
}
1161
1162
static const struct hFILE_backend libcurl_backend =
1163
{
1164
    libcurl_read, libcurl_write, libcurl_seek, NULL, libcurl_close
1165
};
1166
1167
static hFILE *
1168
libcurl_open(const char *url, const char *modes, http_headers *headers)
1169
0
{
1170
0
    hFILE_libcurl *fp;
1171
0
    struct curl_slist *list;
1172
0
    char mode;
1173
0
    const char *s;
1174
0
    CURLcode err;
1175
0
    CURLMcode errm;
1176
0
    int save, is_recursive;
1177
0
    kstring_t in_header = {0, 0, NULL};
1178
0
    long response;
1179
1180
0
    is_recursive = strchr(modes, 'R') != NULL;
1181
1182
0
    if ((s = strpbrk(modes, "rwa+")) != NULL) {
1183
0
        mode = *s;
1184
0
        if (strpbrk(&s[1], "rwa+")) mode = 'e';
1185
0
    }
1186
0
    else mode = '\0';
1187
1188
0
    if (mode != 'r' && mode != 'w') { errno = EINVAL; goto early_error; }
1189
1190
0
    fp = (hFILE_libcurl *) hfile_init(sizeof (hFILE_libcurl), modes, 0);
1191
0
    if (fp == NULL) goto early_error;
1192
1193
0
    if (headers) {
1194
0
        fp->headers = *headers;
1195
0
    } else {
1196
0
        memset(&fp->headers, 0, sizeof(fp->headers));
1197
0
        fp->headers.fail_on_error = 1;
1198
0
    }
1199
1200
0
    fp->file_size = -1;
1201
0
    fp->buffer.ptr.rd = NULL;
1202
0
    fp->buffer.len = 0;
1203
0
    fp->final_result = (CURLcode) -1;
1204
0
    fp->paused = fp->closing = fp->finished = fp->perform_again = 0;
1205
0
    fp->can_seek = 1;
1206
0
    fp->tried_seek = 0;
1207
0
    fp->delayed_seek = fp->last_offset = -1;
1208
0
    fp->preserved = NULL;
1209
0
    fp->preserved_bytes = fp->preserved_size = 0;
1210
0
    fp->is_recursive = is_recursive;
1211
0
    fp->nrunning = 0;
1212
0
    fp->easy = NULL;
1213
1214
0
    fp->multi = curl_multi_init();
1215
0
    if (fp->multi == NULL) { errno = ENOMEM; goto error; }
1216
1217
0
    fp->easy = curl_easy_init();
1218
0
    if (fp->easy == NULL) { errno = ENOMEM; goto error; }
1219
1220
    // Make a route to the hFILE_libcurl* given just a CURL* easy handle
1221
0
    err = curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp);
1222
1223
    // Avoid many repeated CWD calls with FTP, instead requesting the filename
1224
    // by full path (but not strictly compliant with RFC1738).
1225
0
    err |= curl_easy_setopt(fp->easy, CURLOPT_FTP_FILEMETHOD, CURLFTPMETHOD_NOCWD);
1226
1227
0
    if (mode == 'r') {
1228
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEFUNCTION, recv_callback);
1229
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
1230
0
        fp->is_read = 1;
1231
0
    }
1232
0
    else {
1233
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_READFUNCTION, send_callback);
1234
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_READDATA, fp);
1235
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_UPLOAD, 1L);
1236
0
        if (append_header(&fp->headers.fixed,
1237
0
                          "Transfer-Encoding: chunked", 1) < 0)
1238
0
            goto error;
1239
0
        fp->is_read = 0;
1240
0
    }
1241
1242
0
    err |= curl_easy_setopt(fp->easy, CURLOPT_SHARE, curl.share);
1243
0
    err |= curl_easy_setopt(fp->easy, CURLOPT_URL, url);
1244
0
    {
1245
0
        char* env_curl_ca_bundle = getenv("CURL_CA_BUNDLE");
1246
0
        if (env_curl_ca_bundle) {
1247
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_CAINFO, env_curl_ca_bundle);
1248
0
        }
1249
0
    }
1250
0
    err |= curl_easy_setopt(fp->easy, CURLOPT_USERAGENT, curl.useragent.s);
1251
0
    if (fp->headers.callback) {
1252
0
        if (add_callback_headers(fp) != 0) goto error;
1253
0
    }
1254
0
    if (get_auth_token(fp, url) < 0)
1255
0
        goto error;
1256
0
    if ((list = get_header_list(fp)) != NULL)
1257
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list);
1258
1259
0
    if (hts_verbose <= 8 && fp->headers.fail_on_error)
1260
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_FAILONERROR, 1L);
1261
0
    if (hts_verbose >= 8)
1262
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_VERBOSE, 1L);
1263
1264
0
    if (fp->headers.redirect) {
1265
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, header_callback);
1266
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, (void *)&in_header);
1267
0
    } else {
1268
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_FOLLOWLOCATION, 1L);
1269
0
    }
1270
1271
0
    if (err != 0) { errno = ENOSYS; goto error; }
1272
1273
0
    errm = curl_multi_add_handle(fp->multi, fp->easy);
1274
0
    if (errm != CURLM_OK) { errno = multi_errno(errm); goto error; }
1275
0
    fp->nrunning++;
1276
1277
0
    while (! fp->paused && ! fp->finished) {
1278
0
        if (wait_perform(fp) < 0) goto error_remove;
1279
0
    }
1280
1281
0
    curl_easy_getinfo(fp->easy, CURLINFO_RESPONSE_CODE, &response);
1282
0
    if (fp->headers.http_response_ptr) {
1283
0
        *fp->headers.http_response_ptr = response;
1284
0
    }
1285
1286
0
    if (fp->finished && fp->final_result != CURLE_OK) {
1287
0
        errno = easy_errno(fp->easy, fp->final_result);
1288
0
        goto error_remove;
1289
0
    }
1290
1291
0
    if (fp->headers.redirect) {
1292
0
        if (response >= 300 && response < 400) { // redirection
1293
0
            kstring_t new_url = {0, 0, NULL};
1294
1295
0
            if (fp->headers.redirect(fp->headers.redirect_data, response,
1296
0
                                     &in_header, &new_url)) {
1297
0
                errno = ENOSYS;
1298
0
                goto error;
1299
0
            }
1300
1301
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_URL, new_url.s);
1302
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, NULL);
1303
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, NULL);
1304
0
            free(ks_release(&in_header));
1305
1306
0
            if (err != 0) { errno = ENOSYS; goto error; }
1307
0
            free(ks_release(&new_url));
1308
1309
0
            if (restart_from_position(fp, 0) < 0) {
1310
0
                goto error_remove;
1311
0
            }
1312
1313
0
            if (fp->headers.http_response_ptr) {
1314
0
                curl_easy_getinfo(fp->easy, CURLINFO_RESPONSE_CODE,
1315
0
                                  fp->headers.http_response_ptr);
1316
0
            }
1317
1318
0
            if (fp->finished && fp->final_result != CURLE_OK) {
1319
0
                errno = easy_errno(fp->easy, fp->final_result);
1320
0
                goto error_remove;
1321
0
            }
1322
0
        } else {
1323
            // we no longer need to look at the headers
1324
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, NULL);
1325
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, NULL);
1326
0
            free(ks_release(&in_header));
1327
1328
0
            if (err != 0) { errno = ENOSYS; goto error; }
1329
0
        }
1330
0
    }
1331
1332
0
    if (mode == 'r') {
1333
0
        double dval;
1334
1335
0
        if (curl_easy_getinfo(fp->easy, CURLINFO_CONTENT_LENGTH_DOWNLOAD,
1336
0
                              &dval) == CURLE_OK && dval >= 0.0)
1337
0
            fp->file_size = (off_t) (dval + 0.1);
1338
0
    }
1339
1340
0
    fp->base.backend = &libcurl_backend;
1341
0
    return &fp->base;
1342
1343
0
error_remove:
1344
0
    save = errno;
1345
0
    (void) curl_multi_remove_handle(fp->multi, fp->easy);
1346
0
    fp->nrunning--;
1347
0
    errno = save;
1348
1349
0
error:
1350
0
    if (fp->headers.redirect) free(in_header.s);
1351
0
    save = errno;
1352
0
    if (fp->easy) curl_easy_cleanup(fp->easy);
1353
0
    if (fp->multi) curl_multi_cleanup(fp->multi);
1354
0
    free_headers(&fp->headers.extra, 1);
1355
0
    hfile_destroy((hFILE *) fp);
1356
0
    errno = save;
1357
0
    return NULL;
1358
1359
0
early_error:
1360
0
    return NULL;
1361
0
}
1362
1363
static hFILE *hopen_libcurl(const char *url, const char *modes)
1364
0
{
1365
0
    return libcurl_open(url, modes, NULL);
1366
0
}
1367
1368
/*
 * Append one caller-supplied header to the fixed header list (duplicating
 * the string), and set auth_hdr_num to -1 if is_authorization() recognises
 * the header.
 * Returns 0 on success, -1 if the header could not be appended.
 */
static int add_fixed_header(http_headers *headers, const char *hdr)
{
    if (append_header(&headers->fixed, hdr, 1) < 0)
        return -1;
    if (is_authorization(hdr))
        headers->auth_hdr_num = -1;
    return 0;
}

/*
 * Parse the NULL-terminated list of (key, value...) extra arguments given
 * to hopen() and record the settings in *headers.
 *
 * Recognised keys: "httphdr", "httphdr:l", "httphdr:v", "httphdr_callback",
 * "httphdr_callback_data", "va_list" (a va_list * that is parsed
 * recursively), "auth_token_enabled", "redirect_callback",
 * "redirect_callback_data", "http_response_ptr" and "fail_on_error".
 *
 * NULL values for "httphdr", "httphdr:v", "va_list" and
 * "auth_token_enabled" are ignored.
 *
 * Returns 0 on success; -1 if a header could not be stored or if a key is
 * not recognised (errno set to EINVAL in the latter case).
 */
static int parse_va_list(http_headers *headers, va_list args)
{
    const char *argtype;

    while ((argtype = va_arg(args, const char *)) != NULL) {
        if (strcmp(argtype, "httphdr:v") == 0) {
            // NULL-terminated array of headers; a NULL array is skipped
            const char **hdr = va_arg(args, const char **);
            for (; hdr && *hdr; hdr++) {
                if (add_fixed_header(headers, *hdr) < 0)
                    return -1;
            }
        }
        else if (strcmp(argtype, "httphdr:l") == 0) {
            // Headers inline in the argument list, ended by a NULL
            const char *hdr;
            while ((hdr = va_arg(args, const char *)) != NULL) {
                if (add_fixed_header(headers, hdr) < 0)
                    return -1;
            }
        }
        else if (strcmp(argtype, "httphdr") == 0) {
            const char *hdr = va_arg(args, const char *);
            if (hdr) {
                if (add_fixed_header(headers, hdr) < 0)
                    return -1;
            }
        }
        else if (strcmp(argtype, "httphdr_callback") == 0) {
            headers->callback = va_arg(args, const hts_httphdr_callback);
        }
        else if (strcmp(argtype, "httphdr_callback_data") == 0) {
            headers->callback_data = va_arg(args, void *);
        }
        else if (strcmp(argtype, "va_list") == 0) {
            va_list *args2 = va_arg(args, va_list *);
            if (args2) {
                if (parse_va_list(headers, *args2) < 0) return -1;
            }
        }
        else if (strcmp(argtype, "auth_token_enabled") == 0) {
            const char *flag = va_arg(args, const char *);
            // Guard against a NULL value, which must not reach strcmp()
            if (flag && strcmp(flag, "false") == 0)
                headers->auth_hdr_num = -3;
        }
        else if (strcmp(argtype, "redirect_callback") == 0) {
            headers->redirect = va_arg(args, const redirect_callback);
        }
        else if (strcmp(argtype, "redirect_callback_data") == 0) {
            headers->redirect_data = va_arg(args, void *);
        }
        else if (strcmp(argtype, "http_response_ptr") == 0) {
            headers->http_response_ptr = va_arg(args, long *);
        }
        else if (strcmp(argtype, "fail_on_error") == 0) {
            headers->fail_on_error = va_arg(args, int);
        }
        else { errno = EINVAL; return -1; }
    }

    return 0;
}
1433
1434
/*
1435
  HTTP headers to be added to the request can be passed in as extra
1436
  arguments to hopen().  The headers can be specified as follows:
1437
1438
  * Single header:
1439
    hopen(url, mode, "httphdr", "X-Hdr-1: text", NULL);
1440
1441
  * Multiple headers in the argument list:
1442
    hopen(url, mode, "httphdr:l", "X-Hdr-1: text", "X-Hdr-2: text", NULL, NULL);
1443
1444
  * Multiple headers in a char* array:
1445
    hopen(url, mode, "httphdr:v", hdrs, NULL);
1446
    where `hdrs` is a char **.  The list ends with a NULL pointer.
1447
1448
  * A callback function
1449
    hopen(url, mode, "httphdr_callback", func,
1450
                     "httphdr_callback_data", arg, NULL);
1451
    `func` has type
1452
         int (* hts_httphdr_callback) (void *cb_data, char ***hdrs);
1453
    `arg` is passed to the callback as a void *.
1454
1455
    The function is called at file open, and when attempting to seek (which
1456
    opens a new HTTP request).  This allows, for example, access tokens
1457
    that may have gone stale to be regenerated.  The function is also
1458
    called (with `hdrs` == NULL) on file close so that the callback can
1459
    free any memory that it needs to.
1460
1461
    The callback should return 0 on success, non-zero on failure.  It should
1462
    return in *hdrs a list of strings containing the new headers (terminated
1463
    with a NULL pointer).  These will replace any headers previously supplied
1464
    by the callback.  If no changes are necessary, it can return NULL
1465
    in *hdrs, in which case the previous headers will be left unchanged.
1466
1467
    Ownership of the strings in the header list passes to hfile_libcurl,
1468
    so the callback should not attempt to use or free them itself.  The memory
1469
    containing the array belongs to the callback and will not be freed by
1470
    hfile_libcurl.
1471
1472
    Headers supplied by the callback are appended after any specified
1473
    using the "httphdr", "httphdr:l" or "httphdr:v" methods.  No attempt
1474
    is made to replace these headers (even if a key is repeated) so anything
1475
    that is expected to vary needs to come from the callback.
1476
 */
1477
1478
static hFILE *vhopen_libcurl(const char *url, const char *modes, va_list args)
1479
0
{
1480
0
    hFILE *fp = NULL;
1481
0
    http_headers headers = { .fail_on_error = 1 };
1482
1483
0
    if (parse_va_list(&headers, args) == 0) {
1484
0
        fp = libcurl_open(url, modes, &headers);
1485
0
    }
1486
1487
0
    if (!fp) {
1488
0
        free_headers(&headers.fixed, 1);
1489
0
    }
1490
0
    return fp;
1491
0
}
1492
1493
int PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)(struct hFILE_plugin *self)
1494
1
{
1495
1
    static const struct hFILE_scheme_handler handler =
1496
1
        { hopen_libcurl, hfile_always_remote, "libcurl",
1497
1
          2000 + 50,
1498
1
          vhopen_libcurl };
1499
1500
#ifdef ENABLE_PLUGINS
1501
    // Embed version string for examination via strings(1) or what(1)
1502
    static const char id[] =
1503
        "@(#)hfile_libcurl plugin (htslib)\t" HTS_VERSION_TEXT;
1504
    const char *version = strchr(id, '\t')+1;
1505
#else
1506
1
    const char *version = hts_version();
1507
1
#endif
1508
1
    const curl_version_info_data *info;
1509
1
    const char * const *protocol;
1510
1
    const char *auth;
1511
1
    CURLcode err;
1512
1
    CURLSHcode errsh;
1513
1514
1
    err = curl_global_init(CURL_GLOBAL_ALL);
1515
1
    if (err != CURLE_OK) { errno = easy_errno(NULL, err); return -1; }
1516
1517
1
    curl.share = curl_share_init();
1518
1
    if (curl.share == NULL) { curl_global_cleanup(); errno = EIO; return -1; }
1519
1
    errsh = curl_share_setopt(curl.share, CURLSHOPT_LOCKFUNC, share_lock);
1520
1
    errsh |= curl_share_setopt(curl.share, CURLSHOPT_UNLOCKFUNC, share_unlock);
1521
1
    errsh |= curl_share_setopt(curl.share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
1522
1
    if (errsh != 0) {
1523
0
        curl_share_cleanup(curl.share);
1524
0
        curl_global_cleanup();
1525
0
        errno = EIO;
1526
0
        return -1;
1527
0
    }
1528
1529
1
    if ((auth = getenv("HTS_AUTH_LOCATION")) != NULL) {
1530
0
        curl.auth_path = strdup(auth);
1531
0
        curl.auth_map = kh_init(auth_map);
1532
0
        if (!curl.auth_path || !curl.auth_map) {
1533
0
            int save_errno = errno;
1534
0
            free(curl.auth_path);
1535
0
            kh_destroy(auth_map, curl.auth_map);
1536
0
            curl_share_cleanup(curl.share);
1537
0
            curl_global_cleanup();
1538
0
            errno = save_errno;
1539
0
            return -1;
1540
0
        }
1541
0
    }
1542
1
    if ((auth = getenv("HTS_ALLOW_UNENCRYPTED_AUTHORIZATION_HEADER")) != NULL
1543
1
        && strcmp(auth, "I understand the risks") == 0) {
1544
0
        curl.allow_unencrypted_auth_header = 1;
1545
0
    }
1546
1547
1
    info = curl_version_info(CURLVERSION_NOW);
1548
1
    ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version);
1549
1550
1
    self->name = "libcurl";
1551
1
    self->destroy = libcurl_exit;
1552
1553
24
    for (protocol = info->protocols; *protocol; protocol++)
1554
23
        hfile_add_scheme_handler(*protocol, &handler);
1555
1
    return 0;
1556
1
}