Coverage Report

Created: 2025-07-18 07:26

/src/htslib/hfile_libcurl.c
Line
Count
Source (jump to first uncovered line)
1
/*  hfile_libcurl.c -- libcurl backend for low-level file streams.
2
3
    Copyright (C) 2015-2017, 2019-2020 Genome Research Ltd.
4
5
    Author: John Marshall <jm18@sanger.ac.uk>
6
7
Permission is hereby granted, free of charge, to any person obtaining a copy
8
of this software and associated documentation files (the "Software"), to deal
9
in the Software without restriction, including without limitation the rights
10
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
copies of the Software, and to permit persons to whom the Software is
12
furnished to do so, subject to the following conditions:
13
14
The above copyright notice and this permission notice shall be included in
15
all copies or substantial portions of the Software.
16
17
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
DEALINGS IN THE SOFTWARE.  */
24
25
#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
26
#include <config.h>
27
28
#include <stdarg.h>
29
#include <stdlib.h>
30
#include <string.h>
31
#include <strings.h>
32
#include <errno.h>
33
#include <pthread.h>
34
#ifndef _WIN32
35
# include <sys/select.h>
36
#endif
37
#include <assert.h>
38
39
#include "hfile_internal.h"
40
#ifdef ENABLE_PLUGINS
41
#include "version.h"
42
#endif
43
#include "htslib/hts.h"  // for hts_version() and hts_verbose
44
#include "htslib/kstring.h"
45
#include "htslib/khash.h"
46
47
#include <curl/curl.h>
48
49
// Number of seconds to take off auth_token expiry, to allow for clock skew
50
// and slow servers
51
0
#define AUTH_REFRESH_EARLY_SECS 60
52
53
// Minimum number of bytes to skip when seeking forward.  Seeks less than
54
// this will just read the data and throw it away.  The optimal value
55
// depends on how long it takes to make a new connection compared
56
// to how fast the data arrives.
57
0
#define MIN_SEEK_FORWARD 1000000
58
59
// A cached bearer token together with the location it is loaded from
typedef struct {
    char *path;            // Where to read the token from (given to hopen())
    char *token;           // Whole "Authorization: Bearer ..." header, or NULL
    time_t expiry;         // When to re-read the token; 0 means never refresh
    int failed;            // Set when a refresh fails; no retry is made after
    pthread_mutex_t lock;  // Taken while the token is renewed or copied
} auth_token;
66
67
// For the authorization header cache
68
KHASH_MAP_INIT_STR(auth_map, auth_token *)
69
70
// Growable array of curl_slist nodes, chained through their next pointers
// so that &list[0] can be handed to libcurl as an ordinary header list
typedef struct {
    struct curl_slist *list;  // Node array (NULL until the first append)
    unsigned int num;         // Number of nodes currently in use
    unsigned int size;        // Number of nodes allocated
} hdrlist;
76
77
typedef struct {
78
    hdrlist fixed;                   // List of headers supplied at hopen()
79
    hdrlist extra;                   // List of headers from callback
80
    hts_httphdr_callback callback;   // Callback to get more headers
81
    void *callback_data;             // Data to pass to httphdr callback
82
    auth_token *auth;                // Authentication token
83
    int auth_hdr_num;                // Location of auth_token in hdrlist extra
84
                                     // If -1, Authorization header is in fixed
85
                                     //    -2, it came from the callback
86
                                     //    -3, "auth_token_enabled", "false"
87
                                     //        passed to hopen()
88
    redirect_callback redirect;      // Callback to handle 3xx redirects
89
    void *redirect_data;             // Data to pass to redirect_callback
90
    long *http_response_ptr;         // Location to store http response code.
91
    int fail_on_error;               // Open fails on >400 response code
92
                                     //    (default true)
93
} http_headers;
94
95
typedef struct {
96
    hFILE base;
97
    CURL *easy;
98
    CURLM *multi;
99
    off_t file_size;
100
    struct {
101
        union { char *rd; const char *wr; } ptr;
102
        size_t len;
103
    } buffer;
104
    CURLcode final_result;  // easy result code for finished transfers
105
    // Flags for communicating with libcurl callbacks:
106
    unsigned paused : 1;    // callback tells us that it has paused transfer
107
    unsigned closing : 1;   // informs callback that hclose() has been invoked
108
    unsigned finished : 1;  // wait_perform() tells us transfer is complete
109
    unsigned perform_again : 1;
110
    unsigned is_read : 1;   // Opened in read mode
111
    unsigned can_seek : 1;  // Can (attempt to) seek on this handle
112
    unsigned is_recursive:1; // Opened by hfile_libcurl itself
113
    unsigned tried_seek : 1; // At least one seek has been attempted
114
    int nrunning;
115
    http_headers headers;
116
117
    off_t delayed_seek;      // Location to seek to before reading
118
    off_t last_offset;       // Location we're seeking from
119
    char *preserved;         // Preserved buffer content on seek
120
    size_t preserved_bytes;  // Number of preserved bytes
121
    size_t preserved_size;   // Size of preserved buffer
122
} hFILE_libcurl;
123
124
static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence);
125
static int restart_from_position(hFILE_libcurl *fp, off_t pos);
126
127
/*
 * Map an HTTP response status onto the closest errno value.
 * Statuses below 400 are not errors and give 0.  Unlisted 4xx codes
 * give EINVAL and unlisted codes of 500 or more give EIO.
 */
static int http_status_errno(int status)
{
    if (status < 400)
        return 0;

    switch (status) {
    case 401:
    case 407:
        return EPERM;
    case 403:
        return EACCES;
    case 404:
    case 410:
        return ENOENT;
    case 405:
        return EROFS;
    case 408:
    case 504:
        return ETIMEDOUT;
    case 501:
        return ENOSYS;
    case 503:
        return EBUSY;
    default:
        return (status >= 500) ? EIO : EINVAL;
    }
}
149
150
/*
 * Translate a libcurl easy-interface result code into an errno value.
 *
 * easy is only consulted for the codes that need extra detail: the OS
 * errno behind a connect/send/receive failure, and the HTTP status behind
 * CURLE_HTTP_RETURNED_ERROR.  Returns 0 for CURLE_OK.  Codes with no
 * specific mapping are logged and reported as EIO.
 */
static int easy_errno(CURL *easy, CURLcode err)
{
    long lval = 0;

    switch (err) {
    case CURLE_OK:
        return 0;

    case CURLE_UNSUPPORTED_PROTOCOL:
    case CURLE_URL_MALFORMAT:
        return EINVAL;

#if LIBCURL_VERSION_NUM >= 0x071505
    case CURLE_NOT_BUILT_IN:
        return ENOSYS;
#endif

    case CURLE_COULDNT_RESOLVE_PROXY:
    case CURLE_COULDNT_RESOLVE_HOST:
    case CURLE_FTP_CANT_GET_HOST:
        return EDESTADDRREQ; // Lookup failure

    case CURLE_COULDNT_CONNECT:
    case CURLE_SEND_ERROR:
    case CURLE_RECV_ERROR:
        // The query can succeed while reporting 0 if libcurl recorded no
        // OS error for this failure.  Never hand back 0 for a transfer
        // that failed; use the generic code instead.
        if (curl_easy_getinfo(easy, CURLINFO_OS_ERRNO, &lval) == CURLE_OK
            && lval != 0)
            return (int) lval;
        return ECONNABORTED;

    case CURLE_REMOTE_ACCESS_DENIED:
    case CURLE_LOGIN_DENIED:
    case CURLE_TFTP_PERM:
        return EACCES;

    case CURLE_PARTIAL_FILE:
        return EPIPE;

    case CURLE_HTTP_RETURNED_ERROR:
        if (curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &lval) == CURLE_OK)
            return http_status_errno(lval);
        return EIO;

    case CURLE_OUT_OF_MEMORY:
        return ENOMEM;

    case CURLE_OPERATION_TIMEDOUT:
        return ETIMEDOUT;

    case CURLE_RANGE_ERROR:
        return ESPIPE;

    case CURLE_SSL_CONNECT_ERROR:
        // TODO return SSL error buffer messages
        return ECONNABORTED;

    case CURLE_FILE_COULDNT_READ_FILE:
    case CURLE_TFTP_NOTFOUND:
        return ENOENT;

    case CURLE_TOO_MANY_REDIRECTS:
        return ELOOP;

    case CURLE_FILESIZE_EXCEEDED:
        return EFBIG;

    case CURLE_REMOTE_DISK_FULL:
        return ENOSPC;

    case CURLE_REMOTE_FILE_EXISTS:
        return EEXIST;

    default:
        hts_log_error("Libcurl reported error %d (%s)", (int) err,
                      curl_easy_strerror(err));
        return EIO;
    }
}
229
230
/*
 * Translate a libcurl multi-interface result code into an errno value.
 * CURLM_OK and CURLM_CALL_MULTI_PERFORM are not errors and give 0; codes
 * with no specific mapping are logged and reported as EIO.
 */
static int multi_errno(CURLMcode errm)
{
    if (errm == CURLM_OK || errm == CURLM_CALL_MULTI_PERFORM)
        return 0;

    if (errm == CURLM_BAD_HANDLE
        || errm == CURLM_BAD_EASY_HANDLE
        || errm == CURLM_BAD_SOCKET)
        return EBADF;

    if (errm == CURLM_OUT_OF_MEMORY)
        return ENOMEM;

    hts_log_error("Libcurl reported error %d (%s)", (int) errm,
                  curl_multi_strerror(errm));
    return EIO;
}
251
252
static struct {
253
    kstring_t useragent;
254
    CURLSH *share;
255
    char *auth_path;
256
    khash_t(auth_map) *auth_map;
257
    int allow_unencrypted_auth_header;
258
    pthread_mutex_t auth_lock;
259
    pthread_mutex_t share_lock;
260
} curl = { { 0, 0, NULL }, NULL, NULL, NULL, 0, PTHREAD_MUTEX_INITIALIZER,
261
           PTHREAD_MUTEX_INITIALIZER };
262
263
static void share_lock(CURL *handle, curl_lock_data data,
264
1.72k
                       curl_lock_access access, void *userptr) {
265
1.72k
    pthread_mutex_lock(&curl.share_lock);
266
1.72k
}
267
268
1.72k
// Unlock callback matching share_lock(); releases the single share mutex
// and ignores all of its arguments.
static void share_unlock(CURL *handle, curl_lock_data data, void *userptr)
{
    (void) handle;
    (void) data;
    (void) userptr;
    pthread_mutex_unlock(&curl.share_lock);
}
271
272
0
// Destroy an auth_token along with the strings it owns.  NULL is accepted.
// Aborts if the token's mutex cannot be destroyed.
static void free_auth(auth_token *tok)
{
    if (tok == NULL)
        return;

    if (pthread_mutex_destroy(&tok->lock) != 0)
        abort();

    free(tok->path);
    free(tok->token);
    free(tok);
}
279
280
static void libcurl_exit(void)
281
1
{
282
1
    if (curl_share_cleanup(curl.share) == CURLSHE_OK)
283
1
        curl.share = NULL;
284
285
1
    free(curl.useragent.s);
286
1
    curl.useragent.l = curl.useragent.m = 0; curl.useragent.s = NULL;
287
288
1
    free(curl.auth_path);
289
1
    curl.auth_path = NULL;
290
291
1
    if (curl.auth_map) {
292
0
        khiter_t i;
293
0
        for (i = kh_begin(curl.auth_map); i != kh_end(curl.auth_map); ++i) {
294
0
            if (kh_exist(curl.auth_map, i)) {
295
0
                free_auth(kh_value(curl.auth_map, i));
296
0
                kh_key(curl.auth_map, i) = NULL;
297
0
                kh_value(curl.auth_map, i) = NULL;
298
0
            }
299
0
        }
300
0
        kh_destroy(auth_map, curl.auth_map);
301
0
        curl.auth_map = NULL;
302
0
    }
303
304
1
    curl_global_cleanup();
305
1
}
306
307
1.13k
/*
 * Add one header line to the end of hdrs, growing the node array if needed.
 * When dup is non-zero a copy of data is stored; otherwise the list stores
 * the data pointer itself (and free_headers() will later free it).
 * Returns 0 on success, -1 on failure.
 */
static int append_header(hdrlist *hdrs, const char *data, int dup)
{
    struct curl_slist *node;

    if (hdrs->num == hdrs->size) {
        unsigned int capacity = hdrs->size ? hdrs->size * 2 : 4;
        unsigned int k;
        struct curl_slist *grown = realloc(hdrs->list,
                                           capacity * sizeof(*grown));
        if (!grown)
            return -1;

        hdrs->list = grown;
        hdrs->size = capacity;

        // The array may have moved, so rebuild the links between nodes
        for (k = 0; k + 1 < hdrs->num; k++)
            grown[k].next = &grown[k + 1];
    }

    node = &hdrs->list[hdrs->num];

    // Annoyingly, libcurl doesn't declare the char * as const...
    node->data = dup ? strdup(data) : (char *) data;
    if (!node->data)
        return -1;

    node->next = NULL;
    if (hdrs->num > 0)
        node[-1].next = node;

    hdrs->num++;
    return 0;
}
325
326
1.44k
/*
 * Free every header string held in hdrs and mark the list as empty.
 * The node array itself is kept for reuse unless completely is non-zero.
 */
static void free_headers(hdrlist *hdrs, int completely)
{
    while (hdrs->num > 0) {
        struct curl_slist *node = &hdrs->list[--hdrs->num];

        free(node->data);
        node->data = NULL;
        node->next = NULL;
    }

    if (!completely)
        return;

    free(hdrs->list);
    hdrs->list = NULL;
    hdrs->size = 0;
}
340
341
519
/*
 * Return the first node of the header list for fp: the first fixed header
 * if there are any, otherwise the first extra header, otherwise NULL.
 */
static struct curl_slist * get_header_list(hFILE_libcurl *fp)
{
    const hdrlist *fixed = &fp->headers.fixed;
    const hdrlist *extra = &fp->headers.extra;

    return fixed->num > 0 ? &fixed->list[0]
         : extra->num > 0 ? &extra->list[0]
         : NULL;
}
348
349
1.13k
// Return non-zero if hdr starts with "authorization:" (compared
// case-insensitively), zero otherwise.
static inline int is_authorization(const char *hdr)
{
    static const char prefix[] = "authorization:";

    return strncasecmp(prefix, hdr, sizeof(prefix) - 1) == 0;
}
352
353
465
/*
 * Replace the "extra" header list with headers fetched from the callback.
 *
 * The callback fills in a NULL-terminated array of strings.  Each string is
 * stored in the extra list without copying, so the list takes it over and
 * free_headers() will free it; the matching array entry is set to NULL.
 * Returns 0 on success (including when there is no callback or the
 * callback reports no change), -1 on failure.
 *
 * On failure the strings already stored stay in the extra list, the ones
 * not yet stored are freed, and every array entry is set to NULL.
 */
static int add_callback_headers(hFILE_libcurl *fp) {
    char **hdrs = NULL, **hdr, **taken;

    if (!fp->headers.callback)
        return 0;

    // Get the headers from the callback
    if (fp->headers.callback(fp->headers.callback_data, &hdrs) != 0) {
        return -1;
    }

    if (!hdrs) // No change
        return 0;

    // Remove any old callback headers
    if (fp->headers.fixed.num > 0) {
        // Unlink lists
        fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL;
    }
    free_headers(&fp->headers.extra, 0);

    if (fp->headers.auth_hdr_num > 0 || fp->headers.auth_hdr_num == -2)
        fp->headers.auth_hdr_num = 0; // Just removed it...

    // Convert to libcurl-suitable form
    for (hdr = hdrs; *hdr; hdr++) {
        if (append_header(&fp->headers.extra, *hdr, 0) < 0) {
            goto cleanup;
        }
        if (is_authorization(*hdr) && !fp->headers.auth_hdr_num)
            fp->headers.auth_hdr_num = -2;
    }
    for (hdr = hdrs; *hdr; hdr++) *hdr = NULL;

    if (fp->headers.fixed.num > 0 && fp->headers.extra.num > 0) {
        // Relink lists
        fp->headers.fixed.list[fp->headers.fixed.num - 1].next
            = &fp->headers.extra.list[0];
    }
    return 0;

 cleanup:
    // Entries before hdr are now owned by the extra list: drop the array's
    // references so that nothing else tries to free them.
    for (taken = hdrs; taken < hdr; taken++)
        *taken = NULL;

    // hdr and everything after it was never stored; free all of them
    for (; *hdr; hdr++) {
        free(*hdr);
        *hdr = NULL;
    }
    return -1;
}
401
402
/*
403
 * Read an OAUTH2-style Bearer access token (see
404
 * https://tools.ietf.org/html/rfc6750#section-4).
405
 * Returns 'v' for valid; 'i' for invalid (token missing or wrong sort);
406
 * '?' for a JSON parse error; 'm' if it runs out of memory.
407
 */
408
0
static int read_auth_json(auth_token *tok, hFILE *auth_fp) {
409
0
    hts_json_token *t = hts_json_alloc_token();
410
0
    kstring_t str = {0, 0, NULL};
411
0
    char *token = NULL, *type = NULL, *expiry = NULL;
412
0
    int ret = 'i';
413
414
0
    if (!t) goto error;
415
416
0
    if ((ret = hts_json_fnext(auth_fp, t, &str)) != '{') goto error;
417
0
    while (hts_json_fnext(auth_fp, t, &str) != '}') {
418
0
        char *key;
419
0
        if (hts_json_token_type(t) != 's') {
420
0
            ret = '?';
421
0
            goto error;
422
0
        }
423
0
        key = hts_json_token_str(t);
424
0
        if (!key) goto error;
425
0
        if (strcmp(key, "access_token") == 0) {
426
0
            if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error;
427
0
            token = ks_release(&str);
428
0
        } else if (strcmp(key, "token_type") == 0) {
429
0
            if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error;
430
0
            type = ks_release(&str);
431
0
        } else if (strcmp(key, "expires_in") == 0) {
432
0
            if ((ret = hts_json_fnext(auth_fp, t, &str)) != 'n') goto error;
433
0
            expiry = ks_release(&str);
434
0
        } else if (hts_json_fskip_value(auth_fp, '\0') != 'v') {
435
0
            ret = '?';
436
0
            goto error;
437
0
        }
438
0
    }
439
440
0
    if (!token || (type && strcmp(type, "Bearer") != 0)) {
441
0
        ret = 'i';
442
0
        goto error;
443
0
    }
444
445
0
    ret = 'm';
446
0
    str.l = 0;
447
0
    if (kputs("Authorization: Bearer ", &str) < 0) goto error;
448
0
    if (kputs(token, &str) < 0) goto error;
449
0
    free(tok->token);
450
0
    tok->token = ks_release(&str);
451
0
    if (expiry) {
452
0
        long exp = strtol(expiry, NULL, 10);
453
0
        if (exp < 0) exp = 0;
454
0
        tok->expiry = time(NULL) + exp;
455
0
    } else {
456
0
        tok->expiry = 0;
457
0
    }
458
0
    ret = 'v';
459
460
0
 error:
461
0
    free(token);
462
0
    free(type);
463
0
    free(expiry);
464
0
    free(str.s);
465
0
    hts_json_free_token(t);
466
0
    return ret;
467
0
}
468
469
0
/*
 * Read a token stored as plain text: the first whitespace-delimited word
 * on the first line of auth_fp.
 *
 * On success tok->token is replaced by "Authorization: Bearer <word>"
 * (or by NULL if the line holds no word) and tok->expiry is set to 0.
 * Returns 0 on success, -1 on failure.
 */
static int read_auth_plain(auth_token *tok, hFILE *auth_fp)
{
    kstring_t line = {0, 0, NULL};
    kstring_t header = {0, 0, NULL};
    const char *first, *last;
    int status = -1;

    if (kgetline(&line, (char * (*)(char *, int, void *)) hgets, auth_fp) < 0)
        goto done;
    if (kputc('\0', &line) < 0)
        goto done;

    // Skip leading white space, then find the end of the word
    first = line.s;
    while (*first && isspace_c(*first))
        first++;
    last = first;
    while (*last && !isspace_c(*last))
        last++;

    if (last > first) {
        if (kputs("Authorization: Bearer ", &header) < 0)
            goto done;
        if (kputsn(first, last - first, &header) < 0)
            goto done;
    }

    free(tok->token);
    tok->token = ks_release(&header);  // leaves header.s NULL
    tok->expiry = 0;
    status = 0;

 done:
    free(line.s);
    free(header.s);
    return status;
}
496
497
0
/*
 * Re-read an authentication token if it is about to expire.
 *
 * *changed is set to 1 if a reload was attempted and to 0 if the existing
 * token is still good (an expiry of 0 means it never needs refreshing).
 * The file at tok->path is treated as JSON if a '{' appears in its first
 * few bytes, otherwise as plain text.  A missing file is not an error: the
 * token is simply cleared.  Any other failure sets tok->failed, after
 * which further renewals are refused.
 *
 * Returns 0 on success, -1 on failure.
 */
static int renew_auth_token(auth_token *tok, int *changed) {
    hFILE *auth_fp = NULL;
    char buffer[16];
    ssize_t len;

    *changed = 0;
    if (tok->expiry == 0 || time(NULL) + AUTH_REFRESH_EARLY_SECS < tok->expiry)
        return 0; // Still valid

    if (tok->failed)
        return -1;

    *changed = 1;
    auth_fp = hopen(tok->path, "rR");
    if (!auth_fp) {
        // Not worried about missing files; other errors are bad.
        if (errno != ENOENT)
            goto fail;

        tok->expiry = 0; // Prevent retry
        free(tok->token); // Just in case it was set
        tok->token = NULL; // Must not leave a dangling pointer behind
        return 0;
    }

    len = hpeek(auth_fp, buffer, sizeof(buffer));
    if (len < 0)
        goto fail;

    if (memchr(buffer, '{', len) != NULL) {
        if (read_auth_json(tok, auth_fp) != 'v')
            goto fail;
    } else {
        if (read_auth_plain(tok, auth_fp) < 0)
            goto fail;
    }

    return hclose(auth_fp) < 0 ? -1 : 0;

 fail:
    tok->failed = 1;
    if (auth_fp) hclose_abruptly(auth_fp);
    return -1;
}
540
541
0
/*
 * Make the extra header list carry the current auth_token header.
 *
 * Does nothing if an Authorization header came from hopen() or the header
 * callback (auth_hdr_num < 0), or if fp has no auth token.  Otherwise the
 * token is renewed if necessary (under the token's lock) and then either
 * swapped in for the previous header, removed (if the token has gone
 * away), or appended as a new header.  fp->headers.auth_hdr_num is kept
 * as the 1-based position of the header in the extra list, 0 if none.
 *
 * Returns 0 on success, -1 on failure.
 */
static int add_auth_header(hFILE_libcurl *fp) {
    int changed = 0;

    if (fp->headers.auth_hdr_num < 0)
        return 0; // Have an Authorization header from open or header callback

    if (!fp->headers.auth)
        return 0; // Nothing to add

    pthread_mutex_lock(&fp->headers.auth->lock);
    if (renew_auth_token(fp->headers.auth, &changed) < 0)
        goto unlock_fail;

    if (!changed && fp->headers.auth_hdr_num > 0) {
        pthread_mutex_unlock(&fp->headers.auth->lock);
        return 0;
    }

    if (fp->headers.auth_hdr_num > 0) {
        // Had a previous header, so swap in the new one
        char *header = fp->headers.auth->token;
        char *header_copy = header ? strdup(header) : NULL;
        int idx = fp->headers.auth_hdr_num - 1;
        if (header && !header_copy)
            goto unlock_fail;

        if (header_copy) {
            free(fp->headers.extra.list[idx].data);
            fp->headers.extra.list[idx].data = header_copy;
        } else {
            unsigned int j;
            // More complicated case - need to get rid of the old header
            // and tidy up linked lists
            free(fp->headers.extra.list[idx].data);
            for (j = idx + 1; j < fp->headers.extra.num; j++) {
                fp->headers.extra.list[j - 1] = fp->headers.extra.list[j];
                fp->headers.extra.list[j - 1].next = &fp->headers.extra.list[j];
            }
            fp->headers.extra.num--;
            if (fp->headers.extra.num > 0) {
                fp->headers.extra.list[fp->headers.extra.num-1].next = NULL;
            } else if (fp->headers.fixed.num > 0) {
                fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL;
            }
            fp->headers.auth_hdr_num = 0;
        }
    } else if (fp->headers.auth->token) {
        // Add new header and remember where it is
        int res = append_header(&fp->headers.extra,
                                fp->headers.auth->token, 1);

        // append_header() may have moved the extra array (even when it
        // goes on to fail), or extra may have been empty until now.  Either
        // way the end of the fixed list has to be pointed at it again.
        if (fp->headers.fixed.num > 0) {
            fp->headers.fixed.list[fp->headers.fixed.num - 1].next
                = (fp->headers.extra.num > 0
                   ? &fp->headers.extra.list[0] : NULL);
        }

        if (res < 0)
            goto unlock_fail;

        fp->headers.auth_hdr_num = fp->headers.extra.num;
    }

    pthread_mutex_unlock(&fp->headers.auth->lock);
    return 0;

 unlock_fail:
    pthread_mutex_unlock(&fp->headers.auth->lock);
    return -1;
}
603
604
519
/*
 * Find (or create) the cached auth token that applies to url, attach it
 * to fp and add the corresponding Authorization header.
 *
 * The token location is curl.auth_path with each "%h" replaced by the
 * host part of url.  Tokens are cached in curl.auth_map, keyed by that
 * expanded location, under curl.auth_lock.  If a new cache entry cannot
 * be created, fp is left without a token and no header is added.
 *
 * Returns 0 on success or when there is nothing to do, -1 on failure.
 */
static int get_auth_token(hFILE_libcurl *fp, const char *url) {
    const char *host = NULL, *p, *q;
    kstring_t name = {0, 0, NULL};
    size_t host_len = 0;
    khiter_t idx;
    auth_token *tok = NULL;

    // Nothing to do if:
    //   curl.auth_path has not been set
    //   fp was made by hfile_libcurl (e.g. auth_path is a http:// url)
    //   we already have an Authorization header
    if (!curl.auth_path || fp->is_recursive || fp->headers.auth_hdr_num != 0)
        return 0;

    // Insist on having a secure connection unless the user insists harder
    if (!curl.allow_unencrypted_auth_header && strncmp(url, "https://", 8) != 0)
        return 0;

    host = strstr(url, "://");
    if (host) {
        host += 3;
        host_len = strcspn(host, "/");
    } else {
        host = "";  // No host part; "%h" expands to nothing
    }

    p = curl.auth_path;
    while ((q = strstr(p, "%h")) != NULL) {
        if (q - p > INT_MAX || host_len > INT_MAX) goto error;
        if (kputsn_(p, q - p, &name) < 0) goto error;
        if (kputsn_(host, host_len, &name) < 0) goto error;
        p = q + 2;
    }
    if (kputs(p, &name) < 0) goto error;

    pthread_mutex_lock(&curl.auth_lock);
    idx = kh_get(auth_map, curl.auth_map, name.s);
    if (idx < kh_end(curl.auth_map)) {
        tok = kh_value(curl.auth_map, idx);
    } else {
        tok = calloc(1, sizeof(*tok));
        if (tok && pthread_mutex_init(&tok->lock, NULL) != 0) {
            free(tok);
            tok = NULL;
        }
        if (tok) {
            int ret = -1;
            tok->path = ks_release(&name);
            tok->token = NULL;
            tok->expiry = 1; // Force refresh
            idx = kh_put(auth_map, curl.auth_map, tok->path, &ret);
            if (ret < 0) {
                // Nothing was inserted, so idx must not be used to store
                // a value.  Just discard the new token.
                free_auth(tok);
                tok = NULL;
            } else {
                kh_value(curl.auth_map, idx) = tok;
            }
        }
    }
    pthread_mutex_unlock(&curl.auth_lock);

    fp->headers.auth = tok;
    free(name.s);

    return add_auth_header(fp);

 error:
    free(name.s);
    return -1;
}
671
672
// Drain the multi handle's message queue.  A CURLMSG_DONE message marks
// the transfer as finished and records its result code; any other message
// is ignored.
static void process_messages(hFILE_libcurl *fp)
{
    CURLMsg *msg;
    int remaining;

    for (;;) {
        msg = curl_multi_info_read(fp->multi, &remaining);
        if (msg == NULL)
            break;

        if (msg->msg != CURLMSG_DONE)
            continue;

        fp->finished = 1;
        fp->final_result = msg->data.result;
    }
}
689
690
/*
 * Wait for activity on the transfer's sockets (unless libcurl asked to be
 * called again straight away), then let libcurl do some work.
 *
 * Returns 0 on success.  Returns -1 if select() fails or if
 * curl_multi_perform() reports an error; in the latter case errno is set
 * from the libcurl error code.
 */
static int wait_perform(hFILE_libcurl *fp)
{
    CURLMcode errm;
    int nrunning;

    if (!fp->perform_again) {
        fd_set rd, wr, ex;
        int maxfd;
        long timeout;

        FD_ZERO(&rd);
        FD_ZERO(&wr);
        FD_ZERO(&ex);

        if (curl_multi_fdset(fp->multi, &rd, &wr, &ex, &maxfd) != CURLM_OK) {
            maxfd = -1;
            timeout = 1000;
        } else if (curl_multi_timeout(fp->multi, &timeout) != CURLM_OK) {
            timeout = 1000;
        } else if (timeout < 0) {
            timeout = 10000;  // as recommended by curl_multi_timeout(3)
        }

        if (maxfd < 0) {
            // No descriptors to wait on yet, so only sleep briefly
            if (timeout > 100)
                timeout = 100; // as recommended by curl_multi_fdset(3)
#ifdef _WIN32
            /* Windows ignores the first argument of select, so calling select
             * with maxfd=-1 does not give the expected result of sleeping for
             * timeout milliseconds in the conditional block below.
             * So sleep here and skip the next block.
             */
            Sleep(timeout);
            timeout = 0;
#endif
        }

        if (timeout > 0) {
            struct timeval tval;

            // timeout is in milliseconds
            tval.tv_sec  = timeout / 1000;
            tval.tv_usec = (timeout % 1000) * 1000;

            if (select(maxfd + 1, &rd, &wr, &ex, &tval) < 0)
                return -1;
        }
    }

    errm = curl_multi_perform(fp->multi, &nrunning);
    fp->perform_again = (errm == CURLM_CALL_MULTI_PERFORM);
    if (errm != CURLM_OK && errm != CURLM_CALL_MULTI_PERFORM) {
        errno = multi_errno(errm);
        return -1;
    }

    // Fewer transfers running than expected: look for completion messages
    if (nrunning < fp->nrunning)
        process_messages(fp);

    return 0;
}
741
742
743
static size_t recv_callback(char *ptr, size_t size, size_t nmemb, void *fpv)
744
4
{
745
4
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
746
4
    size_t n = size * nmemb;
747
748
4
    if (n > fp->buffer.len) {
749
4
        fp->paused = 1;
750
4
        return CURL_WRITEFUNC_PAUSE;
751
4
    }
752
0
    else if (n == 0) return 0;
753
754
0
    memcpy(fp->buffer.ptr.rd, ptr, n);
755
0
    fp->buffer.ptr.rd += n;
756
0
    fp->buffer.len -= n;
757
0
    return n;
758
4
}
759
760
761
static size_t header_callback(void *contents, size_t size, size_t nmemb,
762
                              void *userp)
763
18
{
764
18
    size_t realsize = size * nmemb;
765
18
    kstring_t *resp = (kstring_t *)userp;
766
767
18
    if (kputsn((const char *)contents, realsize, resp) == EOF) {
768
0
        return 0;
769
0
    }
770
771
18
    return realsize;
772
18
}
773
774
775
/*
 * hFILE read method: read up to nbytes from the transfer into bufferv.
 *
 * Any seek deferred by libcurl_seek() is carried out first, in one of
 * three ways: from the buffer content preserved when seeking began (for a
 * short backwards seek), by reading and discarding data (for a forward
 * seek of less than MIN_SEEK_FORWARD bytes), or by restarting the transfer
 * at the new position.
 *
 * Returns the number of bytes read, 0 at end of file, or -1 on error
 * (errno is set from the libcurl result where there is one).
 */
static ssize_t libcurl_read(hFILE *fpv, void *bufferv, size_t nbytes)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    char *buffer = (char *) bufferv;
    off_t to_skip = -1;
    ssize_t got = 0;
    CURLcode err;

    if (fp->delayed_seek >= 0) {
        assert(fp->base.offset == fp->delayed_seek);

        if (fp->preserved
            && fp->last_offset > fp->delayed_seek
            && fp->last_offset - fp->preserved_bytes <= fp->delayed_seek) {
            // Can use buffer contents copied when seeking started, to
            // avoid having to re-read data discarded by hseek().
            // Note fp->last_offset is the offset of the *end* of the
            // preserved buffer.
            size_t n = fp->last_offset - fp->delayed_seek;
            char *start = fp->preserved + (fp->preserved_bytes - n);
            size_t bytes = n <= nbytes ? n : nbytes;
            memcpy(buffer, start, bytes);
            if (bytes < n) { // Part of the preserved buffer still left
                fp->delayed_seek += bytes;
            } else {
                fp->last_offset = fp->delayed_seek = -1;
            }
            return bytes;
        }

        if (fp->last_offset >= 0
            && fp->delayed_seek > fp->last_offset
            && fp->delayed_seek - fp->last_offset < MIN_SEEK_FORWARD) {
            // If not seeking far, just read the data and throw it away.  This
            // is likely to be quicker than opening a new stream
            to_skip = fp->delayed_seek - fp->last_offset;
        } else {
            if (restart_from_position(fp, fp->delayed_seek) < 0) {
                return -1;
            }
        }
        fp->delayed_seek = -1;
        fp->last_offset = -1;
        fp->preserved_bytes = 0;
    }

    do {
        // Hand the caller's buffer to recv_callback() and resume the transfer
        fp->buffer.ptr.rd = buffer;
        fp->buffer.len = nbytes;
        fp->paused = 0;
        if (!fp->finished) {
            err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
            if (err != CURLE_OK) {
                errno = easy_errno(fp->easy, err);
                return -1;
            }
        }

        while (! fp->paused && ! fp->finished) {
            if (wait_perform(fp) < 0) return -1;
        }

        got = fp->buffer.ptr.rd - buffer;

        if (to_skip >= 0) { // Skipping over a small seek
            if (got <= to_skip) { // Need to skip more data
                to_skip -= got;
            } else {
                got -= to_skip;
                if (got > 0) {  // If enough was skipped, return the rest
                    memmove(buffer, buffer + to_skip, got);
                    to_skip = -1;
                }
            }
        }
    } while (to_skip >= 0 && ! fp->finished);
    fp->buffer.ptr.rd = NULL;
    fp->buffer.len = 0;

    if (fp->finished && fp->final_result != CURLE_OK) {
        errno = easy_errno(fp->easy, fp->final_result);
        return -1;
    }

    // The loop can only end with to_skip still set if the transfer
    // finished part-way through the skip.  In that case everything in the
    // buffer lies before the seek target, so report end of file rather
    // than returning it as data.
    if (to_skip >= 0)
        got = 0;

    return got;
}
861
862
static size_t send_callback(char *ptr, size_t size, size_t nmemb, void *fpv)
863
0
{
864
0
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
865
0
    size_t n = size * nmemb;
866
867
0
    if (fp->buffer.len == 0) {
868
        // Send buffer is empty; normally pause, or signal EOF if we're closing
869
0
        if (fp->closing) return 0;
870
0
        else { fp->paused = 1; return CURL_READFUNC_PAUSE; }
871
0
    }
872
873
0
    if (n > fp->buffer.len) n = fp->buffer.len;
874
0
    memcpy(ptr, fp->buffer.ptr.wr, n);
875
0
    fp->buffer.ptr.wr += n;
876
0
    fp->buffer.len -= n;
877
0
    return n;
878
0
}
879
880
/*
 * hFILE write method: offer nbytes from bufferv to the upload.
 *
 * The buffer is made available to send_callback(), the transfer is
 * resumed and driven until the callback pauses it again or it finishes.
 * Returns the number of bytes the callback consumed, or -1 on error
 * (errno is set from the libcurl result where there is one).
 */
static ssize_t libcurl_write(hFILE *fpv, const void *bufferv, size_t nbytes)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    const char *buffer = (const char *) bufferv;
    ssize_t sent;
    CURLcode err;

    fp->buffer.ptr.wr = buffer;
    fp->buffer.len = nbytes;
    fp->paused = 0;

    err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
    if (err != CURLE_OK) {
        errno = easy_errno(fp->easy, err);
        return -1;
    }

    while (!(fp->paused || fp->finished)) {
        if (wait_perform(fp) < 0)
            return -1;
    }

    sent = fp->buffer.ptr.wr - buffer;
    fp->buffer.ptr.wr = NULL;
    fp->buffer.len = 0;

    if (fp->finished && fp->final_result != CURLE_OK) {
        errno = easy_errno(fp->easy, fp->final_result);
        return -1;
    }

    return sent;
}
906
907
/*
 * Stash the unread part of the hFILE buffer (base.begin .. base.end) in
 * fp->preserved, growing the stash to the full hFILE buffer size when it is
 * missing or too small.
 *
 * On return fp->preserved_bytes holds the number of stashed bytes; it is 0
 * when there was nothing to keep or when the stash could not be allocated.
 * Allocation failure is not reported to the caller: the stash is simply
 * left empty.
 */
static void preserve_buffer_content(hFILE_libcurl *fp)
{
    size_t capacity, pending;

    if (fp->base.begin == fp->base.end) {
        fp->preserved_bytes = 0;
        return;
    }

    capacity = fp->base.limit - fp->base.buffer;
    if (!fp->preserved || fp->preserved_size < capacity) {
        // Old contents are not needed, so release rather than realloc
        free(fp->preserved);
        fp->preserved = malloc(capacity);
        if (!fp->preserved) {
            // Keep the bookkeeping consistent with the missing buffer
            fp->preserved_size = 0;
            fp->preserved_bytes = 0;
            return;
        }
        fp->preserved_size = capacity;
    }

    pending = fp->base.end - fp->base.begin;
    assert(pending <= fp->preserved_size);

    memcpy(fp->preserved, fp->base.begin, pending);
    fp->preserved_bytes = pending;
}
926
927
/*
 * hFILE backend seek method.
 *
 * Only SEEK_SET and SEEK_END are handled here; SEEK_CUR fails with ENOSYS.
 * Seeking is refused with ESPIPE on write handles, after an earlier seek
 * has failed, and for SEEK_END when the file size is unknown.  Targets
 * before the start of the file, or beyond a known file size, fail with
 * EINVAL.
 *
 * The first seek restarts the transfer immediately; once that has worked,
 * later seeks only record the target in fp->delayed_seek.
 *
 * Returns the new absolute position, or -1 with errno set.
 */
static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    off_t base_pos, target;

    // Cowardly refuse to seek when writing or a previous seek failed.
    if (!(fp->is_read && fp->can_seek)) {
        errno = ESPIPE;
        return -1;
    }

    if (whence == SEEK_SET) {
        base_pos = 0;
    } else if (whence == SEEK_CUR) {
        errno = ENOSYS;
        return -1;
    } else if (whence == SEEK_END) {
        if (fp->file_size < 0) {
            errno = ESPIPE;
            return -1;
        }
        base_pos = fp->file_size;
    } else {
        errno = EINVAL;
        return -1;
    }

    // Check 0 <= base_pos+offset <= fp->file_size carefully, avoiding
    // overflow; the upper bound only applies when the size is known
    if (offset < 0) {
        if (base_pos + offset < 0) {
            errno = EINVAL;
            return -1;
        }
    } else if (fp->file_size >= 0 && offset > fp->file_size - base_pos) {
        errno = EINVAL;
        return -1;
    }

    target = base_pos + offset;

    if (!fp->tried_seek) {
        // First seek on this handle: do it for real to find out if it works
        if (restart_from_position(fp, target) < 0) {
            /* This value for errno may not be entirely true, but the caller
               may be able to carry on with the existing handle. */
            errno = ESPIPE;
            return -1;
        }
        fp->tried_seek = 1;
        return target;
    }

    /* Seeking has worked at least once, so the actual work can be put off
       until the next read.  This avoids lots of pointless http or ftp
       reconnections if the caller does lots of seeks without any
       intervening reads. */
    if (fp->delayed_seek < 0) {
        fp->last_offset = fp->base.offset + (fp->base.end - fp->base.buffer);
        // Stash the current hFILE buffer content in case it's useful later
        preserve_buffer_content(fp);
    }
    fp->delayed_seek = target;
    return target;
}
987
988
0
/*
 * Re-issue the request so that the next data received starts at byte `pos`.
 *
 * A duplicate of fp->easy is set to resume from `pos` and driven until it
 * pauses or finishes.  If that works, the original easy handle is removed
 * and cleaned up and the duplicate takes its place in *fp.  Otherwise the
 * duplicate is discarded and fp->easy is left in place.
 *
 * Returns 0 on success, -1 on failure with errno set where an error code is
 * available.  Failures after the header refresh clear fp->can_seek so that
 * no further seeks are attempted, except for a failure to re-point the new
 * handle's callbacks at fp, which only resets the handle.  A failure while
 * refreshing the headers leaves can_seek untouched.
 */
static int restart_from_position(hFILE_libcurl *fp, off_t pos) {
    hFILE_libcurl temp_fp;
    CURLcode err;
    CURLMcode errm;
    int update_headers = 0;
    int save_errno = 0;

    // TODO If we seem to be doing random access, use CURLOPT_RANGE to do
    // limited reads (e.g. about a BAM block!) so seeking can reuse the
    // existing connection more often.

    // Get new headers from the callback (if defined).  This changes the
    // headers in fp before it gets duplicated, but they should have been
    // sent by now.

    if (fp->headers.callback) {
        if (add_callback_headers(fp) != 0)
            return -1;
        update_headers = 1;
    }
    if (fp->headers.auth_hdr_num > 0 && fp->headers.auth) {
        if (add_auth_header(fp) != 0)
            return -1;
        update_headers = 1;
    }
    if (update_headers) {
        struct curl_slist *list = get_header_list(fp);
        if (list) {
            err = curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list);
            if (err != CURLE_OK) {
                errno = easy_errno(fp->easy,err);
                return -1;
            }
        }
    }

    /*
      Duplicate the easy handle, and use CURLOPT_RESUME_FROM_LARGE to open
      a new request to the server, reading from the location that we want
      to seek to.  If the new request works and returns the correct data,
      the original easy handle in *fp is closed and replaced with the new
      one.  If not, we close the new handle and leave *fp unchanged.
     */

    memcpy(&temp_fp, fp, sizeof(temp_fp));
    temp_fp.buffer.len = 0;
    temp_fp.buffer.ptr.rd = NULL;
    temp_fp.easy = curl_easy_duphandle(fp->easy);
    if (!temp_fp.easy)
        goto early_error;

    // Callbacks on the duplicate must see the on-stack copy, not *fp
    err = curl_easy_setopt(temp_fp.easy, CURLOPT_RESUME_FROM_LARGE,(curl_off_t)pos);
    err |= curl_easy_setopt(temp_fp.easy, CURLOPT_PRIVATE, &temp_fp);
    err |= curl_easy_setopt(temp_fp.easy, CURLOPT_WRITEDATA, &temp_fp);
    if (err != CURLE_OK) {
        save_errno = easy_errno(temp_fp.easy, err);
        goto error;
    }

    temp_fp.buffer.len = 0;  // Ensures we only read the response headers
    temp_fp.paused = temp_fp.finished = 0;

    // fp->multi and temp_fp.multi are the same.
    errm = curl_multi_add_handle(fp->multi, temp_fp.easy);
    if (errm != CURLM_OK) {
        save_errno = multi_errno(errm);
        goto error;
    }
    temp_fp.nrunning = ++fp->nrunning;

    while (! temp_fp.paused && ! temp_fp.finished)
        if (wait_perform(&temp_fp) < 0) {
            save_errno = errno;
            goto error_remove;
        }

    if (temp_fp.finished && temp_fp.final_result != CURLE_OK) {
        save_errno = easy_errno(temp_fp.easy, temp_fp.final_result);
        goto error_remove;
    }

    // We've got a good response, close the original connection and
    // replace it with the new one.

    errm = curl_multi_remove_handle(fp->multi, fp->easy);
    if (errm != CURLM_OK) {
        // Clean up as much as possible
        curl_easy_reset(temp_fp.easy);
        if (curl_multi_remove_handle(fp->multi, temp_fp.easy) == CURLM_OK) {
            fp->nrunning--;
            curl_easy_cleanup(temp_fp.easy);
        }
        save_errno = multi_errno(errm);
        goto early_error;
    }
    fp->nrunning--;

    curl_easy_cleanup(fp->easy);
    fp->easy = temp_fp.easy;
    // Re-point the new handle's callbacks at the long-lived *fp
    err = curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
    err |= curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp);
    if (err != CURLE_OK) {
        save_errno = easy_errno(fp->easy, err);
        curl_easy_reset(fp->easy);
        errno = save_errno;
        return -1;
    }
    fp->buffer.len = 0;
    fp->paused = temp_fp.paused;
    fp->finished = temp_fp.finished;
    fp->perform_again = temp_fp.perform_again;
    fp->final_result = temp_fp.final_result;

    return 0;

 error_remove:
    curl_easy_reset(temp_fp.easy); // Ensure no pointers to on-stack temp_fp
    errm = curl_multi_remove_handle(fp->multi, temp_fp.easy);
    if (errm != CURLM_OK) {
        // The duplicate is still attached, so it is not cleaned up here.
        // Report the error that brought us here in preference to this
        // secondary one, and make sure seeking is not tried again.
        if (save_errno == 0)
            save_errno = multi_errno(errm);
        goto early_error;
    }
    fp->nrunning--;
 error:
    curl_easy_cleanup(temp_fp.easy);
 early_error:
    fp->can_seek = 0;  // Don't try to seek again
    if (save_errno)
        errno = save_errno;
    return -1;
}
1119
1120
/*
 * hFILE backend close method.
 *
 * Un-pauses the transfer with fp->closing set so that send_callback() can
 * signal end of upload, drives it until it pauses or finishes, then
 * detaches and destroys the curl handles and releases the header lists and
 * the preserved-buffer stash.
 *
 * Returns 0 on success, or -1 with errno set to the error recorded while
 * shutting down.  Cleanup is carried out either way.
 */
static int libcurl_close(hFILE *fpv)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    CURLMcode mrc;
    int saved = 0;

    // Before closing the file, unpause it and perform on it so that uploads
    // have the opportunity to signal EOF to the server -- see send_callback().

    fp->buffer.len = 0;
    fp->closing = 1;
    fp->paused = 0;
    if (!fp->finished) {
        CURLcode rc = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
        if (rc != CURLE_OK)
            saved = easy_errno(fp->easy, rc);
    }

    while (saved == 0 && !fp->paused && !fp->finished) {
        if (wait_perform(fp) < 0)
            saved = errno;
    }

    if (fp->finished && fp->final_result != CURLE_OK)
        saved = easy_errno(fp->easy, fp->final_result);

    // A detach failure is only reported if nothing else went wrong first
    mrc = curl_multi_remove_handle(fp->multi, fp->easy);
    if (saved == 0 && mrc != CURLM_OK)
        saved = multi_errno(mrc);
    fp->nrunning--;

    curl_easy_cleanup(fp->easy);
    curl_multi_cleanup(fp->multi);

    // Tell callback to free any data it needs to
    if (fp->headers.callback)
        fp->headers.callback(fp->headers.callback_data, NULL);
    free_headers(&fp->headers.fixed, 1);
    free_headers(&fp->headers.extra, 1);

    free(fp->preserved);

    if (saved == 0)
        return 0;

    errno = saved;
    return -1;
}
1161
1162
// Method table for libcurl-backed hFILEs; libcurl_open() stores its address
// in fp->base.backend.  The initializer is positional: read, write and seek
// handlers, then a NULL entry, then the close handler.
// NOTE(review): the NULL slot is presumably the optional flush method --
// confirm against struct hFILE_backend in hfile_internal.h.
static const struct hFILE_backend libcurl_backend =
1163
{
1164
    libcurl_read, libcurl_write, libcurl_seek, NULL, libcurl_close
1165
};
1166
1167
static hFILE *
1168
libcurl_open(const char *url, const char *modes, http_headers *headers)
1169
524
{
1170
524
    hFILE_libcurl *fp;
1171
524
    struct curl_slist *list;
1172
524
    char mode;
1173
524
    const char *s;
1174
524
    CURLcode err;
1175
524
    CURLMcode errm;
1176
524
    int save, is_recursive;
1177
524
    kstring_t in_header = {0, 0, NULL};
1178
524
    long response;
1179
1180
524
    is_recursive = strchr(modes, 'R') != NULL;
1181
1182
524
    if ((s = strpbrk(modes, "rwa+")) != NULL) {
1183
524
        mode = *s;
1184
524
        if (strpbrk(&s[1], "rwa+")) mode = 'e';
1185
524
    }
1186
0
    else mode = '\0';
1187
1188
524
    if (mode != 'r' && mode != 'w') { errno = EINVAL; goto early_error; }
1189
1190
524
    fp = (hFILE_libcurl *) hfile_init(sizeof (hFILE_libcurl), modes, 0);
1191
524
    if (fp == NULL) goto early_error;
1192
1193
524
    if (headers) {
1194
465
        fp->headers = *headers;
1195
465
    } else {
1196
59
        memset(&fp->headers, 0, sizeof(fp->headers));
1197
59
        fp->headers.fail_on_error = 1;
1198
59
    }
1199
1200
524
    fp->file_size = -1;
1201
524
    fp->buffer.ptr.rd = NULL;
1202
524
    fp->buffer.len = 0;
1203
524
    fp->final_result = (CURLcode) -1;
1204
524
    fp->paused = fp->closing = fp->finished = fp->perform_again = 0;
1205
524
    fp->can_seek = 1;
1206
524
    fp->tried_seek = 0;
1207
524
    fp->delayed_seek = fp->last_offset = -1;
1208
524
    fp->preserved = NULL;
1209
524
    fp->preserved_bytes = fp->preserved_size = 0;
1210
524
    fp->is_recursive = is_recursive;
1211
524
    fp->nrunning = 0;
1212
524
    fp->easy = NULL;
1213
1214
524
    fp->multi = curl_multi_init();
1215
524
    if (fp->multi == NULL) { errno = ENOMEM; goto error; }
1216
1217
524
    fp->easy = curl_easy_init();
1218
524
    if (fp->easy == NULL) { errno = ENOMEM; goto error; }
1219
1220
    // Make a route to the hFILE_libcurl* given just a CURL* easy handle
1221
524
    err = curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp);
1222
1223
    // Avoid many repeated CWD calls with FTP, instead requesting the filename
1224
    // by full path (but not strictly compliant with RFC1738).
1225
524
    err |= curl_easy_setopt(fp->easy, CURLOPT_FTP_FILEMETHOD,
1226
524
                            (long) CURLFTPMETHOD_NOCWD);
1227
1228
524
    if (mode == 'r') {
1229
524
        err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEFUNCTION, recv_callback);
1230
524
        err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
1231
524
        fp->is_read = 1;
1232
524
    }
1233
0
    else {
1234
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_READFUNCTION, send_callback);
1235
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_READDATA, fp);
1236
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_UPLOAD, 1L);
1237
0
        if (append_header(&fp->headers.fixed,
1238
0
                          "Transfer-Encoding: chunked", 1) < 0)
1239
0
            goto error;
1240
0
        fp->is_read = 0;
1241
0
    }
1242
1243
524
    err |= curl_easy_setopt(fp->easy, CURLOPT_SHARE, curl.share);
1244
524
    err |= curl_easy_setopt(fp->easy, CURLOPT_URL, url);
1245
524
    {
1246
524
        char* env_curl_ca_bundle = getenv("CURL_CA_BUNDLE");
1247
524
        if (env_curl_ca_bundle) {
1248
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_CAINFO, env_curl_ca_bundle);
1249
0
        }
1250
524
    }
1251
524
    err |= curl_easy_setopt(fp->easy, CURLOPT_USERAGENT, curl.useragent.s);
1252
524
    if (fp->headers.callback) {
1253
465
        if (add_callback_headers(fp) != 0) goto error;
1254
465
    }
1255
519
    if (get_auth_token(fp, url) < 0)
1256
0
        goto error;
1257
519
    if ((list = get_header_list(fp)) != NULL)
1258
460
        err |= curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list);
1259
1260
519
    if (hts_verbose <= 8 && fp->headers.fail_on_error)
1261
59
        err |= curl_easy_setopt(fp->easy, CURLOPT_FAILONERROR, 1L);
1262
519
    if (hts_verbose >= 8)
1263
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_VERBOSE, 1L);
1264
1265
519
    if (fp->headers.redirect) {
1266
460
        err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, header_callback);
1267
460
        err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, (void *)&in_header);
1268
460
    } else {
1269
59
        err |= curl_easy_setopt(fp->easy, CURLOPT_FOLLOWLOCATION, 1L);
1270
59
    }
1271
1272
519
    if (err != 0) { errno = ENOSYS; goto error; }
1273
1274
519
    errm = curl_multi_add_handle(fp->multi, fp->easy);
1275
519
    if (errm != CURLM_OK) { errno = multi_errno(errm); goto error; }
1276
519
    fp->nrunning++;
1277
1278
1.19k
    while (! fp->paused && ! fp->finished) {
1279
677
        if (wait_perform(fp) < 0) goto error_remove;
1280
677
    }
1281
1282
519
    curl_easy_getinfo(fp->easy, CURLINFO_RESPONSE_CODE, &response);
1283
519
    if (fp->headers.http_response_ptr) {
1284
460
        *fp->headers.http_response_ptr = response;
1285
460
    }
1286
1287
519
    if (fp->finished && fp->final_result != CURLE_OK) {
1288
517
        errno = easy_errno(fp->easy, fp->final_result);
1289
517
        goto error_remove;
1290
517
    }
1291
1292
2
    if (fp->headers.redirect) {
1293
2
        if (response >= 300 && response < 400) { // redirection
1294
0
            kstring_t new_url = {0, 0, NULL};
1295
1296
0
            if (fp->headers.redirect(fp->headers.redirect_data, response,
1297
0
                                     &in_header, &new_url)) {
1298
0
                errno = ENOSYS;
1299
0
                goto error;
1300
0
            }
1301
1302
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_URL, new_url.s);
1303
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, NULL);
1304
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, NULL);
1305
0
            free(ks_release(&in_header));
1306
1307
0
            if (err != 0) { errno = ENOSYS; goto error; }
1308
0
            free(ks_release(&new_url));
1309
1310
0
            if (restart_from_position(fp, 0) < 0) {
1311
0
                goto error_remove;
1312
0
            }
1313
1314
0
            if (fp->headers.http_response_ptr) {
1315
0
                curl_easy_getinfo(fp->easy, CURLINFO_RESPONSE_CODE,
1316
0
                                  fp->headers.http_response_ptr);
1317
0
            }
1318
1319
0
            if (fp->finished && fp->final_result != CURLE_OK) {
1320
0
                errno = easy_errno(fp->easy, fp->final_result);
1321
0
                goto error_remove;
1322
0
            }
1323
2
        } else {
1324
            // we no longer need to look at the headers
1325
2
            err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, NULL);
1326
2
            err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, NULL);
1327
2
            free(ks_release(&in_header));
1328
1329
2
            if (err != 0) { errno = ENOSYS; goto error; }
1330
2
        }
1331
2
    }
1332
1333
2
    if (mode == 'r') {
1334
2
#if LIBCURL_VERSION_NUM >= 0x073700 // 7.55.0
1335
2
        curl_off_t offset;
1336
1337
2
        if (curl_easy_getinfo(fp->easy, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T,
1338
2
                              &offset) == CURLE_OK && offset > 0)
1339
0
            fp->file_size = (off_t) offset;
1340
#else
1341
        double dval;
1342
1343
        if (curl_easy_getinfo(fp->easy, CURLINFO_CONTENT_LENGTH_DOWNLOAD,
1344
                              &dval) == CURLE_OK && dval >= 0.0)
1345
            fp->file_size = (off_t) (dval + 0.1);
1346
#endif
1347
2
    }
1348
2
    fp->base.backend = &libcurl_backend;
1349
2
    return &fp->base;
1350
1351
517
error_remove:
1352
517
    save = errno;
1353
517
    (void) curl_multi_remove_handle(fp->multi, fp->easy);
1354
517
    fp->nrunning--;
1355
517
    errno = save;
1356
1357
522
error:
1358
522
    if (fp->headers.redirect) free(in_header.s);
1359
522
    save = errno;
1360
522
    if (fp->easy) curl_easy_cleanup(fp->easy);
1361
522
    if (fp->multi) curl_multi_cleanup(fp->multi);
1362
522
    free_headers(&fp->headers.extra, 1);
1363
522
    hfile_destroy((hFILE *) fp);
1364
522
    errno = save;
1365
522
    return NULL;
1366
1367
0
early_error:
1368
0
    return NULL;
1369
517
}
1370
1371
static hFILE *hopen_libcurl(const char *url, const char *modes)
1372
59
{
1373
59
    return libcurl_open(url, modes, NULL);
1374
59
}
1375
1376
/*
 * Fill in `headers` from a NULL-terminated list of (key, value...) variadic
 * arguments.  Recognised keys:
 *
 *   "httphdr"                 one header string (NULL is ignored)
 *   "httphdr:l"               header strings up to a NULL
 *   "httphdr:v"               NULL-terminated array of header strings
 *   "httphdr_callback"        header callback function
 *   "httphdr_callback_data"   pointer handed to the header callback
 *   "va_list"                 va_list * parsed recursively (NULL is ignored)
 *   "auth_token_enabled"      string; "false" sets auth_hdr_num to -3
 *   "redirect_callback"       redirect callback function
 *   "redirect_callback_data"  pointer handed to the redirect callback
 *   "http_response_ptr"       long * stored for the HTTP response code
 *   "fail_on_error"           int flag
 *
 * Header strings are appended to headers->fixed; an Authorization header
 * among them sets headers->auth_hdr_num to -1.
 *
 * Returns 0 on success.  Returns -1 if a header cannot be appended, or with
 * errno set to EINVAL on an unknown key.  Headers appended before a failure
 * stay in headers->fixed for the caller to free.
 */
static int parse_va_list(http_headers *headers, va_list args)
{
    const char *argtype;

    while ((argtype = va_arg(args, const char *)) != NULL) {
        if (strcmp(argtype, "httphdr:v") == 0) {
            const char **hdr = va_arg(args, const char **);
            // A NULL array is treated as empty, like a NULL "httphdr" value
            if (hdr) {
                for (; *hdr; hdr++) {
                    if (append_header(&headers->fixed, *hdr, 1) < 0)
                        return -1;
                    if (is_authorization(*hdr))
                        headers->auth_hdr_num = -1;
                }
            }
        }
        else if (strcmp(argtype, "httphdr:l") == 0) {
            const char *hdr;
            while ((hdr = va_arg(args, const char *)) != NULL) {
                if (append_header(&headers->fixed, hdr, 1) < 0)
                    return -1;
                if (is_authorization(hdr))
                    headers->auth_hdr_num = -1;
            }
        }
        else if (strcmp(argtype, "httphdr") == 0) {
            const char *hdr = va_arg(args, const char *);
            if (hdr) {
                if (append_header(&headers->fixed, hdr, 1) < 0)
                    return -1;
                if (is_authorization(hdr))
                    headers->auth_hdr_num = -1;
            }
        }
        else if (strcmp(argtype, "httphdr_callback") == 0) {
            headers->callback = va_arg(args, const hts_httphdr_callback);
        }
        else if (strcmp(argtype, "httphdr_callback_data") == 0) {
            headers->callback_data = va_arg(args, void *);
        }
        else if (strcmp(argtype, "va_list") == 0) {
            va_list *args2 = va_arg(args, va_list *);
            if (args2) {
                if (parse_va_list(headers, *args2) < 0) return -1;
            }
        }
        else if (strcmp(argtype, "auth_token_enabled") == 0) {
            const char *flag = va_arg(args, const char *);
            // A NULL value leaves the setting unchanged
            if (flag && strcmp(flag, "false") == 0)
                headers->auth_hdr_num = -3;
        }
        else if (strcmp(argtype, "redirect_callback") == 0) {
            headers->redirect = va_arg(args, const redirect_callback);
        }
        else if (strcmp(argtype, "redirect_callback_data") == 0) {
            headers->redirect_data = va_arg(args, void *);
        }
        else if (strcmp(argtype, "http_response_ptr") == 0) {
            headers->http_response_ptr = va_arg(args, long *);
        }
        else if (strcmp(argtype, "fail_on_error") == 0) {
            headers->fail_on_error = va_arg(args, int);
        }
        else { errno = EINVAL; return -1; }
    }

    return 0;
}
1441
1442
/*
1443
  HTTP headers to be added to the request can be passed in as extra
1444
  arguments to hopen().  The headers can be specified as follows:
1445
1446
  * Single header:
1447
    hopen(url, mode, "httphdr", "X-Hdr-1: text", NULL);
1448
1449
  * Multiple headers in the argument list:
1450
    hopen(url, mode, "httphdr:l", "X-Hdr-1: text", "X-Hdr-2: text", NULL, NULL);
1451
1452
  * Multiple headers in a char* array:
1453
    hopen(url, mode, "httphdr:v", hdrs, NULL);
1454
    where `hdrs` is a char **.  The list ends with a NULL pointer.
1455
1456
  * A callback function
1457
    hopen(url, mode, "httphdr_callback", func,
1458
                     "httphdr_callback_data", arg, NULL);
1459
    `func` has type
1460
         int (* hts_httphdr_callback) (void *cb_data, char ***hdrs);
1461
    `arg` is passed to the callback as a void *.
1462
1463
    The function is called at file open, and when attempting to seek (which
1464
    opens a new HTTP request).  This allows, for example, access tokens
1465
    that may have gone stale to be regenerated.  The function is also
1466
    called (with `hdrs` == NULL) on file close so that the callback can
1467
    free any memory that it needs to.
1468
1469
    The callback should return 0 on success, non-zero on failure.  It should
1470
    return in *hdrs a list of strings containing the new headers (terminated
1471
    with a NULL pointer).  These will replace any headers previously supplied
1472
    by the callback.  If no changes are necessary, it can return NULL
1473
    in *hdrs, in which case the previous headers will be left unchanged.
1474
1475
    Ownership of the strings in the header list passes to hfile_libcurl,
1476
    so the callback should not attempt to use or free them itself.  The memory
1477
    containing the array belongs to the callback and will not be freed by
1478
    hfile_libcurl.
1479
1480
    Headers supplied by the callback are appended after any specified
1481
    using the "httphdr", "httphdr:l" or "httphdr:v" methods.  No attempt
1482
    is made to replace these headers (even if a key is repeated) so anything
1483
    that is expected to vary needs to come from the callback.
1484
 */
1485
1486
static hFILE *vhopen_libcurl(const char *url, const char *modes, va_list args)
1487
465
{
1488
465
    hFILE *fp = NULL;
1489
465
    http_headers headers = { .fail_on_error = 1 };
1490
1491
465
    if (parse_va_list(&headers, args) == 0) {
1492
465
        fp = libcurl_open(url, modes, &headers);
1493
465
    }
1494
1495
465
    if (!fp) {
1496
463
        free_headers(&headers.fixed, 1);
1497
463
    }
1498
465
    return fp;
1499
465
}
1500
1501
int PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)(struct hFILE_plugin *self)
1502
1
{
1503
1
    static const struct hFILE_scheme_handler handler =
1504
1
        { hopen_libcurl, hfile_always_remote, "libcurl",
1505
1
          2000 + 50,
1506
1
          vhopen_libcurl };
1507
1508
#ifdef ENABLE_PLUGINS
1509
    // Embed version string for examination via strings(1) or what(1)
1510
    static const char id[] =
1511
        "@(#)hfile_libcurl plugin (htslib)\t" HTS_VERSION_TEXT;
1512
    const char *version = strchr(id, '\t')+1;
1513
#else
1514
1
    const char *version = hts_version();
1515
1
#endif
1516
1
    const curl_version_info_data *info;
1517
1
    const char * const *protocol;
1518
1
    const char *auth;
1519
1
    CURLcode err;
1520
1
    CURLSHcode errsh;
1521
1522
1
    err = curl_global_init(CURL_GLOBAL_ALL);
1523
1
    if (err != CURLE_OK) { errno = easy_errno(NULL, err); return -1; }
1524
1525
1
    curl.share = curl_share_init();
1526
1
    if (curl.share == NULL) { curl_global_cleanup(); errno = EIO; return -1; }
1527
1
    errsh = curl_share_setopt(curl.share, CURLSHOPT_LOCKFUNC, share_lock);
1528
1
    errsh |= curl_share_setopt(curl.share, CURLSHOPT_UNLOCKFUNC, share_unlock);
1529
1
    errsh |= curl_share_setopt(curl.share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
1530
1
    if (errsh != 0) {
1531
0
        curl_share_cleanup(curl.share);
1532
0
        curl_global_cleanup();
1533
0
        errno = EIO;
1534
0
        return -1;
1535
0
    }
1536
1537
1
    if ((auth = getenv("HTS_AUTH_LOCATION")) != NULL) {
1538
0
        curl.auth_path = strdup(auth);
1539
0
        curl.auth_map = kh_init(auth_map);
1540
0
        if (!curl.auth_path || !curl.auth_map) {
1541
0
            int save_errno = errno;
1542
0
            free(curl.auth_path);
1543
0
            kh_destroy(auth_map, curl.auth_map);
1544
0
            curl_share_cleanup(curl.share);
1545
0
            curl_global_cleanup();
1546
0
            errno = save_errno;
1547
0
            return -1;
1548
0
        }
1549
0
    }
1550
1
    if ((auth = getenv("HTS_ALLOW_UNENCRYPTED_AUTHORIZATION_HEADER")) != NULL
1551
1
        && strcmp(auth, "I understand the risks") == 0) {
1552
0
        curl.allow_unencrypted_auth_header = 1;
1553
0
    }
1554
1555
1
    info = curl_version_info(CURLVERSION_NOW);
1556
1
    ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version);
1557
1558
1
    self->name = "libcurl";
1559
1
    self->destroy = libcurl_exit;
1560
1561
24
    for (protocol = info->protocols; *protocol; protocol++)
1562
23
        hfile_add_scheme_handler(*protocol, &handler);
1563
1
    return 0;
1564
1
}