Coverage Report

Created: 2025-11-15 06:33

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/htslib/hfile_libcurl.c
Line
Count
Source
1
/*  hfile_libcurl.c -- libcurl backend for low-level file streams.
2
3
    Copyright (C) 2015-2017, 2019-2020 Genome Research Ltd.
4
5
    Author: John Marshall <jm18@sanger.ac.uk>
6
7
Permission is hereby granted, free of charge, to any person obtaining a copy
8
of this software and associated documentation files (the "Software"), to deal
9
in the Software without restriction, including without limitation the rights
10
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
copies of the Software, and to permit persons to whom the Software is
12
furnished to do so, subject to the following conditions:
13
14
The above copyright notice and this permission notice shall be included in
15
all copies or substantial portions of the Software.
16
17
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
DEALINGS IN THE SOFTWARE.  */
24
25
#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
26
#include <config.h>
27
28
#include <stdarg.h>
29
#include <stdlib.h>
30
#include <string.h>
31
#include <strings.h>
32
#include <errno.h>
33
#include <pthread.h>
34
#ifndef _WIN32
35
# include <sys/select.h>
36
#endif
37
#include <assert.h>
38
39
#include "hfile_internal.h"
40
#ifdef ENABLE_PLUGINS
41
#include "version.h"
42
#endif
43
#include "htslib/hts.h"  // for hts_version() and hts_verbose
44
#include "htslib/kstring.h"
45
#include "htslib/khash.h"
46
47
#include <curl/curl.h>
48
49
// Number of seconds to take off auth_token expiry, to allow for clock skew
50
// and slow servers
51
0
#define AUTH_REFRESH_EARLY_SECS 60
52
53
// Minimum number of bytes to skip when seeking forward.  Seeks less than
54
// this will just read the data and throw it away.  The optimal value
55
// depends on how long it takes to make a new connection compared
56
// to how fast the data arrives.
57
0
#define MIN_SEEK_FORWARD 1000000
58
59
typedef struct {
60
    char *path;
61
    char *token;
62
    time_t expiry;
63
    int failed;
64
    pthread_mutex_t lock;
65
} auth_token;
66
67
// For the authorization header cache
68
KHASH_MAP_INIT_STR(auth_map, auth_token *)
69
70
// Curl-compatible header linked list
71
typedef struct {
72
    struct curl_slist *list;
73
    unsigned int num;
74
    unsigned int size;
75
} hdrlist;
76
77
typedef struct {
78
    hdrlist fixed;                   // List of headers supplied at hopen()
79
    hdrlist extra;                   // List of headers from callback
80
    hts_httphdr_callback callback;   // Callback to get more headers
81
    void *callback_data;             // Data to pass to httphdr callback
82
    auth_token *auth;                // Authentication token
83
    int auth_hdr_num;                // Location of auth_token in hdrlist extra
84
                                     // If -1, Authorization header is in fixed
85
                                     //    -2, it came from the callback
86
                                     //    -3, "auth_token_enabled", "false"
87
                                     //        passed to hopen()
88
    redirect_callback redirect;      // Callback to handle 3xx redirects
89
    void *redirect_data;             // Data to pass to redirect_callback
90
    long *http_response_ptr;         // Location to store http response code.
91
    int fail_on_error;               // Open fails on >400 response code
92
                                     //    (default true)
93
} http_headers;
94
95
typedef struct {
96
    hFILE base;
97
    CURL *easy;
98
    CURLM *multi;
99
    off_t file_size;
100
    struct {
101
        union { char *rd; const char *wr; } ptr;
102
        size_t len;
103
    } buffer;
104
    CURLcode final_result;  // easy result code for finished transfers
105
    // Flags for communicating with libcurl callbacks:
106
    unsigned paused : 1;    // callback tells us that it has paused transfer
107
    unsigned closing : 1;   // informs callback that hclose() has been invoked
108
    unsigned finished : 1;  // wait_perform() tells us transfer is complete
109
    unsigned perform_again : 1;
110
    unsigned is_read : 1;   // Opened in read mode
111
    unsigned can_seek : 1;  // Can (attempt to) seek on this handle
112
    unsigned is_recursive:1; // Opened by hfile_libcurl itself
113
    unsigned tried_seek : 1; // At least one seek has been attempted
114
    int nrunning;
115
    http_headers headers;
116
117
    off_t delayed_seek;      // Location to seek to before reading
118
    off_t last_offset;       // Location we're seeking from
119
    char *preserved;         // Preserved buffer content on seek
120
    size_t preserved_bytes;  // Number of preserved bytes
121
    size_t preserved_size;   // Size of preserved buffer
122
} hFILE_libcurl;
123
124
static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence);
125
static int restart_from_position(hFILE_libcurl *fp, off_t pos);
126
127
/*
 * Translate an HTTP response status into an errno value.
 * Statuses below 400 yield 0.  Unlisted 4xx codes map to EINVAL and
 * unlisted codes of 500 or more map to EIO.
 */
static int http_status_errno(int status)
{
    if (status < 400)
        return 0;

    switch (status) {
    case 401:
    case 407:
        return EPERM;
    case 403:
        return EACCES;
    case 404:
    case 410:
        return ENOENT;
    case 405:
        return EROFS;
    case 408:
    case 504:
        return ETIMEDOUT;
    case 501:
        return ENOSYS;
    case 503:
        return EBUSY;
    default:
        return status >= 500 ? EIO : EINVAL;
    }
}
149
150
/*
 * Translate a libcurl easy-interface result code into an errno value.
 *
 * easy  Handle the error came from; queried for the OS errno or the HTTP
 *       response code where the result code alone is not specific enough.
 * err   The CURLcode to translate.
 *
 * Returns 0 for CURLE_OK, otherwise an errno value.  Codes with no specific
 * mapping are logged and reported as EIO.
 */
static int easy_errno(CURL *easy, CURLcode err)
{
    long lval = 0;

    switch (err) {
    case CURLE_OK:
        return 0;

    case CURLE_UNSUPPORTED_PROTOCOL:
    case CURLE_URL_MALFORMAT:
        return EINVAL;

#if LIBCURL_VERSION_NUM >= 0x071505
    case CURLE_NOT_BUILT_IN:
        return ENOSYS;
#endif

    case CURLE_COULDNT_RESOLVE_PROXY:
    case CURLE_COULDNT_RESOLVE_HOST:
    case CURLE_FTP_CANT_GET_HOST:
        return EDESTADDRREQ; // Lookup failure

    case CURLE_COULDNT_CONNECT:
    case CURLE_SEND_ERROR:
    case CURLE_RECV_ERROR:
        // Prefer the OS-level errno, but only if libcurl recorded one.
        // Passing a zero through would make callers set errno to 0 while
        // still reporting failure.
        if (curl_easy_getinfo(easy, CURLINFO_OS_ERRNO, &lval) == CURLE_OK
            && lval != 0)
            return (int) lval;
        return ECONNABORTED;

    case CURLE_REMOTE_ACCESS_DENIED:
    case CURLE_LOGIN_DENIED:
    case CURLE_TFTP_PERM:
        return EACCES;

    case CURLE_PARTIAL_FILE:
        return EPIPE;

    case CURLE_HTTP_RETURNED_ERROR:
        if (curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &lval) == CURLE_OK)
            return http_status_errno(lval);
        return EIO;

    case CURLE_OUT_OF_MEMORY:
        return ENOMEM;

    case CURLE_OPERATION_TIMEDOUT:
        return ETIMEDOUT;

    case CURLE_RANGE_ERROR:
        return ESPIPE;

    case CURLE_SSL_CONNECT_ERROR:
        // TODO return SSL error buffer messages
        return ECONNABORTED;

    case CURLE_FILE_COULDNT_READ_FILE:
    case CURLE_TFTP_NOTFOUND:
        return ENOENT;

    case CURLE_TOO_MANY_REDIRECTS:
        return ELOOP;

    case CURLE_FILESIZE_EXCEEDED:
        return EFBIG;

    case CURLE_REMOTE_DISK_FULL:
        return ENOSPC;

    case CURLE_REMOTE_FILE_EXISTS:
        return EEXIST;

    default:
        hts_log_error("Libcurl reported error %d (%s)", (int) err,
                      curl_easy_strerror(err));
        return EIO;
    }
}
229
230
/*
 * Translate a libcurl multi-interface result code into an errno value.
 * CURLM_OK and CURLM_CALL_MULTI_PERFORM are not errors and give 0.
 * Codes without a specific mapping are logged and reported as EIO.
 */
static int multi_errno(CURLMcode errm)
{
    if (errm == CURLM_CALL_MULTI_PERFORM || errm == CURLM_OK)
        return 0;

    if (errm == CURLM_BAD_HANDLE
        || errm == CURLM_BAD_EASY_HANDLE
        || errm == CURLM_BAD_SOCKET)
        return EBADF;

    if (errm == CURLM_OUT_OF_MEMORY)
        return ENOMEM;

    hts_log_error("Libcurl reported error %d (%s)", (int) errm,
                  curl_multi_strerror(errm));
    return EIO;
}
251
252
static struct {
253
    kstring_t useragent;
254
    CURLSH *share;
255
    char *auth_path;
256
    khash_t(auth_map) *auth_map;
257
    int allow_unencrypted_auth_header;
258
    pthread_mutex_t auth_lock;
259
    pthread_mutex_t share_lock;
260
} curl = { { 0, 0, NULL }, NULL, NULL, NULL, 0, PTHREAD_MUTEX_INITIALIZER,
261
           PTHREAD_MUTEX_INITIALIZER };
262
263
static void share_lock(CURL *handle, curl_lock_data data,
264
1.15k
                       curl_lock_access access, void *userptr) {
265
1.15k
    pthread_mutex_lock(&curl.share_lock);
266
1.15k
}
267
268
1.15k
/*
 * Unlock callback matching share_lock(): releases curl.share_lock.
 * The arguments are ignored.
 */
static void share_unlock(CURL *handle, curl_lock_data data, void *userptr)
{
    (void) handle;
    (void) data;
    (void) userptr;
    pthread_mutex_unlock(&curl.share_lock);
}
271
272
0
/*
 * Destroy an auth_token: its mutex, its path and token strings, and the
 * structure itself.  A NULL argument is ignored.  Aborts the process if
 * the mutex cannot be destroyed.
 */
static void free_auth(auth_token *tok)
{
    int rc;

    if (tok == NULL)
        return;

    rc = pthread_mutex_destroy(&tok->lock);
    if (rc != 0)
        abort();

    free(tok->path);
    free(tok->token);
    free(tok);
}
279
280
static void libcurl_exit(void)
281
1
{
282
1
    if (curl_share_cleanup(curl.share) == CURLSHE_OK)
283
1
        curl.share = NULL;
284
285
1
    free(curl.useragent.s);
286
1
    curl.useragent.l = curl.useragent.m = 0; curl.useragent.s = NULL;
287
288
1
    free(curl.auth_path);
289
1
    curl.auth_path = NULL;
290
291
1
    if (curl.auth_map) {
292
0
        khiter_t i;
293
0
        for (i = kh_begin(curl.auth_map); i != kh_end(curl.auth_map); ++i) {
294
0
            if (kh_exist(curl.auth_map, i)) {
295
0
                free_auth(kh_value(curl.auth_map, i));
296
0
                kh_key(curl.auth_map, i) = NULL;
297
0
                kh_value(curl.auth_map, i) = NULL;
298
0
            }
299
0
        }
300
0
        kh_destroy(auth_map, curl.auth_map);
301
0
        curl.auth_map = NULL;
302
0
    }
303
304
1
    curl_global_cleanup();
305
1
}
306
307
795
/*
 * Append one header line to a hdrlist, growing the backing array if needed.
 *
 * hdrs  List to extend.
 * data  Header text.
 * dup   Non-zero: store a strdup() copy.  Zero: store the pointer itself,
 *       so the list takes over the caller's string.
 *
 * Returns 0 on success, -1 if memory could not be allocated.
 */
static int append_header(hdrlist *hdrs, const char *data, int dup) {
    struct curl_slist *slot;

    if (hdrs->num == hdrs->size) {
        unsigned int grown = hdrs->size ? hdrs->size * 2 : 4;
        unsigned int k;
        struct curl_slist *resized = realloc(hdrs->list,
                                             grown * sizeof(*resized));
        if (!resized) return -1;
        hdrs->size = grown;
        hdrs->list = resized;
        // The array may have moved, so rebuild the internal next pointers
        for (k = 0; k + 1 < hdrs->num; k++)
            resized[k].next = &resized[k + 1];
    }

    slot = &hdrs->list[hdrs->num];
    // Annoyingly, libcurl doesn't declare the char * as const...
    slot->data = dup ? strdup(data) : (char *) data;
    if (!slot->data) return -1;

    if (hdrs->num > 0)
        hdrs->list[hdrs->num - 1].next = slot;
    slot->next = NULL;
    hdrs->num++;
    return 0;
}
325
326
1.03k
/*
 * Free every header string held in a hdrlist and reset its count to zero.
 * If `completely` is non-zero the backing array is released as well;
 * otherwise it is kept for reuse.
 */
static void free_headers(hdrlist *hdrs, int completely) {
    struct curl_slist *entry = hdrs->list;
    unsigned int left;

    for (left = hdrs->num; left > 0; left--, entry++) {
        free(entry->data);
        entry->data = NULL;
        entry->next = NULL;
    }
    hdrs->num = 0;

    if (!completely)
        return;

    free(hdrs->list);
    hdrs->size = 0;
    hdrs->list = NULL;
}
340
341
361
/*
 * Return the first node of the header chain: the first fixed header if
 * there is one, else the first extra header, else NULL when both lists
 * are empty.
 */
static struct curl_slist * get_header_list(hFILE_libcurl *fp) {
    const hdrlist *fixed = &fp->headers.fixed;
    const hdrlist *extra = &fp->headers.extra;

    if (fixed->num > 0)
        return &fixed->list[0];

    return extra->num > 0 ? &extra->list[0] : NULL;
}
348
349
795
/*
 * Return 1 if hdr starts with "authorization:" (compared without regard
 * to case), 0 otherwise.
 */
static inline int is_authorization(const char *hdr) {
    static const char prefix[] = "authorization:";

    if (strncasecmp(prefix, hdr, sizeof(prefix) - 1) != 0)
        return 0;
    return 1;
}
352
353
337
/*
 * Ask the user-supplied header callback for headers and install them as
 * the "extra" header list, replacing any extras already there.
 *
 * The callback's header strings are stored without copying, so the extra
 * list takes them over; each array slot is set to NULL once its string has
 * been taken or freed.
 *
 * Returns 0 on success (including "no callback" and "callback reported no
 * change"), -1 if the callback failed or memory ran out.
 */
static int add_callback_headers(hFILE_libcurl *fp) {
    char **hdrs = NULL, **hdr, **taken;

    if (!fp->headers.callback)
        return 0;

    // Get the headers from the callback
    if (fp->headers.callback(fp->headers.callback_data, &hdrs) != 0) {
        return -1;
    }

    if (!hdrs) // No change
        return 0;

    // Remove any old callback headers
    if (fp->headers.fixed.num > 0) {
        // Unlink lists
        fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL;
    }
    free_headers(&fp->headers.extra, 0);

    if (fp->headers.auth_hdr_num > 0 || fp->headers.auth_hdr_num == -2)
        fp->headers.auth_hdr_num = 0; // Just removed it...

    // Convert to libcurl-suitable form
    for (hdr = hdrs; *hdr; hdr++) {
        if (append_header(&fp->headers.extra, *hdr, 0) < 0) {
            goto cleanup;
        }
        if (is_authorization(*hdr) && !fp->headers.auth_hdr_num)
            fp->headers.auth_hdr_num = -2;
    }
    // Every string now belongs to the extra list
    for (hdr = hdrs; *hdr; hdr++) *hdr = NULL;

    if (fp->headers.fixed.num > 0 && fp->headers.extra.num > 0) {
        // Relink lists
        fp->headers.fixed.list[fp->headers.fixed.num - 1].next
            = &fp->headers.extra.list[0];
    }
    return 0;

 cleanup:
    // Strings before hdr were already taken by the extra list.  Clear
    // their slots so the array no longer holds pointers it does not own.
    for (taken = hdrs; taken < hdr; taken++)
        *taken = NULL;
    // Free the string that failed to append and everything after it.
    for (; *hdr; hdr++) {
        free(*hdr);
        *hdr = NULL;
    }
    return -1;
}
401
402
/*
403
 * Read an OAUTH2-style Bearer access token (see
404
 * https://tools.ietf.org/html/rfc6750#section-4).
405
 * Returns 'v' for valid; 'i' for invalid (token missing or wrong sort);
406
 * '?' for a JSON parse error; 'm' if it runs out of memory.
407
 */
408
0
static int read_auth_json(auth_token *tok, hFILE *auth_fp) {
409
0
    hts_json_token *t = hts_json_alloc_token();
410
0
    kstring_t str = {0, 0, NULL};
411
0
    char *token = NULL, *type = NULL, *expiry = NULL;
412
0
    int ret = 'i';
413
414
0
    if (!t) goto error;
415
416
0
    if ((ret = hts_json_fnext(auth_fp, t, &str)) != '{') goto error;
417
0
    while (hts_json_fnext(auth_fp, t, &str) != '}') {
418
0
        char *key;
419
0
        if (hts_json_token_type(t) != 's') {
420
0
            ret = '?';
421
0
            goto error;
422
0
        }
423
0
        key = hts_json_token_str(t);
424
0
        if (!key) goto error;
425
0
        if (strcmp(key, "access_token") == 0) {
426
0
            if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error;
427
0
            token = ks_release(&str);
428
0
        } else if (strcmp(key, "token_type") == 0) {
429
0
            if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error;
430
0
            type = ks_release(&str);
431
0
        } else if (strcmp(key, "expires_in") == 0) {
432
0
            if ((ret = hts_json_fnext(auth_fp, t, &str)) != 'n') goto error;
433
0
            expiry = ks_release(&str);
434
0
        } else if (hts_json_fskip_value(auth_fp, '\0') != 'v') {
435
0
            ret = '?';
436
0
            goto error;
437
0
        }
438
0
    }
439
440
0
    if (!token || (type && strcmp(type, "Bearer") != 0)) {
441
0
        ret = 'i';
442
0
        goto error;
443
0
    }
444
445
0
    ret = 'm';
446
0
    str.l = 0;
447
0
    if (kputs("Authorization: Bearer ", &str) < 0) goto error;
448
0
    if (kputs(token, &str) < 0) goto error;
449
0
    free(tok->token);
450
0
    tok->token = ks_release(&str);
451
0
    if (expiry) {
452
0
        long exp = strtol(expiry, NULL, 10);
453
0
        if (exp < 0) exp = 0;
454
0
        tok->expiry = time(NULL) + exp;
455
0
    } else {
456
0
        tok->expiry = 0;
457
0
    }
458
0
    ret = 'v';
459
460
0
 error:
461
0
    free(token);
462
0
    free(type);
463
0
    free(expiry);
464
0
    free(str.s);
465
0
    hts_json_free_token(t);
466
0
    return ret;
467
0
}
468
469
0
/*
 * Read a bearer token from the first line of a plain-text file.
 *
 * The first whitespace-delimited word on the line becomes the token.  If
 * there is one, tok->token is set to "Authorization: Bearer <word>";
 * otherwise tok->token becomes NULL.  tok->expiry is set to 0.
 *
 * Returns 0 on success, -1 if the line could not be read or memory ran out
 * (tok is left untouched in that case).
 */
static int read_auth_plain(auth_token *tok, hFILE *auth_fp) {
    kstring_t line = {0, 0, NULL};
    kstring_t token = {0, 0, NULL};
    const char *word, *word_end;
    int status = -1;

    if (kgetline(&line, (char * (*)(char *, int, void *)) hgets, auth_fp) < 0)
        goto done;
    if (kputc('\0', &line) < 0)
        goto done;

    // Find the first run of non-space characters
    word = line.s;
    while (*word && isspace_c(*word))
        word++;
    word_end = word;
    while (*word_end && !isspace_c(*word_end))
        word_end++;

    if (word_end > word) {
        if (kputs("Authorization: Bearer ", &token) < 0) goto done;
        if (kputsn(word, word_end - word, &token) < 0) goto done;
    }

    free(tok->token);
    tok->token = ks_release(&token);
    tok->expiry = 0;
    status = 0;

 done:
    free(line.s);
    free(token.s); // NULL after a successful ks_release()
    return status;
}
496
497
0
/*
 * Re-read an auth token from tok->path if it is about to expire.
 *
 * tok      Token to refresh.
 * changed  Set to 1 if a refresh was attempted, 0 if the cached token was
 *          kept (or a previous failure is being reported).
 *
 * A token with expiry == 0 is never refreshed.  A missing token file is not
 * an error: the token is cleared and further refreshes are disabled.  Any
 * other failure marks the token as failed, and later calls then return -1
 * without retrying.
 *
 * Returns 0 on success, -1 on failure.
 */
static int renew_auth_token(auth_token *tok, int *changed) {
    hFILE *auth_fp = NULL;
    char buffer[16];
    ssize_t len;

    *changed = 0;
    if (tok->expiry == 0 || time(NULL) + AUTH_REFRESH_EARLY_SECS < tok->expiry)
        return 0; // Still valid

    if (tok->failed)
        return -1;

    *changed = 1;
    auth_fp = hopen(tok->path, "rR");
    if (!auth_fp) {
        // Not worried about missing files; other errors are bad.
        if (errno != ENOENT)
            goto fail;

        tok->expiry = 0; // Prevent retry
        free(tok->token); // Just in case it was set
        tok->token = NULL; // Don't leave a dangling pointer for later users
        return 0;
    }

    // Peek at the start of the file to decide between JSON and plain text
    len = hpeek(auth_fp, buffer, sizeof(buffer));
    if (len < 0)
        goto fail;

    if (memchr(buffer, '{', len) != NULL) {
        if (read_auth_json(tok, auth_fp) != 'v')
            goto fail;
    } else {
        if (read_auth_plain(tok, auth_fp) < 0)
            goto fail;
    }

    return hclose(auth_fp) < 0 ? -1 : 0;

 fail:
    tok->failed = 1;
    if (auth_fp) hclose_abruptly(auth_fp);
    return -1;
}
540
541
0
/*
 * Make sure the extra header list carries an up-to-date Authorization
 * header taken from fp->headers.auth.
 *
 * fp->headers.auth_hdr_num is the 1-based position of that header in the
 * extra list.  0 means no such header is present.  A negative value means
 * an Authorization header came from elsewhere (or the feature is
 * disabled), and then nothing is done.
 *
 * Returns 0 on success, -1 on failure (token refresh failed or out of
 * memory).
 */
static int add_auth_header(hFILE_libcurl *fp) {
    int changed = 0;

    if (fp->headers.auth_hdr_num < 0)
        return 0; // Have an Authorization header from open or header callback

    if (!fp->headers.auth)
        return 0; // Nothing to add

    pthread_mutex_lock(&fp->headers.auth->lock);
    if (renew_auth_token(fp->headers.auth, &changed) < 0)
        goto unlock_fail;

    if (!changed && fp->headers.auth_hdr_num > 0) {
        pthread_mutex_unlock(&fp->headers.auth->lock);
        return 0;
    }

    if (fp->headers.auth_hdr_num > 0) {
        // Had a previous header, so swap in the new one
        char *header = fp->headers.auth->token;
        char *header_copy = header ? strdup(header) : NULL;
        int idx = fp->headers.auth_hdr_num - 1;
        if (header && !header_copy)
            goto unlock_fail;

        if (header_copy) {
            free(fp->headers.extra.list[idx].data);
            fp->headers.extra.list[idx].data = header_copy;
        } else {
            unsigned int j;
            // More complicated case - need to get rid of the old header
            // and tidy up linked lists
            free(fp->headers.extra.list[idx].data);
            for (j = idx + 1; j < fp->headers.extra.num; j++) {
                fp->headers.extra.list[j - 1] = fp->headers.extra.list[j];
                fp->headers.extra.list[j - 1].next = &fp->headers.extra.list[j];
            }
            fp->headers.extra.num--;
            if (fp->headers.extra.num > 0) {
                fp->headers.extra.list[fp->headers.extra.num-1].next = NULL;
            } else if (fp->headers.fixed.num > 0) {
                fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL;
            }
            fp->headers.auth_hdr_num = 0;
        }
    } else if (fp->headers.auth->token) {
        // Add new header and remember where it is
        hdrlist *fixed = &fp->headers.fixed;
        hdrlist *extra = &fp->headers.extra;
        int res;

        // append_header() may move the extra array or create its first
        // element, so detach the fixed list's tail first and re-attach it
        // afterwards.  Otherwise the tail could point at freed memory or
        // never reach the new header.
        if (fixed->num > 0)
            fixed->list[fixed->num - 1].next = NULL;

        res = append_header(extra, fp->headers.auth->token, 1);

        // Re-attach even on failure, as the array may still have moved
        if (fixed->num > 0 && extra->num > 0)
            fixed->list[fixed->num - 1].next = &extra->list[0];

        if (res < 0)
            goto unlock_fail;

        fp->headers.auth_hdr_num = extra->num;
    }

    pthread_mutex_unlock(&fp->headers.auth->lock);
    return 0;

 unlock_fail:
    pthread_mutex_unlock(&fp->headers.auth->lock);
    return -1;
}
603
604
361
/*
 * Find (or create) the cached auth token that applies to `url` and add its
 * Authorization header to fp.
 *
 * The token file name is curl.auth_path with every "%h" replaced by the
 * host part of the URL (the text between "://" and the next '/').
 * Tokens are cached in curl.auth_map, keyed by that file name.
 *
 * Returns 0 on success or when there is nothing to do, -1 on failure.
 * If a new cache entry cannot be allocated or stored, the request carries
 * on without a token.
 */
static int get_auth_token(hFILE_libcurl *fp, const char *url) {
    const char *host = NULL, *p, *q;
    kstring_t name = {0, 0, NULL};
    size_t host_len = 0;
    khiter_t idx;
    auth_token *tok = NULL;

    // Nothing to do if:
    //   curl.auth_path has not been set
    //   fp was made by hfile_libcurl (e.g. auth_path is a http:// url)
    //   we already have an Authorization header
    if (!curl.auth_path || fp->is_recursive || fp->headers.auth_hdr_num != 0)
        return 0;

    // Insist on having a secure connection unless the user insists harder
    if (!curl.allow_unencrypted_auth_header && strncmp(url, "https://", 8) != 0)
        return 0;

    host = strstr(url, "://");
    if (host) {
        host += 3;
        host_len = strcspn(host, "/");
    }

    p = curl.auth_path;
    while ((q = strstr(p, "%h")) != NULL) {
        if (q - p > INT_MAX || host_len > INT_MAX) goto error;
        if (kputsn_(p, q - p, &name) < 0) goto error;
        // No host found means nothing to substitute; skip the append
        // rather than pass a NULL source pointer.
        if (host && kputsn_(host, host_len, &name) < 0) goto error;
        p = q + 2;
    }
    if (kputs(p, &name) < 0) goto error;

    pthread_mutex_lock(&curl.auth_lock);
    idx = kh_get(auth_map, curl.auth_map, name.s);
    if (idx < kh_end(curl.auth_map)) {
        tok = kh_value(curl.auth_map, idx);
    } else {
        tok = calloc(1, sizeof(*tok));
        if (tok && pthread_mutex_init(&tok->lock, NULL) != 0) {
            free(tok);
            tok = NULL;
        }
        if (tok) {
            int ret = -1;
            tok->path = ks_release(&name);
            tok->token = NULL;
            tok->expiry = 1; // Force refresh
            idx = kh_put(auth_map, curl.auth_map, tok->path, &ret);
            if (ret < 0) {
                // Insertion failed, so idx is not a valid slot and must
                // not be written through.
                free_auth(tok);
                tok = NULL;
            } else {
                kh_value(curl.auth_map, idx) = tok;
            }
        }
    }
    pthread_mutex_unlock(&curl.auth_lock);

    fp->headers.auth = tok;
    free(name.s);

    return add_auth_header(fp);

 error:
    free(name.s);
    return -1;
}
671
672
/*
 * Drain the multi handle's message queue.  A CURLMSG_DONE message marks
 * the transfer as finished and records its result code; other message
 * types are ignored.
 */
static void process_messages(hFILE_libcurl *fp)
{
    int remaining;

    for (;;) {
        CURLMsg *msg = curl_multi_info_read(fp->multi, &remaining);
        if (msg == NULL)
            break;

        if (msg->msg == CURLMSG_DONE) {
            fp->finished = 1;
            fp->final_result = msg->data.result;
        }
    }
}
689
690
/*
 * Wait until libcurl has something to do (unless the previous call asked
 * to be repeated immediately), then drive the transfer once with
 * curl_multi_perform().  If the running-transfer count has dropped below
 * fp->nrunning, pending completion messages are processed.
 *
 * Returns 0 on success, -1 on failure.  errno is set from the multi error
 * when curl_multi_perform() fails.
 */
static int wait_perform(hFILE_libcurl *fp)
{
    int nrunning;
    CURLMcode errm;

    if (!fp->perform_again) {
        fd_set rd, wr, ex;
        int maxfd;
        long timeout;

        FD_ZERO(&rd);
        FD_ZERO(&wr);
        FD_ZERO(&ex);

        if (curl_multi_fdset(fp->multi, &rd, &wr, &ex, &maxfd) != CURLM_OK) {
            maxfd = -1;
            timeout = 1000;
        } else if (curl_multi_timeout(fp->multi, &timeout) != CURLM_OK) {
            timeout = 1000;
        } else if (timeout < 0) {
            timeout = 10000;  // as recommended by curl_multi_timeout(3)
        }

        if (maxfd < 0) {
            if (timeout > 100)
                timeout = 100; // as recommended by curl_multi_fdset(3)
#ifdef _WIN32
            /* Windows ignores the first argument of select, so calling select
             * with maxfd=-1 does not give the expected result of sleeping for
             * timeout milliseconds in the conditional block below.
             * So sleep here and skip the next block.
             */
            Sleep(timeout);
            timeout = 0;
#endif
        }

        if (timeout > 0) {
            struct timeval tval;
            tval.tv_sec  = timeout / 1000;
            tval.tv_usec = (timeout % 1000) * 1000;

            if (select(maxfd + 1, &rd, &wr, &ex, &tval) < 0)
                return -1;
        }
    }

    errm = curl_multi_perform(fp->multi, &nrunning);
    fp->perform_again = (errm == CURLM_CALL_MULTI_PERFORM);
    if (errm != CURLM_CALL_MULTI_PERFORM && errm != CURLM_OK) {
        errno = multi_errno(errm);
        return -1;
    }

    if (nrunning < fp->nrunning)
        process_messages(fp);
    return 0;
}
741
742
743
static size_t recv_callback(char *ptr, size_t size, size_t nmemb, void *fpv)
744
0
{
745
0
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
746
0
    size_t n = size * nmemb;
747
748
0
    if (n > fp->buffer.len) {
749
0
        fp->paused = 1;
750
0
        return CURL_WRITEFUNC_PAUSE;
751
0
    }
752
0
    else if (n == 0) return 0;
753
754
0
    memcpy(fp->buffer.ptr.rd, ptr, n);
755
0
    fp->buffer.ptr.rd += n;
756
0
    fp->buffer.len -= n;
757
0
    return n;
758
0
}
759
760
761
static size_t header_callback(void *contents, size_t size, size_t nmemb,
762
                              void *userp)
763
8
{
764
8
    size_t realsize = size * nmemb;
765
8
    kstring_t *resp = (kstring_t *)userp;
766
767
8
    if (kputsn((const char *)contents, realsize, resp) == EOF) {
768
0
        return 0;
769
0
    }
770
771
8
    return realsize;
772
8
}
773
774
775
/*
 * Read up to nbytes into bufferv from the remote stream.
 *
 * A pending delayed seek is resolved first, in one of three ways:
 *   - serve the data from the buffer content preserved when the seek began,
 *   - for a short forward seek, keep reading and discard bytes up to the
 *     target,
 *   - otherwise restart the transfer from the target position.
 *
 * Returns the number of bytes stored in bufferv (0 once the transfer has
 * finished with nothing left to deliver), or -1 with errno set on error.
 */
static ssize_t libcurl_read(hFILE *fpv, void *bufferv, size_t nbytes)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    char *buffer = (char *) bufferv;
    off_t to_skip = -1;
    ssize_t got = 0;
    CURLcode err;

    if (fp->delayed_seek >= 0) {
        assert(fp->base.offset == fp->delayed_seek);

        if (fp->preserved
            && fp->last_offset > fp->delayed_seek
            && fp->last_offset - fp->preserved_bytes <= fp->delayed_seek) {
            // Can use buffer contents copied when seeking started, to
            // avoid having to re-read data discarded by hseek().
            // Note fp->last_offset is the offset of the *end* of the
            // preserved buffer.
            size_t n = fp->last_offset - fp->delayed_seek;
            char *start = fp->preserved + (fp->preserved_bytes - n);
            size_t bytes = n <= nbytes ? n : nbytes;
            memcpy(buffer, start, bytes);
            if (bytes < n) { // Part of the preserved buffer still left
                fp->delayed_seek += bytes;
            } else {
                fp->last_offset = fp->delayed_seek = -1;
            }
            return bytes;
        }

        if (fp->last_offset >= 0
            && fp->delayed_seek > fp->last_offset
            && fp->delayed_seek - fp->last_offset < MIN_SEEK_FORWARD) {
            // If not seeking far, just read the data and throw it away.  This
            // is likely to be quicker than opening a new stream
            to_skip = fp->delayed_seek - fp->last_offset;
        } else {
            if (restart_from_position(fp, fp->delayed_seek) < 0) {
                return -1;
            }
        }
        fp->delayed_seek = -1;
        fp->last_offset = -1;
        fp->preserved_bytes = 0;
    }

    do {
        fp->buffer.ptr.rd = buffer;
        fp->buffer.len = nbytes;
        fp->paused = 0;
        if (!fp->finished) {
            err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
            if (err != CURLE_OK) {
                errno = easy_errno(fp->easy, err);
                return -1;
            }
        }

        while (! fp->paused && ! fp->finished) {
            if (wait_perform(fp) < 0) return -1;
        }

        got = fp->buffer.ptr.rd - buffer;

        if (to_skip >= 0) { // Skipping over a small seek
            if (got <= to_skip) { // Need to skip more data
                to_skip -= got;
            } else {
                got -= to_skip;
                if (got > 0) {  // If enough was skipped, return the rest
                    memmove(buffer, buffer + to_skip, got);
                    to_skip = -1;
                }
            }
        }
    } while (to_skip >= 0 && ! fp->finished);
    fp->buffer.ptr.rd = NULL;
    fp->buffer.len = 0;

    if (to_skip >= 0) {
        // The transfer finished before the seek target was reached.
        // Everything in the buffer is data that was meant to be thrown
        // away, so report no bytes rather than hand it to the caller.
        got = 0;
    }

    if (fp->finished && fp->final_result != CURLE_OK) {
        errno = easy_errno(fp->easy, fp->final_result);
        return -1;
    }

    return got;
}
861
862
static size_t send_callback(char *ptr, size_t size, size_t nmemb, void *fpv)
863
0
{
864
0
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
865
0
    size_t n = size * nmemb;
866
867
0
    if (fp->buffer.len == 0) {
868
        // Send buffer is empty; normally pause, or signal EOF if we're closing
869
0
        if (fp->closing) return 0;
870
0
        else { fp->paused = 1; return CURL_READFUNC_PAUSE; }
871
0
    }
872
873
0
    if (n > fp->buffer.len) n = fp->buffer.len;
874
0
    memcpy(ptr, fp->buffer.ptr.wr, n);
875
0
    fp->buffer.ptr.wr += n;
876
0
    fp->buffer.len -= n;
877
0
    return n;
878
0
}
879
880
/*
 * Send up to nbytes from bufferv.  The buffer is exposed to
 * send_callback(), the transfer is resumed, and it is driven until the
 * callback pauses it again or the transfer finishes.
 *
 * Returns the number of bytes libcurl consumed, or -1 with errno set on
 * error.
 */
static ssize_t libcurl_write(hFILE *fpv, const void *bufferv, size_t nbytes)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    const char *start = (const char *) bufferv;
    CURLcode err;
    size_t consumed;

    fp->buffer.ptr.wr = start;
    fp->buffer.len = nbytes;
    fp->paused = 0;

    err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
    if (err != CURLE_OK) {
        errno = easy_errno(fp->easy, err);
        return -1;
    }

    while (!(fp->paused || fp->finished)) {
        if (wait_perform(fp) < 0)
            return -1;
    }

    consumed = fp->buffer.ptr.wr - start;
    fp->buffer.ptr.wr = NULL;
    fp->buffer.len = 0;

    if (fp->finished && fp->final_result != CURLE_OK) {
        errno = easy_errno(fp->easy, fp->final_result);
        return -1;
    }

    return consumed;
}
906
907
/*
 * Stash the unread part of the hFILE buffer (base.begin .. base.end) in
 * fp->preserved and record its length in fp->preserved_bytes.
 *
 * The stash is (re)allocated to the full size of the hFILE buffer when it
 * does not exist yet or is too small.  If the buffer holds no unread data,
 * preserved_bytes is simply set to 0.  If allocation fails nothing is
 * preserved: fp->preserved is left NULL with both counters reset to 0.
 */
static void preserve_buffer_content(hFILE_libcurl *fp)
{
    if (fp->base.begin == fp->base.end) {
        fp->preserved_bytes = 0;
        return;
    }

    if (!fp->preserved
        || fp->preserved_size < fp->base.limit - fp->base.buffer) {
        // Release any undersized stash first so that it is not leaked
        free(fp->preserved);
        fp->preserved = malloc(fp->base.limit - fp->base.buffer);
        if (!fp->preserved) {
            // Keep the bookkeeping consistent with the missing buffer
            fp->preserved_size = 0;
            fp->preserved_bytes = 0;
            return;
        }
        fp->preserved_size = fp->base.limit - fp->base.buffer;
    }

    assert(fp->base.end - fp->base.begin <= fp->preserved_size);

    memcpy(fp->preserved, fp->base.begin, fp->base.end - fp->base.begin);
    fp->preserved_bytes = fp->base.end - fp->base.begin;
}
926
927
/*
 * hFILE backend seek method.
 *
 * Only SEEK_SET and SEEK_END (the latter needing a known file size) are
 * handled; SEEK_CUR fails with ENOSYS.  Write handles, and handles on which
 * an earlier seek failed, are rejected with ESPIPE.
 *
 * The first seek reconnects immediately via restart_from_position().  Once
 * that has succeeded, later seeks only record the target in
 * fp->delayed_seek.
 *
 * Returns the new absolute position, or -1 with errno set.
 */
static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    off_t origin, target;
    int out_of_range;

    // Cowardly refuse to seek when writing or a previous seek failed.
    if (!(fp->is_read && fp->can_seek)) {
        errno = ESPIPE;
        return -1;
    }

    if (whence == SEEK_SET) {
        origin = 0;
    } else if (whence == SEEK_CUR) {
        errno = ENOSYS;
        return -1;
    } else if (whence == SEEK_END) {
        if (fp->file_size < 0) {
            errno = ESPIPE;
            return -1;
        }
        origin = fp->file_size;
    } else {
        errno = EINVAL;
        return -1;
    }

    // Require 0 <= origin+offset <= file_size (upper bound only when the
    // size is known), phrased so the arithmetic cannot overflow.
    if (offset < 0)
        out_of_range = (origin + offset < 0);
    else
        out_of_range = (fp->file_size >= 0
                        && offset > fp->file_size - origin);

    if (out_of_range) {
        errno = EINVAL;
        return -1;
    }

    target = origin + offset;

    if (fp->tried_seek) {
        /* A seek has already succeeded on this handle, so postpone the
           real work until the next read.  That saves pointless http or
           ftp reconnections when the caller seeks repeatedly without
           reading in between. */
        if (fp->delayed_seek < 0) {
            fp->last_offset = fp->base.offset + (fp->base.end - fp->base.buffer);
            // Keep the current hFILE buffer content; it may be useful later
            preserve_buffer_content(fp);
        }
        fp->delayed_seek = target;
        return target;
    }

    if (restart_from_position(fp, target) < 0) {
        /* ESPIPE may not be strictly accurate, but it lets the caller
           carry on with the existing handle. */
        errno = ESPIPE;
        return -1;
    }

    fp->tried_seek = 1;
    return target;
}
987
988
0
/*
 * Re-issue the request for fp so that data starts at byte offset pos.
 *
 * A duplicate of fp's easy handle is set up with CURLOPT_RESUME_FROM_LARGE
 * and driven until it pauses or finishes.  If that works the duplicate
 * replaces fp->easy; if not, the duplicate is discarded and fp keeps its
 * original handle.
 *
 * Returns 0 on success, -1 on failure.  Most failure paths also clear
 * fp->can_seek so that no further seeks are attempted.
 */
static int restart_from_position(hFILE_libcurl *fp, off_t pos) {
    hFILE_libcurl trial;
    CURLcode rc;
    CURLMcode mrc;
    int headers_changed = 0;
    int saved_errno = 0;

    // TODO If we seem to be doing random access, use CURLOPT_RANGE to do
    // limited reads (e.g. about a BAM block!) so seeking can reuse the
    // existing connection more often.

    // Fetch fresh headers from the callback (if defined).  This alters the
    // headers in fp before it gets duplicated, but they should have been
    // sent by now.

    if (fp->headers.callback) {
        if (add_callback_headers(fp) != 0)
            return -1;
        headers_changed = 1;
    }
    if (fp->headers.auth_hdr_num > 0 && fp->headers.auth) {
        if (add_auth_header(fp) != 0)
            return -1;
        headers_changed = 1;
    }
    if (headers_changed) {
        struct curl_slist *hdr_list = get_header_list(fp);
        if (hdr_list) {
            rc = curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, hdr_list);
            if (rc != CURLE_OK) {
                errno = easy_errno(fp->easy, rc);
                return -1;
            }
        }
    }

    /*
      Work on a shallow copy of *fp that owns a duplicated easy handle.
      The duplicate asks the server for data starting at pos.  Only when
      it yields a good response is the original easy handle closed and
      replaced; otherwise the duplicate is closed and *fp is left as is.
     */

    memcpy(&trial, fp, sizeof(trial));
    trial.buffer.len = 0;
    trial.buffer.ptr.rd = NULL;
    trial.easy = curl_easy_duphandle(fp->easy);
    if (!trial.easy)
        goto early_error;

    rc = curl_easy_setopt(trial.easy, CURLOPT_RESUME_FROM_LARGE,
                          (curl_off_t) pos);
    rc |= curl_easy_setopt(trial.easy, CURLOPT_PRIVATE, &trial);
    rc |= curl_easy_setopt(trial.easy, CURLOPT_WRITEDATA, &trial);
    if (rc != CURLE_OK) {
        saved_errno = easy_errno(trial.easy, rc);
        goto error;
    }

    trial.buffer.len = 0;  // Ensures we only read the response headers
    trial.paused = trial.finished = 0;

    // fp->multi and trial.multi are the same.
    mrc = curl_multi_add_handle(fp->multi, trial.easy);
    if (mrc != CURLM_OK) {
        saved_errno = multi_errno(mrc);
        goto error;
    }
    trial.nrunning = ++fp->nrunning;

    while (!trial.paused && !trial.finished) {
        if (wait_perform(&trial) < 0) {
            saved_errno = errno;
            goto error_remove;
        }
    }

    if (trial.finished && trial.final_result != CURLE_OK) {
        saved_errno = easy_errno(trial.easy, trial.final_result);
        goto error_remove;
    }

    // The new request looks good: retire the original connection and
    // put the new one in its place.

    mrc = curl_multi_remove_handle(fp->multi, fp->easy);
    if (mrc != CURLM_OK) {
        // Clean up as much as possible
        curl_easy_reset(trial.easy);
        if (curl_multi_remove_handle(fp->multi, trial.easy) == CURLM_OK) {
            fp->nrunning--;
            curl_easy_cleanup(trial.easy);
        }
        saved_errno = multi_errno(mrc);
        goto early_error;
    }
    fp->nrunning--;

    curl_easy_cleanup(fp->easy);
    fp->easy = trial.easy;

    // Point the adopted handle back at the long-lived fp, not the stack copy
    rc = curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
    rc |= curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp);
    if (rc != CURLE_OK) {
        saved_errno = easy_errno(fp->easy, rc);
        curl_easy_reset(fp->easy);
        errno = saved_errno;
        return -1;
    }

    fp->buffer.len = 0;
    fp->paused = trial.paused;
    fp->finished = trial.finished;
    fp->perform_again = trial.perform_again;
    fp->final_result = trial.final_result;

    return 0;

 error_remove:
    curl_easy_reset(trial.easy); // Ensure no pointers to on-stack trial
    mrc = curl_multi_remove_handle(fp->multi, trial.easy);
    if (mrc != CURLM_OK) {
        errno = multi_errno(mrc);
        return -1;
    }
    fp->nrunning--;
 error:
    curl_easy_cleanup(trial.easy);
 early_error:
    fp->can_seek = 0;  // Don't try to seek again
    if (saved_errno)
        errno = saved_errno;
    return -1;
}
1119
1120
/*
 * hFILE backend close method.
 *
 * Gives the transfer a final chance to run (so uploads can signal EOF via
 * send_callback()), then detaches and destroys the curl handles and frees
 * the header lists and the preserved buffer.  Returns 0 on success, or -1
 * with errno set.
 */
static int libcurl_close(hFILE *fpv)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    CURLcode rc;
    CURLMcode mrc;
    int saved = 0;

    // Unpause and perform before closing so that an upload gets the
    // opportunity to tell the server it has reached EOF.

    fp->buffer.len = 0;
    fp->closing = 1;
    fp->paused = 0;
    if (!fp->finished) {
        rc = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
        if (rc != CURLE_OK)
            saved = easy_errno(fp->easy, rc);
    }

    while (saved == 0 && !fp->paused && !fp->finished) {
        if (wait_perform(fp) < 0)
            saved = errno;
    }

    if (fp->finished && fp->final_result != CURLE_OK)
        saved = easy_errno(fp->easy, fp->final_result);

    mrc = curl_multi_remove_handle(fp->multi, fp->easy);
    if (mrc != CURLM_OK && saved == 0)
        saved = multi_errno(mrc);
    fp->nrunning--;

    curl_easy_cleanup(fp->easy);
    curl_multi_cleanup(fp->multi);

    // A NULL header pointer tells the callback to free whatever it holds
    if (fp->headers.callback)
        fp->headers.callback(fp->headers.callback_data, NULL);
    free_headers(&fp->headers.fixed, 1);
    free_headers(&fp->headers.extra, 1);

    free(fp->preserved);

    if (saved == 0)
        return 0;

    errno = saved;
    return -1;
}
1161
1162
static const struct hFILE_backend libcurl_backend =
1163
{
1164
    libcurl_read, libcurl_write, libcurl_seek, NULL, libcurl_close
1165
};
1166
1167
static hFILE *
1168
libcurl_open(const char *url, const char *modes, http_headers *headers)
1169
361
{
1170
361
    hFILE_libcurl *fp;
1171
361
    struct curl_slist *list;
1172
361
    char mode;
1173
361
    const char *s;
1174
361
    CURLcode err;
1175
361
    CURLMcode errm;
1176
361
    int save, is_recursive;
1177
361
    kstring_t in_header = {0, 0, NULL};
1178
361
    long response;
1179
1180
361
    is_recursive = strchr(modes, 'R') != NULL;
1181
1182
361
    if ((s = strpbrk(modes, "rwa+")) != NULL) {
1183
361
        mode = *s;
1184
361
        if (strpbrk(&s[1], "rwa+")) mode = 'e';
1185
361
    }
1186
0
    else mode = '\0';
1187
1188
361
    if (mode != 'r' && mode != 'w') { errno = EINVAL; goto early_error; }
1189
1190
361
    fp = (hFILE_libcurl *) hfile_init(sizeof (hFILE_libcurl), modes, 0);
1191
361
    if (fp == NULL) goto early_error;
1192
1193
361
    if (headers) {
1194
337
        fp->headers = *headers;
1195
337
    } else {
1196
24
        memset(&fp->headers, 0, sizeof(fp->headers));
1197
24
        fp->headers.fail_on_error = 1;
1198
24
    }
1199
1200
361
    fp->file_size = -1;
1201
361
    fp->buffer.ptr.rd = NULL;
1202
361
    fp->buffer.len = 0;
1203
361
    fp->final_result = (CURLcode) -1;
1204
361
    fp->paused = fp->closing = fp->finished = fp->perform_again = 0;
1205
361
    fp->can_seek = 1;
1206
361
    fp->tried_seek = 0;
1207
361
    fp->delayed_seek = fp->last_offset = -1;
1208
361
    fp->preserved = NULL;
1209
361
    fp->preserved_bytes = fp->preserved_size = 0;
1210
361
    fp->is_recursive = is_recursive;
1211
361
    fp->nrunning = 0;
1212
361
    fp->easy = NULL;
1213
1214
361
    fp->multi = curl_multi_init();
1215
361
    if (fp->multi == NULL) { errno = ENOMEM; goto error; }
1216
1217
361
    fp->easy = curl_easy_init();
1218
361
    if (fp->easy == NULL) { errno = ENOMEM; goto error; }
1219
1220
    // Make a route to the hFILE_libcurl* given just a CURL* easy handle
1221
361
    err = curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp);
1222
1223
    // Avoid many repeated CWD calls with FTP, instead requesting the filename
1224
    // by full path (but not strictly compliant with RFC1738).
1225
361
    err |= curl_easy_setopt(fp->easy, CURLOPT_FTP_FILEMETHOD,
1226
361
                            (long) CURLFTPMETHOD_NOCWD);
1227
1228
361
    if (mode == 'r') {
1229
361
        err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEFUNCTION, recv_callback);
1230
361
        err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
1231
361
        fp->is_read = 1;
1232
361
    }
1233
0
    else {
1234
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_READFUNCTION, send_callback);
1235
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_READDATA, fp);
1236
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_UPLOAD, 1L);
1237
0
        if (append_header(&fp->headers.fixed,
1238
0
                          "Transfer-Encoding: chunked", 1) < 0)
1239
0
            goto error;
1240
0
        fp->is_read = 0;
1241
0
    }
1242
1243
361
    err |= curl_easy_setopt(fp->easy, CURLOPT_SHARE, curl.share);
1244
361
    err |= curl_easy_setopt(fp->easy, CURLOPT_URL, url);
1245
361
    {
1246
361
        char* env_curl_ca_bundle = getenv("CURL_CA_BUNDLE");
1247
361
        if (env_curl_ca_bundle) {
1248
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_CAINFO, env_curl_ca_bundle);
1249
0
        }
1250
361
    }
1251
361
    err |= curl_easy_setopt(fp->easy, CURLOPT_USERAGENT, curl.useragent.s);
1252
361
    if (fp->headers.callback) {
1253
337
        if (add_callback_headers(fp) != 0) goto error;
1254
337
    }
1255
361
    if (get_auth_token(fp, url) < 0)
1256
0
        goto error;
1257
361
    if ((list = get_header_list(fp)) != NULL)
1258
337
        err |= curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list);
1259
1260
361
    if (hts_verbose <= 8 && fp->headers.fail_on_error)
1261
24
        err |= curl_easy_setopt(fp->easy, CURLOPT_FAILONERROR, 1L);
1262
361
    if (hts_verbose >= 8)
1263
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_VERBOSE, 1L);
1264
1265
361
    if (fp->headers.redirect) {
1266
337
        err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, header_callback);
1267
337
        err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, (void *)&in_header);
1268
337
    } else {
1269
24
        err |= curl_easy_setopt(fp->easy, CURLOPT_FOLLOWLOCATION, 1L);
1270
24
    }
1271
1272
361
    if (err != 0) { errno = ENOSYS; goto error; }
1273
1274
361
    errm = curl_multi_add_handle(fp->multi, fp->easy);
1275
361
    if (errm != CURLM_OK) { errno = multi_errno(errm); goto error; }
1276
361
    fp->nrunning++;
1277
1278
791
    while (! fp->paused && ! fp->finished) {
1279
430
        if (wait_perform(fp) < 0) goto error_remove;
1280
430
    }
1281
1282
361
    curl_easy_getinfo(fp->easy, CURLINFO_RESPONSE_CODE, &response);
1283
361
    if (fp->headers.http_response_ptr) {
1284
337
        *fp->headers.http_response_ptr = response;
1285
337
    }
1286
1287
361
    if (fp->finished && fp->final_result != CURLE_OK) {
1288
360
        errno = easy_errno(fp->easy, fp->final_result);
1289
360
        goto error_remove;
1290
360
    }
1291
1292
1
    if (fp->headers.redirect) {
1293
1
        if (response >= 300 && response < 400) { // redirection
1294
1
            kstring_t new_url = {0, 0, NULL};
1295
1296
1
            if (fp->headers.redirect(fp->headers.redirect_data, response,
1297
1
                                     &in_header, &new_url)) {
1298
1
                errno = ENOSYS;
1299
1
                goto error;
1300
1
            }
1301
1302
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_URL, new_url.s);
1303
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, NULL);
1304
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, NULL);
1305
0
            free(ks_release(&in_header));
1306
1307
0
            if (err != 0) { errno = ENOSYS; goto error; }
1308
0
            free(ks_release(&new_url));
1309
1310
0
            if (restart_from_position(fp, 0) < 0) {
1311
0
                goto error_remove;
1312
0
            }
1313
1314
0
            if (fp->headers.http_response_ptr) {
1315
0
                curl_easy_getinfo(fp->easy, CURLINFO_RESPONSE_CODE,
1316
0
                                  fp->headers.http_response_ptr);
1317
0
            }
1318
1319
0
            if (fp->finished && fp->final_result != CURLE_OK) {
1320
0
                errno = easy_errno(fp->easy, fp->final_result);
1321
0
                goto error_remove;
1322
0
            }
1323
0
        } else {
1324
            // we no longer need to look at the headers
1325
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, NULL);
1326
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, NULL);
1327
0
            free(ks_release(&in_header));
1328
1329
0
            if (err != 0) { errno = ENOSYS; goto error; }
1330
0
        }
1331
1
    }
1332
1333
0
    if (mode == 'r') {
1334
0
#if LIBCURL_VERSION_NUM >= 0x073700 // 7.55.0
1335
0
        curl_off_t offset;
1336
1337
0
        if (curl_easy_getinfo(fp->easy, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T,
1338
0
                              &offset) == CURLE_OK && offset > 0)
1339
0
            fp->file_size = (off_t) offset;
1340
#else
1341
        double dval;
1342
1343
        if (curl_easy_getinfo(fp->easy, CURLINFO_CONTENT_LENGTH_DOWNLOAD,
1344
                              &dval) == CURLE_OK && dval >= 0.0)
1345
            fp->file_size = (off_t) (dval + 0.1);
1346
#endif
1347
0
    }
1348
0
    fp->base.backend = &libcurl_backend;
1349
0
    return &fp->base;
1350
1351
360
error_remove:
1352
360
    save = errno;
1353
360
    (void) curl_multi_remove_handle(fp->multi, fp->easy);
1354
360
    fp->nrunning--;
1355
360
    errno = save;
1356
1357
361
error:
1358
361
    if (fp->headers.redirect) free(in_header.s);
1359
361
    save = errno;
1360
361
    if (fp->easy) curl_easy_cleanup(fp->easy);
1361
361
    if (fp->multi) curl_multi_cleanup(fp->multi);
1362
361
    free_headers(&fp->headers.extra, 1);
1363
361
    hfile_destroy((hFILE *) fp);
1364
361
    errno = save;
1365
361
    return NULL;
1366
1367
0
early_error:
1368
0
    return NULL;
1369
360
}
1370
1371
static hFILE *hopen_libcurl(const char *url, const char *modes)
1372
24
{
1373
24
    return libcurl_open(url, modes, NULL);
1374
24
}
1375
1376
/*
 * Decode the NULL-terminated list of (option name, value...) arguments
 * passed to hopen() and store the settings in *headers.
 *
 * Recognised option names are "httphdr", "httphdr:l", "httphdr:v",
 * "httphdr_callback", "httphdr_callback_data", "va_list",
 * "auth_token_enabled", "redirect_callback", "redirect_callback_data",
 * "http_response_ptr" and "fail_on_error".  A NULL value for "httphdr",
 * "httphdr:v", "va_list" or "auth_token_enabled" is accepted and leaves
 * the corresponding setting unchanged.
 *
 * Returns 0 on success.  Returns -1 if a header could not be appended, or
 * with errno set to EINVAL if an option name is not recognised.
 */
static int parse_va_list(http_headers *headers, va_list args)
{
    const char *argtype;

    while ((argtype = va_arg(args, const char *)) != NULL) {
        if (strcmp(argtype, "httphdr:v") == 0) {
            // NULL-terminated array of header strings
            const char **hdr = va_arg(args, const char **);
            if (hdr) {
                for (; *hdr; hdr++) {
                    if (append_header(&headers->fixed, *hdr, 1) < 0)
                        return -1;
                    if (is_authorization(*hdr))
                        headers->auth_hdr_num = -1;
                }
            }
        }
        else if (strcmp(argtype, "httphdr:l") == 0) {
            // Header strings given inline, ended by their own NULL
            const char *hdr;
            while ((hdr = va_arg(args, const char *)) != NULL) {
                if (append_header(&headers->fixed, hdr, 1) < 0)
                    return -1;
                if (is_authorization(hdr))
                    headers->auth_hdr_num = -1;
            }
        }
        else if (strcmp(argtype, "httphdr") == 0) {
            const char *hdr = va_arg(args, const char *);
            if (hdr) {
                if (append_header(&headers->fixed, hdr, 1) < 0)
                    return -1;
                if (is_authorization(hdr))
                    headers->auth_hdr_num = -1;
            }
        }
        else if (strcmp(argtype, "httphdr_callback") == 0) {
            headers->callback = va_arg(args, const hts_httphdr_callback);
        }
        else if (strcmp(argtype, "httphdr_callback_data") == 0) {
            headers->callback_data = va_arg(args, void *);
        }
        else if (strcmp(argtype, "va_list") == 0) {
            // Nested argument list: parse it with the same rules
            va_list *args2 = va_arg(args, va_list *);
            if (args2) {
                if (parse_va_list(headers, *args2) < 0) return -1;
            }
        }
        else if (strcmp(argtype, "auth_token_enabled") == 0) {
            const char *flag = va_arg(args, const char *);
            // Guard against a NULL flag, which strcmp() must not be given
            if (flag && strcmp(flag, "false") == 0)
                headers->auth_hdr_num = -3;
        }
        else if (strcmp(argtype, "redirect_callback") == 0) {
            headers->redirect = va_arg(args, const redirect_callback);
        }
        else if (strcmp(argtype, "redirect_callback_data") == 0) {
            headers->redirect_data = va_arg(args, void *);
        }
        else if (strcmp(argtype, "http_response_ptr") == 0) {
            headers->http_response_ptr = va_arg(args, long *);
        }
        else if (strcmp(argtype, "fail_on_error") == 0) {
            headers->fail_on_error = va_arg(args, int);
        }
        else { errno = EINVAL; return -1; }
    }

    return 0;
}
1441
1442
/*
1443
  HTTP headers to be added to the request can be passed in as extra
1444
  arguments to hopen().  The headers can be specified as follows:
1445
1446
  * Single header:
1447
    hopen(url, mode, "httphdr", "X-Hdr-1: text", NULL);
1448
1449
  * Multiple headers in the argument list:
1450
    hopen(url, mode, "httphdr:l", "X-Hdr-1: text", "X-Hdr-2: text", NULL, NULL);
1451
1452
  * Multiple headers in a char* array:
1453
    hopen(url, mode, "httphdr:v", hdrs, NULL);
1454
    where `hdrs` is a char **.  The list ends with a NULL pointer.
1455
1456
  * A callback function
1457
    hopen(url, mode, "httphdr_callback", func,
1458
                     "httphdr_callback_data", arg, NULL);
1459
    `func` has type
1460
         int (* hts_httphdr_callback) (void *cb_data, char ***hdrs);
1461
    `arg` is passed to the callback as a void *.
1462
1463
    The function is called at file open, and when attempting to seek (which
1464
    opens a new HTTP request).  This allows, for example, access tokens
1465
    that may have gone stale to be regenerated.  The function is also
1466
    called (with `hdrs` == NULL) on file close so that the callback can
1467
    free any memory that it needs to.
1468
1469
    The callback should return 0 on success, non-zero on failure.  It should
1470
    return in *hdrs a list of strings containing the new headers (terminated
1471
    with a NULL pointer).  These will replace any headers previously supplied
1472
    by the callback.  If no changes are necessary, it can return NULL
1473
    in *hdrs, in which case the previous headers will be left unchanged.
1474
1475
    Ownership of the strings in the header list passes to hfile_libcurl,
1476
    so the callback should not attempt to use or free them itself.  The memory
1477
    containing the array belongs to the callback and will not be freed by
1478
    hfile_libcurl.
1479
1480
    Headers supplied by the callback are appended after any specified
1481
    using the "httphdr", "httphdr:l" or "httphdr:v" methods.  No attempt
1482
    is made to replace these headers (even if a key is repeated) so anything
1483
    that is expected to vary needs to come from the callback.
1484
 */
1485
1486
static hFILE *vhopen_libcurl(const char *url, const char *modes, va_list args)
1487
337
{
1488
337
    hFILE *fp = NULL;
1489
337
    http_headers headers = { .fail_on_error = 1 };
1490
1491
337
    if (parse_va_list(&headers, args) == 0) {
1492
337
        fp = libcurl_open(url, modes, &headers);
1493
337
    }
1494
1495
337
    if (!fp) {
1496
337
        free_headers(&headers.fixed, 1);
1497
337
    }
1498
337
    return fp;
1499
337
}
1500
1501
int PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)(struct hFILE_plugin *self)
1502
1
{
1503
1
    static const struct hFILE_scheme_handler handler =
1504
1
        { hopen_libcurl, hfile_always_remote, "libcurl",
1505
1
          2000 + 50,
1506
1
          vhopen_libcurl };
1507
1508
#ifdef ENABLE_PLUGINS
1509
    // Embed version string for examination via strings(1) or what(1)
1510
    static const char id[] =
1511
        "@(#)hfile_libcurl plugin (htslib)\t" HTS_VERSION_TEXT;
1512
    const char *version = strchr(id, '\t')+1;
1513
#else
1514
1
    const char *version = hts_version();
1515
1
#endif
1516
1
    const curl_version_info_data *info;
1517
1
    const char * const *protocol;
1518
1
    const char *auth;
1519
1
    CURLcode err;
1520
1
    CURLSHcode errsh;
1521
1522
1
    err = curl_global_init(CURL_GLOBAL_ALL);
1523
1
    if (err != CURLE_OK) { errno = easy_errno(NULL, err); return -1; }
1524
1525
1
    curl.share = curl_share_init();
1526
1
    if (curl.share == NULL) { curl_global_cleanup(); errno = EIO; return -1; }
1527
1
    errsh = curl_share_setopt(curl.share, CURLSHOPT_LOCKFUNC, share_lock);
1528
1
    errsh |= curl_share_setopt(curl.share, CURLSHOPT_UNLOCKFUNC, share_unlock);
1529
1
    errsh |= curl_share_setopt(curl.share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
1530
1
    if (errsh != 0) {
1531
0
        curl_share_cleanup(curl.share);
1532
0
        curl_global_cleanup();
1533
0
        errno = EIO;
1534
0
        return -1;
1535
0
    }
1536
1537
1
    if ((auth = getenv("HTS_AUTH_LOCATION")) != NULL) {
1538
0
        curl.auth_path = strdup(auth);
1539
0
        curl.auth_map = kh_init(auth_map);
1540
0
        if (!curl.auth_path || !curl.auth_map) {
1541
0
            int save_errno = errno;
1542
0
            free(curl.auth_path);
1543
0
            kh_destroy(auth_map, curl.auth_map);
1544
0
            curl_share_cleanup(curl.share);
1545
0
            curl_global_cleanup();
1546
0
            errno = save_errno;
1547
0
            return -1;
1548
0
        }
1549
0
    }
1550
1
    if ((auth = getenv("HTS_ALLOW_UNENCRYPTED_AUTHORIZATION_HEADER")) != NULL
1551
0
        && strcmp(auth, "I understand the risks") == 0) {
1552
0
        curl.allow_unencrypted_auth_header = 1;
1553
0
    }
1554
1555
1
    info = curl_version_info(CURLVERSION_NOW);
1556
1
    ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version);
1557
1558
1
    self->name = "libcurl";
1559
1
    self->destroy = libcurl_exit;
1560
1561
24
    for (protocol = info->protocols; *protocol; protocol++)
1562
23
        hfile_add_scheme_handler(*protocol, &handler);
1563
1
    return 0;
1564
1
}