Coverage Report

Created: 2026-02-11 06:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/htslib/hfile_libcurl.c
Line
Count
Source
1
/*  hfile_libcurl.c -- libcurl backend for low-level file streams.
2
3
    Copyright (C) 2015-2017, 2019-2020 Genome Research Ltd.
4
5
    Author: John Marshall <jm18@sanger.ac.uk>
6
7
Permission is hereby granted, free of charge, to any person obtaining a copy
8
of this software and associated documentation files (the "Software"), to deal
9
in the Software without restriction, including without limitation the rights
10
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
copies of the Software, and to permit persons to whom the Software is
12
furnished to do so, subject to the following conditions:
13
14
The above copyright notice and this permission notice shall be included in
15
all copies or substantial portions of the Software.
16
17
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
DEALINGS IN THE SOFTWARE.  */
24
25
#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
26
#include <config.h>
27
28
#include <stdarg.h>
29
#include <stdlib.h>
30
#include <string.h>
31
#include <strings.h>
32
#include <errno.h>
33
#include <pthread.h>
34
#ifndef _WIN32
35
# include <sys/select.h>
36
#endif
37
#include <assert.h>
38
39
#include "hfile_internal.h"
40
#ifdef ENABLE_PLUGINS
41
#include "version.h"
42
#endif
43
#include "htslib/hts.h"  // for hts_version() and hts_verbose
44
#include "htslib/kstring.h"
45
#include "htslib/khash.h"
46
47
#include <curl/curl.h>
48
49
// Number of seconds to take off auth_token expiry, to allow for clock skew
50
// and slow servers
51
0
#define AUTH_REFRESH_EARLY_SECS 60
52
53
// Minimum number of bytes to skip when seeking forward.  Seeks less than
54
// this will just read the data and throw it away.  The optimal value
55
// depends on how long it takes to make a new connection compared
56
// to how fast the data arrives.
57
0
#define MIN_SEEK_FORWARD 1000000
58
59
// One cached authorization token, shared between handles via curl.auth_map
typedef struct {
    char *path;            // Location the token is read from
    char *token;           // Complete "Authorization: ..." header, or NULL
    time_t expiry;         // Refresh time; 0 means never refresh
    int failed;            // Non-zero once a refresh attempt has failed
    pthread_mutex_t lock;  // Guards the fields above
} auth_token;
66
67
// For the authorization header cache
68
KHASH_MAP_INIT_STR(auth_map, auth_token *)
69
70
// Growable array of curl_slist nodes, chained through .next so that
// &list[0] can be handed to libcurl as an ordinary header list
typedef struct {
    struct curl_slist *list;  // Node storage
    unsigned int num;         // Nodes currently in use
    unsigned int size;        // Nodes allocated
} hdrlist;
76
77
typedef struct {
78
    hdrlist fixed;                   // List of headers supplied at hopen()
79
    hdrlist extra;                   // List of headers from callback
80
    hts_httphdr_callback callback;   // Callback to get more headers
81
    void *callback_data;             // Data to pass to httphdr callback
82
    auth_token *auth;                // Authentication token
83
    int auth_hdr_num;                // Location of auth_token in hdrlist extra
84
                                     // If -1, Authorization header is in fixed
85
                                     //    -2, it came from the callback
86
                                     //    -3, "auth_token_enabled", "false"
87
                                     //        passed to hopen()
88
    redirect_callback redirect;      // Callback to handle 3xx redirects
89
    void *redirect_data;             // Data to pass to redirect_callback
90
    long *http_response_ptr;         // Location to store http response code.
91
    int fail_on_error;               // Open fails on >400 response code
92
                                     //    (default true)
93
} http_headers;
94
95
typedef struct {
96
    hFILE base;
97
    CURL *easy;
98
    CURLM *multi;
99
    off_t file_size;
100
    struct {
101
        union { char *rd; const char *wr; } ptr;
102
        size_t len;
103
    } buffer;
104
    CURLcode final_result;  // easy result code for finished transfers
105
    // Flags for communicating with libcurl callbacks:
106
    unsigned paused : 1;    // callback tells us that it has paused transfer
107
    unsigned closing : 1;   // informs callback that hclose() has been invoked
108
    unsigned finished : 1;  // wait_perform() tells us transfer is complete
109
    unsigned perform_again : 1;
110
    unsigned is_read : 1;   // Opened in read mode
111
    unsigned can_seek : 1;  // Can (attempt to) seek on this handle
112
    unsigned is_recursive:1; // Opened by hfile_libcurl itself
113
    unsigned tried_seek : 1; // At least one seek has been attempted
114
    int nrunning;
115
    http_headers headers;
116
117
    off_t delayed_seek;      // Location to seek to before reading
118
    off_t last_offset;       // Location we're seeking from
119
    char *preserved;         // Preserved buffer content on seek
120
    size_t preserved_bytes;  // Number of preserved bytes
121
    size_t preserved_size;   // Size of preserved buffer
122
} hFILE_libcurl;
123
124
static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence);
125
static int restart_from_position(hFILE_libcurl *fp, off_t pos);
126
127
// Translate an HTTP status code into the closest errno value.
// Statuses below 400 are not errors and give 0; unlisted 4xx codes give
// EINVAL and unlisted codes of 500 or above give EIO.
static int http_status_errno(int status)
{
    switch (status) {
    case 401:
    case 407:
        return EPERM;
    case 403:
        return EACCES;
    case 404:
    case 410:
        return ENOENT;
    case 405:
        return EROFS;
    case 408:
    case 504:
        return ETIMEDOUT;
    case 501:
        return ENOSYS;
    case 503:
        return EBUSY;
    default:
        break;
    }

    if (status >= 500)
        return EIO;
    if (status >= 400)
        return EINVAL;
    return 0;
}
149
150
// Translate a libcurl easy-interface result code into an errno value.
// 'easy' is queried for extra detail (OS errno or HTTP response code) where
// the result code alone is not specific enough.  Returns 0 only for
// CURLE_OK; unrecognised codes are logged and reported as EIO.
static int easy_errno(CURL *easy, CURLcode err)
{
    long lval;

    switch (err) {
    case CURLE_OK:
        return 0;

    case CURLE_UNSUPPORTED_PROTOCOL:
    case CURLE_URL_MALFORMAT:
        return EINVAL;

#if LIBCURL_VERSION_NUM >= 0x071505
    case CURLE_NOT_BUILT_IN:
        return ENOSYS;
#endif

    case CURLE_COULDNT_RESOLVE_PROXY:
    case CURLE_COULDNT_RESOLVE_HOST:
    case CURLE_FTP_CANT_GET_HOST:
        return EDESTADDRREQ; // Lookup failure

    case CURLE_COULDNT_CONNECT:
    case CURLE_SEND_ERROR:
    case CURLE_RECV_ERROR:
        // Prefer the OS-level errno, but only when one was recorded.
        // Callers store the result in errno alongside a -1 return, so a
        // value of 0 here would make a real failure look like success.
        if (curl_easy_getinfo(easy, CURLINFO_OS_ERRNO, &lval) == CURLE_OK
            && lval != 0)
            return (int) lval;
        return ECONNABORTED;

    case CURLE_REMOTE_ACCESS_DENIED:
    case CURLE_LOGIN_DENIED:
    case CURLE_TFTP_PERM:
        return EACCES;

    case CURLE_PARTIAL_FILE:
        return EPIPE;

    case CURLE_HTTP_RETURNED_ERROR:
        if (curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &lval) == CURLE_OK)
            return http_status_errno(lval);
        return EIO;

    case CURLE_OUT_OF_MEMORY:
        return ENOMEM;

    case CURLE_OPERATION_TIMEDOUT:
        return ETIMEDOUT;

    case CURLE_RANGE_ERROR:
        return ESPIPE;

    case CURLE_SSL_CONNECT_ERROR:
        // TODO return SSL error buffer messages
        return ECONNABORTED;

    case CURLE_FILE_COULDNT_READ_FILE:
    case CURLE_TFTP_NOTFOUND:
        return ENOENT;

    case CURLE_TOO_MANY_REDIRECTS:
        return ELOOP;

    case CURLE_FILESIZE_EXCEEDED:
        return EFBIG;

    case CURLE_REMOTE_DISK_FULL:
        return ENOSPC;

    case CURLE_REMOTE_FILE_EXISTS:
        return EEXIST;

    default:
        hts_log_error("Libcurl reported error %d (%s)", (int) err,
                      curl_easy_strerror(err));
        return EIO;
    }
}
229
230
// Translate a libcurl multi-interface result code into an errno value.
// Codes without a specific mapping are logged and reported as EIO.
static int multi_errno(CURLMcode errm)
{
    if (errm == CURLM_OK || errm == CURLM_CALL_MULTI_PERFORM)
        return 0;

    if (errm == CURLM_BAD_HANDLE
        || errm == CURLM_BAD_EASY_HANDLE
        || errm == CURLM_BAD_SOCKET)
        return EBADF;

    if (errm == CURLM_OUT_OF_MEMORY)
        return ENOMEM;

    hts_log_error("Libcurl reported error %d (%s)", (int) errm,
                  curl_multi_strerror(errm));
    return EIO;
}
251
252
static struct {
253
    kstring_t useragent;
254
    CURLSH *share;
255
    char *auth_path;
256
    khash_t(auth_map) *auth_map;
257
    int allow_unencrypted_auth_header;
258
    pthread_mutex_t auth_lock;
259
    pthread_mutex_t share_lock;
260
} curl = { { 0, 0, NULL }, NULL, NULL, NULL, 0, PTHREAD_MUTEX_INITIALIZER,
261
           PTHREAD_MUTEX_INITIALIZER };
262
263
static void share_lock(CURL *handle, curl_lock_data data,
264
160
                       curl_lock_access access, void *userptr) {
265
160
    pthread_mutex_lock(&curl.share_lock);
266
160
}
267
268
160
// Unlock callback for the libcurl share interface; releases the mutex
// taken by share_lock().  The arguments are unused.
static void share_unlock(CURL *handle, curl_lock_data data, void *userptr)
{
    (void) handle;
    (void) data;
    (void) userptr;
    pthread_mutex_unlock(&curl.share_lock);
}
271
272
0
// Destroy an auth_token and everything it owns.  NULL is accepted.
// Aborts if the token's mutex cannot be destroyed.
static void free_auth(auth_token *tok)
{
    if (tok == NULL)
        return;

    if (pthread_mutex_destroy(&tok->lock) != 0)
        abort();

    free(tok->path);
    free(tok->token);
    free(tok);
}
279
280
static void libcurl_exit(void)
281
1
{
282
1
    if (curl_share_cleanup(curl.share) == CURLSHE_OK)
283
1
        curl.share = NULL;
284
285
1
    free(curl.useragent.s);
286
1
    curl.useragent.l = curl.useragent.m = 0; curl.useragent.s = NULL;
287
288
1
    free(curl.auth_path);
289
1
    curl.auth_path = NULL;
290
291
1
    if (curl.auth_map) {
292
0
        khiter_t i;
293
0
        for (i = kh_begin(curl.auth_map); i != kh_end(curl.auth_map); ++i) {
294
0
            if (kh_exist(curl.auth_map, i)) {
295
0
                free_auth(kh_value(curl.auth_map, i));
296
0
                kh_key(curl.auth_map, i) = NULL;
297
0
                kh_value(curl.auth_map, i) = NULL;
298
0
            }
299
0
        }
300
0
        kh_destroy(auth_map, curl.auth_map);
301
0
        curl.auth_map = NULL;
302
0
    }
303
304
1
    curl_global_cleanup();
305
1
}
306
307
0
// Add one header line to the end of hdrs, growing the node array if needed.
// If dup is non-zero the string is copied with strdup(); otherwise the list
// stores the caller's pointer directly.
// Returns 0 on success, -1 if memory could not be allocated.
static int append_header(hdrlist *hdrs, const char *data, int dup)
{
    struct curl_slist *slot;

    if (hdrs->num == hdrs->size) {
        unsigned int grown = hdrs->size ? hdrs->size * 2 : 4;
        unsigned int k;
        struct curl_slist *resized = realloc(hdrs->list,
                                             grown * sizeof(*resized));
        if (!resized)
            return -1;
        hdrs->size = grown;
        hdrs->list = resized;
        // The array may have moved, so rebuild the links between nodes
        for (k = 0; k + 1 < hdrs->num; k++)
            resized[k].next = &resized[k + 1];
    }

    slot = &hdrs->list[hdrs->num];
    // Annoyingly, libcurl doesn't declare the char * as const...
    if (dup)
        slot->data = strdup(data);
    else
        slot->data = (char *) data;
    if (!slot->data)
        return -1;

    if (hdrs->num > 0)
        hdrs->list[hdrs->num - 1].next = slot;
    slot->next = NULL;
    hdrs->num++;
    return 0;
}
325
326
34
// Free every header string held in hdrs and reset it to empty.
// If completely is non-zero the node array itself is released as well;
// otherwise it is kept for reuse.
static void free_headers(hdrlist *hdrs, int completely)
{
    unsigned int remaining = hdrs->num;
    struct curl_slist *node = hdrs->list;

    while (remaining > 0) {
        free(node->data);
        node->data = NULL;
        node->next = NULL;
        node++;
        remaining--;
    }
    hdrs->num = 0;

    if (!completely)
        return;

    free(hdrs->list);
    hdrs->list = NULL;
    hdrs->size = 0;
}
340
341
34
// Return the first node of the header list for this handle: the first fixed
// header if there is one, else the first extra header, else NULL.
static struct curl_slist * get_header_list(hFILE_libcurl *fp)
{
    const http_headers *h = &fp->headers;

    if (h->fixed.num > 0)
        return &h->fixed.list[0];

    return (h->extra.num > 0) ? &h->extra.list[0] : NULL;
}
348
349
0
// Returns 1 if hdr begins with "authorization:" (compared without regard
// to case), 0 otherwise.
static inline int is_authorization(const char *hdr)
{
    static const char prefix[] = "authorization:";
    const size_t prefix_len = sizeof(prefix) - 1;

    return strncasecmp(prefix, hdr, prefix_len) == 0;
}
352
353
0
// Ask the header callback (if any) for a fresh set of headers and install
// them as the "extra" header list, replacing any previous callback headers.
//
// The callback fills in a NULL-terminated array of strings.  Each string
// that is installed is stored directly in the list (not copied), and its
// array entry is set to NULL to show it has been taken over.
//
// Returns 0 on success (including "no callback" and "no change"),
// -1 if the callback failed or memory ran out.  On a memory failure the
// strings that could not be installed are freed here.
static int add_callback_headers(hFILE_libcurl *fp) {
    char **hdrs = NULL, **hdr, **adopted;

    if (!fp->headers.callback)
        return 0;

    // Get the headers from the callback
    if (fp->headers.callback(fp->headers.callback_data, &hdrs) != 0) {
        return -1;
    }

    if (!hdrs) // No change
        return 0;

    // Remove any old callback headers
    if (fp->headers.fixed.num > 0) {
        // Unlink lists
        fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL;
    }
    free_headers(&fp->headers.extra, 0);

    if (fp->headers.auth_hdr_num > 0 || fp->headers.auth_hdr_num == -2)
        fp->headers.auth_hdr_num = 0; // Just removed it...

    // Convert to libcurl-suitable form
    for (hdr = hdrs; *hdr; hdr++) {
        if (append_header(&fp->headers.extra, *hdr, 0) < 0) {
            goto cleanup;
        }
        if (is_authorization(*hdr) && !fp->headers.auth_hdr_num)
            fp->headers.auth_hdr_num = -2;
    }
    for (hdr = hdrs; *hdr; hdr++) *hdr = NULL;

    if (fp->headers.fixed.num > 0 && fp->headers.extra.num > 0) {
        // Relink lists
        fp->headers.fixed.list[fp->headers.fixed.num - 1].next
            = &fp->headers.extra.list[0];
    }
    return 0;

 cleanup:
    // Entries before hdr are already stored in the extra list, which will
    // free them; clear them here as on the success path so that they cannot
    // be freed a second time through the callback's array.
    for (adopted = hdrs; adopted < hdr; adopted++)
        *adopted = NULL;

    // Free the entry that failed and every one after it.  The pointer has
    // to advance: without that only the first of them would be released.
    for (; *hdr; hdr++) {
        free(*hdr);
        *hdr = NULL;
    }
    return -1;
}
401
402
/*
403
 * Read an OAUTH2-style Bearer access token (see
404
 * https://tools.ietf.org/html/rfc6750#section-4).
405
 * Returns 'v' for valid; 'i' for invalid (token missing or wrong sort);
406
 * '?' for a JSON parse error; 'm' if it runs out of memory.
407
 */
408
0
static int read_auth_json(auth_token *tok, hFILE *auth_fp) {
409
0
    hts_json_token *t = hts_json_alloc_token();
410
0
    kstring_t str = {0, 0, NULL};
411
0
    char *token = NULL, *type = NULL, *expiry = NULL;
412
0
    int ret = 'i';
413
414
0
    if (!t) goto error;
415
416
0
    if ((ret = hts_json_fnext(auth_fp, t, &str)) != '{') goto error;
417
0
    while (hts_json_fnext(auth_fp, t, &str) != '}') {
418
0
        char *key;
419
0
        if (hts_json_token_type(t) != 's') {
420
0
            ret = '?';
421
0
            goto error;
422
0
        }
423
0
        key = hts_json_token_str(t);
424
0
        if (!key) goto error;
425
0
        if (strcmp(key, "access_token") == 0) {
426
0
            if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error;
427
0
            token = ks_release(&str);
428
0
        } else if (strcmp(key, "token_type") == 0) {
429
0
            if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error;
430
0
            type = ks_release(&str);
431
0
        } else if (strcmp(key, "expires_in") == 0) {
432
0
            if ((ret = hts_json_fnext(auth_fp, t, &str)) != 'n') goto error;
433
0
            expiry = ks_release(&str);
434
0
        } else if (hts_json_fskip_value(auth_fp, '\0') != 'v') {
435
0
            ret = '?';
436
0
            goto error;
437
0
        }
438
0
    }
439
440
0
    if (!token || (type && strcmp(type, "Bearer") != 0)) {
441
0
        ret = 'i';
442
0
        goto error;
443
0
    }
444
445
0
    ret = 'm';
446
0
    str.l = 0;
447
0
    if (kputs("Authorization: Bearer ", &str) < 0) goto error;
448
0
    if (kputs(token, &str) < 0) goto error;
449
0
    free(tok->token);
450
0
    tok->token = ks_release(&str);
451
0
    if (expiry) {
452
0
        long exp = strtol(expiry, NULL, 10);
453
0
        if (exp < 0) exp = 0;
454
0
        tok->expiry = time(NULL) + exp;
455
0
    } else {
456
0
        tok->expiry = 0;
457
0
    }
458
0
    ret = 'v';
459
460
0
 error:
461
0
    free(token);
462
0
    free(type);
463
0
    free(expiry);
464
0
    free(str.s);
465
0
    hts_json_free_token(t);
466
0
    return ret;
467
0
}
468
469
0
// Read a plain-text token file: the first whitespace-delimited word on the
// first line is used as a Bearer token.  tok->token becomes the complete
// "Authorization: Bearer ..." header (or NULL if the line holds no word)
// and tok->expiry is set to 0.
// Returns 0 on success, -1 if the line cannot be read or memory runs out.
static int read_auth_plain(auth_token *tok, hFILE *auth_fp)
{
    kstring_t line = {0, 0, NULL};
    kstring_t header = {0, 0, NULL};
    const char *word, *word_end;
    int status = -1;

    if (kgetline(&line, (char * (*)(char *, int, void *)) hgets, auth_fp) < 0)
        goto done;
    if (kputc('\0', &line) < 0)
        goto done;

    // Skip leading white space, then find the end of the first word
    word = line.s;
    while (*word && isspace_c(*word))
        word++;
    word_end = word;
    while (*word_end && !isspace_c(*word_end))
        word_end++;

    if (word_end > word) {
        if (kputs("Authorization: Bearer ", &header) < 0)
            goto done;
        if (kputsn(word, word_end - word, &header) < 0)
            goto done;
    }

    free(tok->token);
    tok->token = ks_release(&header);
    tok->expiry = 0;
    status = 0;

 done:
    free(line.s);
    free(header.s);
    return status;
}
496
497
0
// Re-read the token from tok->path if it is due to expire within
// AUTH_REFRESH_EARLY_SECS.  A token with expiry 0 is never refreshed.
//
// *changed is set to 1 if a refresh was attempted, 0 otherwise.
// A missing token file is not an error: the token is cleared and refreshing
// is switched off.  Any other failure marks the token as failed, after
// which every later refresh attempt fails immediately.
//
// Returns 0 on success, -1 on failure.
static int renew_auth_token(auth_token *tok, int *changed) {
    hFILE *auth_fp = NULL;
    char buffer[16];
    ssize_t len;

    *changed = 0;
    if (tok->expiry == 0 || time(NULL) + AUTH_REFRESH_EARLY_SECS < tok->expiry)
        return 0; // Still valid

    if (tok->failed)
        return -1;

    *changed = 1;
    auth_fp = hopen(tok->path, "rR");
    if (!auth_fp) {
        // Not worried about missing files; other errors are bad.
        if (errno != ENOENT)
            goto fail;

        tok->expiry = 0; // Prevent retry
        free(tok->token); // Just in case it was set
        // Clear the pointer too: callers test tok->token and copy from it,
        // and free_auth() frees it again, so it must not be left dangling.
        tok->token = NULL;
        return 0;
    }

    // Peek at the start of the file to decide which format it is in
    len = hpeek(auth_fp, buffer, sizeof(buffer));
    if (len < 0)
        goto fail;

    if (memchr(buffer, '{', len) != NULL) {
        if (read_auth_json(tok, auth_fp) != 'v')
            goto fail;
    } else {
        if (read_auth_plain(tok, auth_fp) < 0)
            goto fail;
    }

    return hclose(auth_fp) < 0 ? -1 : 0;

 fail:
    tok->failed = 1;
    if (auth_fp) hclose_abruptly(auth_fp);
    return -1;
}
540
541
0
// Make sure the extra header list carries an up-to-date Authorization
// header built from fp->headers.auth, refreshing the token first if needed.
//
// Nothing is done if an Authorization header was supplied another way
// (auth_hdr_num < 0) or if there is no auth token for this handle.
// fp->headers.auth_hdr_num is the 1-based position of the header in the
// extra list, or 0 when no such header is present.
//
// Returns 0 on success, -1 on failure (refresh failed or out of memory).
static int add_auth_header(hFILE_libcurl *fp) {
    int changed = 0;

    if (fp->headers.auth_hdr_num < 0)
        return 0; // Have an Authorization header from open or header callback

    if (!fp->headers.auth)
        return 0; // Nothing to add

    pthread_mutex_lock(&fp->headers.auth->lock);
    if (renew_auth_token(fp->headers.auth, &changed) < 0)
        goto unlock_fail;

    if (!changed && fp->headers.auth_hdr_num > 0) {
        pthread_mutex_unlock(&fp->headers.auth->lock);
        return 0;
    }

    if (fp->headers.auth_hdr_num > 0) {
        // Had a previous header, so swap in the new one
        char *header = fp->headers.auth->token;
        char *header_copy = header ? strdup(header) : NULL;
        int idx = fp->headers.auth_hdr_num - 1;
        if (header && !header_copy)
            goto unlock_fail;

        if (header_copy) {
            free(fp->headers.extra.list[idx].data);
            fp->headers.extra.list[idx].data = header_copy;
        } else {
            unsigned int j;
            // More complicated case - need to get rid of the old header
            // and tidy up linked lists
            free(fp->headers.extra.list[idx].data);
            for (j = idx + 1; j < fp->headers.extra.num; j++) {
                fp->headers.extra.list[j - 1] = fp->headers.extra.list[j];
                fp->headers.extra.list[j - 1].next = &fp->headers.extra.list[j];
            }
            fp->headers.extra.num--;
            if (fp->headers.extra.num > 0) {
                fp->headers.extra.list[fp->headers.extra.num-1].next = NULL;
            } else if (fp->headers.fixed.num > 0) {
                fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL;
            }
            fp->headers.auth_hdr_num = 0;
        }
    } else if (fp->headers.auth->token) {
        // Add new header and remember where it is
        int rc = append_header(&fp->headers.extra,
                               fp->headers.auth->token, 1);

        // append_header() may have moved the extra array (realloc) or made
        // it non-empty for the first time.  Either way the last fixed
        // header has to be pointed at the current first extra header,
        // otherwise the chain is stale or never reaches the new header.
        // Done even if the append failed, as the array may still have moved.
        if (fp->headers.fixed.num > 0 && fp->headers.extra.num > 0) {
            fp->headers.fixed.list[fp->headers.fixed.num - 1].next
                = &fp->headers.extra.list[0];
        }

        if (rc < 0)
            goto unlock_fail;
        fp->headers.auth_hdr_num = fp->headers.extra.num;
    }

    pthread_mutex_unlock(&fp->headers.auth->lock);
    return 0;

 unlock_fail:
    pthread_mutex_unlock(&fp->headers.auth->lock);
    return -1;
}
603
604
34
// Find (or create) the shared auth_token that applies to url, attach it to
// fp and add the resulting Authorization header.
//
// The token location is curl.auth_path with each "%h" replaced by the host
// part of url.  Tokens are cached in curl.auth_map, keyed by that location.
//
// Returns 0 on success or when there is nothing to do, -1 on failure.
// If a new token entry cannot be allocated the handle simply proceeds
// without an auth token.
static int get_auth_token(hFILE_libcurl *fp, const char *url) {
    const char *host = NULL, *p, *q;
    kstring_t name = {0, 0, NULL};
    size_t host_len = 0;
    khiter_t idx;
    auth_token *tok = NULL;

    // Nothing to do if:
    //   curl.auth_path has not been set
    //   fp was made by hfile_libcurl (e.g. auth_path is a http:// url)
    //   we already have an Authorization header
    if (!curl.auth_path || fp->is_recursive || fp->headers.auth_hdr_num != 0)
        return 0;

    // Insist on having a secure connection unless the user insists harder
    if (!curl.allow_unencrypted_auth_header && strncmp(url, "https://", 8) != 0)
        return 0;

    host = strstr(url, "://");
    if (host) {
        host += 3;
        host_len = strcspn(host, "/");
    }

    p = curl.auth_path;
    while ((q = strstr(p, "%h")) != NULL) {
        if (q - p > INT_MAX || host_len > INT_MAX) goto error;
        if (kputsn_(p, q - p, &name) < 0) goto error;
        // host is NULL when url has no "://"; host_len is then 0 and there
        // is nothing to append, so do not pass the NULL pointer on.
        if (host_len > 0 && kputsn_(host, host_len, &name) < 0) goto error;
        p = q + 2;
    }
    if (kputs(p, &name) < 0) goto error;

    pthread_mutex_lock(&curl.auth_lock);
    idx = kh_get(auth_map, curl.auth_map, name.s);
    if (idx < kh_end(curl.auth_map)) {
        tok = kh_value(curl.auth_map, idx);
    } else {
        tok = calloc(1, sizeof(*tok));
        if (tok && pthread_mutex_init(&tok->lock, NULL) != 0) {
            free(tok);
            tok = NULL;
        }
        if (tok) {
            int ret = -1;
            // The token takes ownership of the name; the same string is
            // used as the hash key.
            tok->path = ks_release(&name);
            tok->token = NULL;
            tok->expiry = 1; // Force refresh
            idx = kh_put(auth_map, curl.auth_map, tok->path, &ret);
            if (ret < 0) {
                // Insertion failed, so idx does not refer to a usable
                // bucket and must not be written through.
                free_auth(tok);
                tok = NULL;
            } else {
                kh_value(curl.auth_map, idx) = tok;
            }
        }
    }
    pthread_mutex_unlock(&curl.auth_lock);

    fp->headers.auth = tok;
    free(name.s);

    return add_auth_header(fp);

 error:
    free(name.s);
    return -1;
}
671
672
// Drain the multi handle's message queue.  A CURLMSG_DONE message marks
// the transfer as finished and records its result code; any other message
// is ignored.
static void process_messages(hFILE_libcurl *fp)
{
    int queued;
    CURLMsg *m;

    for (m = curl_multi_info_read(fp->multi, &queued);
         m != NULL;
         m = curl_multi_info_read(fp->multi, &queued)) {
        if (m->msg != CURLMSG_DONE)
            continue;
        fp->finished = 1;
        fp->final_result = m->data.result;
    }
}
689
690
// Wait until libcurl has something to do (unless it asked to be called
// again straight away), then run curl_multi_perform() once.
// Returns 0 on success; -1 if select() fails, or if the perform call
// fails, in which case errno is set from the libcurl error.
static int wait_perform(hFILE_libcurl *fp)
{
    int still_running;
    CURLMcode mres;

    if (!fp->perform_again) {
        fd_set rd, wr, ex;
        int maxfd;
        long timeout;

        FD_ZERO(&rd);
        FD_ZERO(&wr);
        FD_ZERO(&ex);

        // Work out which descriptors to watch and for how long
        if (curl_multi_fdset(fp->multi, &rd, &wr, &ex, &maxfd) != CURLM_OK) {
            maxfd = -1;
            timeout = 1000;
        } else if (curl_multi_timeout(fp->multi, &timeout) != CURLM_OK) {
            timeout = 1000;
        } else if (timeout < 0) {
            timeout = 10000;  // as recommended by curl_multi_timeout(3)
        }

        if (maxfd < 0) {
            if (timeout > 100)
                timeout = 100; // as recommended by curl_multi_fdset(3)
#ifdef _WIN32
            /* Windows ignores the first argument of select, so calling select
             * with maxfd=-1 does not give the expected result of sleeping for
             * timeout milliseconds in the conditional block below.
             * So sleep here and skip the next block.
             */
            Sleep(timeout);
            timeout = 0;
#endif
        }

        if (timeout > 0) {
            struct timeval tval;
            tval.tv_sec  = (timeout / 1000);
            tval.tv_usec = (timeout % 1000) * 1000;

            if (select(maxfd + 1, &rd, &wr, &ex, &tval) < 0)
                return -1;
        }
    }

    mres = curl_multi_perform(fp->multi, &still_running);
    if (mres == CURLM_CALL_MULTI_PERFORM) {
        fp->perform_again = 1;
    } else {
        fp->perform_again = 0;
        if (mres != CURLM_OK) {
            errno = multi_errno(mres);
            return -1;
        }
    }

    if (still_running < fp->nrunning)
        process_messages(fp);
    return 0;
}
741
742
743
static size_t recv_callback(char *ptr, size_t size, size_t nmemb, void *fpv)
744
0
{
745
0
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
746
0
    size_t n = size * nmemb;
747
748
0
    if (n > fp->buffer.len) {
749
0
        fp->paused = 1;
750
0
        return CURL_WRITEFUNC_PAUSE;
751
0
    }
752
0
    else if (n == 0) return 0;
753
754
0
    memcpy(fp->buffer.ptr.rd, ptr, n);
755
0
    fp->buffer.ptr.rd += n;
756
0
    fp->buffer.len -= n;
757
0
    return n;
758
0
}
759
760
761
static size_t header_callback(void *contents, size_t size, size_t nmemb,
762
                              void *userp)
763
0
{
764
0
    size_t realsize = size * nmemb;
765
0
    kstring_t *resp = (kstring_t *)userp;
766
767
0
    if (kputsn((const char *)contents, realsize, resp) == EOF) {
768
0
        return 0;
769
0
    }
770
771
0
    return realsize;
772
0
}
773
774
775
// hFILE read backend: read up to nbytes from the transfer into bufferv.
//
// Any seek deferred by libcurl_seek() is carried out first, by the cheapest
// available means:
//   - serve the data from the buffer content preserved when seeking began;
//   - for a short forward seek, read and discard the intervening bytes;
//   - otherwise restart the transfer at the new position.
//
// Returns the number of bytes stored in bufferv (0 at end of data), or -1
// with errno set on error.
static ssize_t libcurl_read(hFILE *fpv, void *bufferv, size_t nbytes)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    char *buffer = (char *) bufferv;
    off_t to_skip = -1;
    ssize_t got = 0;
    CURLcode err;

    if (fp->delayed_seek >= 0) {
        assert(fp->base.offset == fp->delayed_seek);

        if (fp->preserved
            && fp->last_offset > fp->delayed_seek
            && fp->last_offset - fp->preserved_bytes <= fp->delayed_seek) {
            // Can use buffer contents copied when seeking started, to
            // avoid having to re-read data discarded by hseek().
            // Note fp->last_offset is the offset of the *end* of the
            // preserved buffer.
            size_t n = fp->last_offset - fp->delayed_seek;
            char *start = fp->preserved + (fp->preserved_bytes - n);
            size_t bytes = n <= nbytes ? n : nbytes;
            memcpy(buffer, start, bytes);
            if (bytes < n) { // Part of the preserved buffer still left
                fp->delayed_seek += bytes;
            } else {
                fp->last_offset = fp->delayed_seek = -1;
            }
            return bytes;
        }

        if (fp->last_offset >= 0
            && fp->delayed_seek > fp->last_offset
            && fp->delayed_seek - fp->last_offset < MIN_SEEK_FORWARD) {
            // If not seeking far, just read the data and throw it away.  This
            // is likely to be quicker than opening a new stream
            to_skip = fp->delayed_seek - fp->last_offset;
        } else {
            if (restart_from_position(fp, fp->delayed_seek) < 0) {
                return -1;
            }
        }
        fp->delayed_seek = -1;
        fp->last_offset = -1;
        fp->preserved_bytes = 0;
    }

    do {
        // Point the receive callback at the caller's buffer and let the
        // transfer run until the buffer is full or the transfer ends
        fp->buffer.ptr.rd = buffer;
        fp->buffer.len = nbytes;
        fp->paused = 0;
        if (!fp->finished) {
            err = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
            if (err != CURLE_OK) {
                errno = easy_errno(fp->easy, err);
                return -1;
            }
        }

        while (! fp->paused && ! fp->finished) {
            if (wait_perform(fp) < 0) return -1;
        }

        got = fp->buffer.ptr.rd - buffer;

        if (to_skip >= 0) { // Skipping over a small seek
            if (got <= to_skip) { // Need to skip more data
                to_skip -= got;
                // Everything received so far lies before the seek target.
                // None of it may be reported to the caller, even if the
                // transfer has finished and the loop is about to end.
                got = 0;
            } else {
                // Enough was skipped; return the rest
                got -= to_skip;
                memmove(buffer, buffer + to_skip, got);
                to_skip = -1;
            }
        }
    } while (to_skip >= 0 && ! fp->finished);
    fp->buffer.ptr.rd = NULL;
    fp->buffer.len = 0;

    if (fp->finished && fp->final_result != CURLE_OK) {
        errno = easy_errno(fp->easy, fp->final_result);
        return -1;
    }

    return got;
}
861
862
static size_t send_callback(char *ptr, size_t size, size_t nmemb, void *fpv)
863
0
{
864
0
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
865
0
    size_t n = size * nmemb;
866
867
0
    if (fp->buffer.len == 0) {
868
        // Send buffer is empty; normally pause, or signal EOF if we're closing
869
0
        if (fp->closing) return 0;
870
0
        else { fp->paused = 1; return CURL_READFUNC_PAUSE; }
871
0
    }
872
873
0
    if (n > fp->buffer.len) n = fp->buffer.len;
874
0
    memcpy(ptr, fp->buffer.ptr.wr, n);
875
0
    fp->buffer.ptr.wr += n;
876
0
    fp->buffer.len -= n;
877
0
    return n;
878
0
}
879
880
// hFILE write backend: offer nbytes from bufferv to the upload and run the
// transfer until libcurl pauses for more data or the transfer finishes.
// Returns the number of bytes taken by libcurl, or -1 with errno set.
static ssize_t libcurl_write(hFILE *fpv, const void *bufferv, size_t nbytes)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    const char *start = (const char *) bufferv;
    size_t sent;
    CURLcode rc;

    // Expose the caller's data to send_callback() and resume the transfer
    fp->buffer.ptr.wr = start;
    fp->buffer.len = nbytes;
    fp->paused = 0;
    rc = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
    if (rc != CURLE_OK) {
        errno = easy_errno(fp->easy, rc);
        return -1;
    }

    for (;;) {
        if (fp->paused || fp->finished)
            break;
        if (wait_perform(fp) < 0)
            return -1;
    }

    sent = fp->buffer.ptr.wr - start;
    fp->buffer.ptr.wr = NULL;
    fp->buffer.len = 0;

    if (fp->finished && fp->final_result != CURLE_OK) {
        errno = easy_errno(fp->easy, fp->final_result);
        return -1;
    }

    return sent;
}
906
907
/*
 * Stash the bytes currently held between fp->base.begin and fp->base.end in
 * fp->preserved, so they are still available after a delayed seek (see
 * libcurl_seek()).
 *
 * The stash is sized to the full hFILE buffer (limit - buffer).  If an
 * existing stash is too small it is released before a larger one is
 * allocated, so the old allocation is not leaked.  If allocation fails the
 * stash is left empty (preserved == NULL, preserved_size == 0,
 * preserved_bytes == 0) rather than carrying stale bookkeeping.
 */
static void preserve_buffer_content(hFILE_libcurl *fp)
{
    size_t capacity, pending;

    if (fp->base.begin == fp->base.end) {
        // Nothing buffered, so nothing worth keeping
        fp->preserved_bytes = 0;
        return;
    }

    capacity = fp->base.limit - fp->base.buffer;
    if (!fp->preserved || fp->preserved_size < capacity) {
        // Contents of the old stash are about to be overwritten anyway,
        // so a plain free + malloc is enough (no need for realloc's copy).
        free(fp->preserved);
        fp->preserved = malloc(capacity);
        if (!fp->preserved) {
            fp->preserved_size = 0;
            fp->preserved_bytes = 0;
            return;
        }
        fp->preserved_size = capacity;
    }

    pending = fp->base.end - fp->base.begin;
    assert(pending <= fp->preserved_size);

    memcpy(fp->preserved, fp->base.begin, pending);
    fp->preserved_bytes = pending;
}
926
927
/*
 * Backend seek method.  Only SEEK_SET and SEEK_END are handled here;
 * SEEK_CUR is rejected with ENOSYS and any other whence with EINVAL.
 *
 * The first seek on a handle is carried out immediately through
 * restart_from_position().  Once one seek has succeeded, later seeks are
 * only recorded in fp->delayed_seek; the visible code here does not issue
 * a new request for them.
 *
 * Returns the new absolute position, or -1 with errno set.  ESPIPE is
 * reported for write handles, handles on which seeking has been disabled,
 * SEEK_END with unknown file size, and a failed restart.
 */
static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    off_t base, target;
    int out_of_range;

    // Cowardly refuse to seek when writing or a previous seek failed.
    if (!(fp->is_read && fp->can_seek)) {
        errno = ESPIPE;
        return -1;
    }

    if (whence == SEEK_SET) {
        base = 0;
    } else if (whence == SEEK_END) {
        if (fp->file_size < 0) {
            errno = ESPIPE;
            return -1;
        }
        base = fp->file_size;
    } else {
        errno = (whence == SEEK_CUR) ? ENOSYS : EINVAL;
        return -1;
    }

    // Check 0 <= base+offset <= fp->file_size (upper bound only when the
    // size is known), arranged so the arithmetic cannot overflow.
    if (offset < 0)
        out_of_range = (base + offset < 0);
    else
        out_of_range = (fp->file_size >= 0 && offset > fp->file_size - base);

    if (out_of_range) {
        errno = EINVAL;
        return -1;
    }

    target = base + offset;

    if (!fp->tried_seek) {
        // First seek: do it for real so we find out whether seeking works
        if (restart_from_position(fp, target) < 0) {
            /* This value for errno may not be entirely true, but the caller
               may be able to carry on with the existing handle. */
            errno = ESPIPE;
            return -1;
        }

        fp->tried_seek = 1;
        return target;
    }

    /* Seeking has worked at least once, so now we can delay doing
       the actual work until the next read.  This avoids lots of pointless
       http or ftp reconnections if the caller does lots of seeks
       without any intervening reads. */
    if (fp->delayed_seek < 0) {
        fp->last_offset = fp->base.offset + (fp->base.end - fp->base.buffer);
        // Stash the current hFILE buffer content in case it's useful later
        preserve_buffer_content(fp);
    }
    fp->delayed_seek = target;
    return target;
}
987
988
0
/*
 * Re-issue the request so that the transfer resumes at byte offset pos.
 *
 * A duplicate of fp->easy is configured with CURLOPT_RESUME_FROM_LARGE and
 * driven (via an on-stack copy of *fp) until it pauses or finishes.  If that
 * works, the duplicate replaces fp->easy and the transfer state flags are
 * copied back into *fp; otherwise the duplicate is discarded.
 *
 * Returns 0 on success and -1 on failure.  errno is set on failure where an
 * error code is available (it is left untouched if curl_easy_duphandle()
 * fails).  Failures that reach the early_error label also clear
 * fp->can_seek so that no further seeks are attempted; the early returns
 * for header-refresh failures, a failed removal in error_remove, and a
 * failed setopt after the handle swap do not.
 */
static int restart_from_position(hFILE_libcurl *fp, off_t pos) {
    hFILE_libcurl temp_fp;
    CURLcode err;
    CURLMcode errm;
    int update_headers = 0;
    int save_errno = 0;

    // TODO If we seem to be doing random access, use CURLOPT_RANGE to do
    // limited reads (e.g. about a BAM block!) so seeking can reuse the
    // existing connection more often.

    // Get new headers from the callback (if defined).  This changes the
    // headers in fp before it gets duplicated, but they should have been
    // sent by now.

    if (fp->headers.callback) {
        if (add_callback_headers(fp) != 0)
            return -1;
        update_headers = 1;
    }
    if (fp->headers.auth_hdr_num > 0 && fp->headers.auth) {
        if (add_auth_header(fp) != 0)
            return -1;
        update_headers = 1;
    }
    if (update_headers) {
        struct curl_slist *list = get_header_list(fp);
        if (list) {
            err = curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list);
            if (err != CURLE_OK) {
                errno = easy_errno(fp->easy,err);
                return -1;
            }
        }
    }

    /*
      Duplicate the easy handle, and use CURLOPT_RESUME_FROM_LARGE to open
      a new request to the server, reading from the location that we want
      to seek to.  If the new request works and returns the correct data,
      the original easy handle in *fp is closed and replaced with the new
      one.  If not, we close the new handle and leave *fp unchanged.
     */

    memcpy(&temp_fp, fp, sizeof(temp_fp));
    temp_fp.buffer.len = 0;
    temp_fp.buffer.ptr.rd = NULL;
    temp_fp.easy = curl_easy_duphandle(fp->easy);
    if (!temp_fp.easy)
        goto early_error;

    // Point the duplicate's callbacks at the on-stack copy while it is probed
    err = curl_easy_setopt(temp_fp.easy, CURLOPT_RESUME_FROM_LARGE,(curl_off_t)pos);
    err |= curl_easy_setopt(temp_fp.easy, CURLOPT_PRIVATE, &temp_fp);
    err |= curl_easy_setopt(temp_fp.easy, CURLOPT_WRITEDATA, &temp_fp);
    if (err != CURLE_OK) {
        save_errno = easy_errno(temp_fp.easy, err);
        goto error;
    }

    temp_fp.buffer.len = 0;  // Ensures we only read the response headers
    temp_fp.paused = temp_fp.finished = 0;

    // fp->multi and temp_fp.multi are the same.
    errm = curl_multi_add_handle(fp->multi, temp_fp.easy);
    if (errm != CURLM_OK) {
        save_errno = multi_errno(errm);
        goto error;
    }
    temp_fp.nrunning = ++fp->nrunning;

    while (! temp_fp.paused && ! temp_fp.finished)
        if (wait_perform(&temp_fp) < 0) {
            save_errno = errno;
            goto error_remove;
        }

    if (temp_fp.finished && temp_fp.final_result != CURLE_OK) {
        save_errno = easy_errno(temp_fp.easy, temp_fp.final_result);
        goto error_remove;
    }

    // We've got a good response, close the original connection and
    // replace it with the new one.

    errm = curl_multi_remove_handle(fp->multi, fp->easy);
    if (errm != CURLM_OK) {
        // Clean up as much as possible
        curl_easy_reset(temp_fp.easy);
        if (curl_multi_remove_handle(fp->multi, temp_fp.easy) == CURLM_OK) {
            fp->nrunning--;
            curl_easy_cleanup(temp_fp.easy);
        }
        save_errno = multi_errno(errm);
        goto early_error;
    }
    fp->nrunning--;

    curl_easy_cleanup(fp->easy);
    fp->easy = temp_fp.easy;
    // Re-target the new handle's callbacks from the stack copy to the real fp
    err = curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
    err |= curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp);
    if (err != CURLE_OK) {
        save_errno = easy_errno(fp->easy, err);
        curl_easy_reset(fp->easy);
        errno = save_errno;
        return -1;
    }
    // Adopt the transfer state reached by the probe request
    fp->buffer.len = 0;
    fp->paused = temp_fp.paused;
    fp->finished = temp_fp.finished;
    fp->perform_again = temp_fp.perform_again;
    fp->final_result = temp_fp.final_result;

    return 0;

 error_remove:
    curl_easy_reset(temp_fp.easy); // Ensure no pointers to on-stack temp_fp
    errm = curl_multi_remove_handle(fp->multi, temp_fp.easy);
    if (errm != CURLM_OK) {
        errno = multi_errno(errm);
        return -1;
    }
    fp->nrunning--;
 error:
    curl_easy_cleanup(temp_fp.easy);
 early_error:
    fp->can_seek = 0;  // Don't try to seek again
    if (save_errno)
        errno = save_errno;
    return -1;
}
1119
1120
/*
 * Backend close method.  Gives an unfinished transfer a last chance to run
 * (so uploads can signal EOF through send_callback()), then detaches and
 * destroys the curl handles and releases header lists and the preserved
 * buffer.
 *
 * Returns 0 on success, or -1 with errno set from the transfer's final
 * result or from the first error recorded during shutdown.  Resources are
 * released in either case.
 */
static int libcurl_close(hFILE *fpv)
{
    hFILE_libcurl *fp = (hFILE_libcurl *) fpv;
    CURLcode rc;
    CURLMcode mrc;
    int first_error = 0;

    // Before closing the file, unpause it and perform on it so that uploads
    // have the opportunity to signal EOF to the server -- see send_callback().

    fp->buffer.len = 0;
    fp->closing = 1;
    fp->paused = 0;
    if (!fp->finished) {
        rc = curl_easy_pause(fp->easy, CURLPAUSE_CONT);
        if (rc != CURLE_OK)
            first_error = easy_errno(fp->easy, rc);
    }

    while (!(first_error != 0 || fp->paused || fp->finished)) {
        if (wait_perform(fp) < 0)
            first_error = errno;
    }

    // A failed transfer result takes precedence over anything seen above
    if (fp->finished && fp->final_result != CURLE_OK)
        first_error = easy_errno(fp->easy, fp->final_result);

    mrc = curl_multi_remove_handle(fp->multi, fp->easy);
    if (first_error == 0 && mrc != CURLM_OK)
        first_error = multi_errno(mrc);
    fp->nrunning--;

    curl_easy_cleanup(fp->easy);
    curl_multi_cleanup(fp->multi);

    // Tell callback to free any data it needs to
    if (fp->headers.callback)
        fp->headers.callback(fp->headers.callback_data, NULL);
    free_headers(&fp->headers.fixed, 1);
    free_headers(&fp->headers.extra, 1);

    free(fp->preserved);

    if (first_error == 0)
        return 0;

    errno = first_error;
    return -1;
}
1161
1162
static const struct hFILE_backend libcurl_backend =
1163
{
1164
    libcurl_read, libcurl_write, libcurl_seek, NULL, libcurl_close
1165
};
1166
1167
static hFILE *
1168
libcurl_open(const char *url, const char *modes, http_headers *headers)
1169
34
{
1170
34
    hFILE_libcurl *fp;
1171
34
    struct curl_slist *list;
1172
34
    char mode;
1173
34
    const char *s;
1174
34
    CURLcode err;
1175
34
    CURLMcode errm;
1176
34
    int save, is_recursive;
1177
34
    kstring_t in_header = {0, 0, NULL};
1178
34
    long response;
1179
1180
34
    is_recursive = strchr(modes, 'R') != NULL;
1181
1182
34
    if ((s = strpbrk(modes, "rwa+")) != NULL) {
1183
34
        mode = *s;
1184
34
        if (strpbrk(&s[1], "rwa+")) mode = 'e';
1185
34
    }
1186
0
    else mode = '\0';
1187
1188
34
    if (mode != 'r' && mode != 'w') { errno = EINVAL; goto early_error; }
1189
1190
34
    fp = (hFILE_libcurl *) hfile_init(sizeof (hFILE_libcurl), modes, 0);
1191
34
    if (fp == NULL) goto early_error;
1192
1193
34
    if (headers) {
1194
0
        fp->headers = *headers;
1195
34
    } else {
1196
34
        memset(&fp->headers, 0, sizeof(fp->headers));
1197
34
        fp->headers.fail_on_error = 1;
1198
34
    }
1199
1200
34
    fp->file_size = -1;
1201
34
    fp->buffer.ptr.rd = NULL;
1202
34
    fp->buffer.len = 0;
1203
34
    fp->final_result = (CURLcode) -1;
1204
34
    fp->paused = fp->closing = fp->finished = fp->perform_again = 0;
1205
34
    fp->can_seek = 1;
1206
34
    fp->tried_seek = 0;
1207
34
    fp->delayed_seek = fp->last_offset = -1;
1208
34
    fp->preserved = NULL;
1209
34
    fp->preserved_bytes = fp->preserved_size = 0;
1210
34
    fp->is_recursive = is_recursive;
1211
34
    fp->nrunning = 0;
1212
34
    fp->easy = NULL;
1213
1214
34
    fp->multi = curl_multi_init();
1215
34
    if (fp->multi == NULL) { errno = ENOMEM; goto error; }
1216
1217
34
    fp->easy = curl_easy_init();
1218
34
    if (fp->easy == NULL) { errno = ENOMEM; goto error; }
1219
1220
    // Make a route to the hFILE_libcurl* given just a CURL* easy handle
1221
34
    err = curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp);
1222
1223
    // Avoid many repeated CWD calls with FTP, instead requesting the filename
1224
    // by full path (but not strictly compliant with RFC1738).
1225
34
    err |= curl_easy_setopt(fp->easy, CURLOPT_FTP_FILEMETHOD,
1226
34
                            (long) CURLFTPMETHOD_NOCWD);
1227
1228
34
    if (mode == 'r') {
1229
34
        err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEFUNCTION, recv_callback);
1230
34
        err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp);
1231
34
        fp->is_read = 1;
1232
34
    }
1233
0
    else {
1234
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_READFUNCTION, send_callback);
1235
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_READDATA, fp);
1236
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_UPLOAD, 1L);
1237
0
        if (append_header(&fp->headers.fixed,
1238
0
                          "Transfer-Encoding: chunked", 1) < 0)
1239
0
            goto error;
1240
0
        fp->is_read = 0;
1241
0
    }
1242
1243
34
    err |= curl_easy_setopt(fp->easy, CURLOPT_SHARE, curl.share);
1244
34
    err |= curl_easy_setopt(fp->easy, CURLOPT_URL, url);
1245
34
    {
1246
34
        char* env_curl_ca_bundle = getenv("CURL_CA_BUNDLE");
1247
34
        if (env_curl_ca_bundle) {
1248
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_CAINFO, env_curl_ca_bundle);
1249
0
        }
1250
34
    }
1251
34
    err |= curl_easy_setopt(fp->easy, CURLOPT_USERAGENT, curl.useragent.s);
1252
34
    if (fp->headers.callback) {
1253
0
        if (add_callback_headers(fp) != 0) goto error;
1254
0
    }
1255
34
    if (get_auth_token(fp, url) < 0)
1256
0
        goto error;
1257
34
    if ((list = get_header_list(fp)) != NULL)
1258
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list);
1259
1260
34
    if (hts_verbose <= 8 && fp->headers.fail_on_error)
1261
34
        err |= curl_easy_setopt(fp->easy, CURLOPT_FAILONERROR, 1L);
1262
34
    if (hts_verbose >= 8)
1263
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_VERBOSE, 1L);
1264
1265
34
    if (fp->headers.redirect) {
1266
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, header_callback);
1267
0
        err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, (void *)&in_header);
1268
34
    } else {
1269
34
        err |= curl_easy_setopt(fp->easy, CURLOPT_FOLLOWLOCATION, 1L);
1270
34
    }
1271
1272
34
    if (err != 0) { errno = ENOSYS; goto error; }
1273
1274
34
    errm = curl_multi_add_handle(fp->multi, fp->easy);
1275
34
    if (errm != CURLM_OK) { errno = multi_errno(errm); goto error; }
1276
34
    fp->nrunning++;
1277
1278
122
    while (! fp->paused && ! fp->finished) {
1279
89
        if (wait_perform(fp) < 0) goto error_remove;
1280
89
    }
1281
1282
34
    curl_easy_getinfo(fp->easy, CURLINFO_RESPONSE_CODE, &response);
1283
33
    if (fp->headers.http_response_ptr) {
1284
0
        *fp->headers.http_response_ptr = response;
1285
0
    }
1286
1287
33
    if (fp->finished && fp->final_result != CURLE_OK) {
1288
33
        errno = easy_errno(fp->easy, fp->final_result);
1289
33
        goto error_remove;
1290
33
    }
1291
1292
0
    if (fp->headers.redirect) {
1293
0
        if (response >= 300 && response < 400) { // redirection
1294
0
            kstring_t new_url = {0, 0, NULL};
1295
1296
0
            if (fp->headers.redirect(fp->headers.redirect_data, response,
1297
0
                                     &in_header, &new_url)) {
1298
0
                errno = ENOSYS;
1299
0
                goto error;
1300
0
            }
1301
1302
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_URL, new_url.s);
1303
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, NULL);
1304
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, NULL);
1305
0
            free(ks_release(&in_header));
1306
1307
0
            if (err != 0) { errno = ENOSYS; goto error; }
1308
0
            free(ks_release(&new_url));
1309
1310
0
            if (restart_from_position(fp, 0) < 0) {
1311
0
                goto error_remove;
1312
0
            }
1313
1314
0
            if (fp->headers.http_response_ptr) {
1315
0
                curl_easy_getinfo(fp->easy, CURLINFO_RESPONSE_CODE,
1316
0
                                  fp->headers.http_response_ptr);
1317
0
            }
1318
1319
0
            if (fp->finished && fp->final_result != CURLE_OK) {
1320
0
                errno = easy_errno(fp->easy, fp->final_result);
1321
0
                goto error_remove;
1322
0
            }
1323
0
        } else {
1324
            // we no longer need to look at the headers
1325
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, NULL);
1326
0
            err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, NULL);
1327
0
            free(ks_release(&in_header));
1328
1329
0
            if (err != 0) { errno = ENOSYS; goto error; }
1330
0
        }
1331
0
    }
1332
1333
0
    if (mode == 'r') {
1334
0
#if LIBCURL_VERSION_NUM >= 0x073700 // 7.55.0
1335
0
        curl_off_t offset;
1336
1337
0
        if (curl_easy_getinfo(fp->easy, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T,
1338
0
                              &offset) == CURLE_OK && offset > 0)
1339
0
            fp->file_size = (off_t) offset;
1340
#else
1341
        double dval;
1342
1343
        if (curl_easy_getinfo(fp->easy, CURLINFO_CONTENT_LENGTH_DOWNLOAD,
1344
                              &dval) == CURLE_OK && dval >= 0.0)
1345
            fp->file_size = (off_t) (dval + 0.1);
1346
#endif
1347
0
    }
1348
0
    fp->base.backend = &libcurl_backend;
1349
0
    return &fp->base;
1350
1351
34
error_remove:
1352
34
    save = errno;
1353
34
    (void) curl_multi_remove_handle(fp->multi, fp->easy);
1354
34
    fp->nrunning--;
1355
34
    errno = save;
1356
1357
34
error:
1358
34
    if (fp->headers.redirect) free(in_header.s);
1359
34
    save = errno;
1360
34
    if (fp->easy) curl_easy_cleanup(fp->easy);
1361
34
    if (fp->multi) curl_multi_cleanup(fp->multi);
1362
34
    free_headers(&fp->headers.extra, 1);
1363
34
    hfile_destroy((hFILE *) fp);
1364
34
    errno = save;
1365
34
    return NULL;
1366
1367
0
early_error:
1368
0
    return NULL;
1369
34
}
1370
1371
static hFILE *hopen_libcurl(const char *url, const char *modes)
1372
34
{
1373
34
    return libcurl_open(url, modes, NULL);
1374
34
}
1375
1376
/*
 * Decode the NULL-terminated list of (key, value...) options passed to
 * hopen() into *headers.
 *
 * Recognised keys: "httphdr", "httphdr:l", "httphdr:v", "httphdr_callback",
 * "httphdr_callback_data", "va_list" (a nested va_list *, parsed
 * recursively), "auth_token_enabled", "redirect_callback",
 * "redirect_callback_data", "http_response_ptr" and "fail_on_error".
 * Supplying an Authorization header through any of the "httphdr" forms sets
 * headers->auth_hdr_num to -1; "auth_token_enabled" with the value "false"
 * sets it to -3.
 *
 * NULL values for "httphdr", "httphdr:v", "va_list" and
 * "auth_token_enabled" are ignored rather than dereferenced.
 *
 * Returns 0 on success, -1 on failure: errno is EINVAL for an unknown key,
 * otherwise whatever append_header() left in it.
 */
static int parse_va_list(http_headers *headers, va_list args)
{
    const char *argtype;

    while ((argtype = va_arg(args, const char *)) != NULL)
        if (strcmp(argtype, "httphdr:v") == 0) {
            const char **hdr = va_arg(args, const char **);
            // A NULL array is treated the same as an empty one
            for (; hdr && *hdr; hdr++) {
                if (append_header(&headers->fixed, *hdr, 1) < 0)
                    return -1;
                if (is_authorization(*hdr))
                    headers->auth_hdr_num = -1;
            }
        }
        else if (strcmp(argtype, "httphdr:l") == 0) {
            const char *hdr;
            while ((hdr = va_arg(args, const char *)) != NULL) {
                if (append_header(&headers->fixed, hdr, 1) < 0)
                    return -1;
                if (is_authorization(hdr))
                    headers->auth_hdr_num = -1;
            }
        }
        else if (strcmp(argtype, "httphdr") == 0) {
            const char *hdr = va_arg(args, const char *);
            if (hdr) {
                if (append_header(&headers->fixed, hdr, 1) < 0)
                    return -1;
                if (is_authorization(hdr))
                    headers->auth_hdr_num = -1;
            }
        }
        else if (strcmp(argtype, "httphdr_callback") == 0) {
            headers->callback = va_arg(args, const hts_httphdr_callback);
        }
        else if (strcmp(argtype, "httphdr_callback_data") == 0) {
            headers->callback_data = va_arg(args, void *);
        }
        else if (strcmp(argtype, "va_list") == 0) {
            va_list *args2 = va_arg(args, va_list *);
            if (args2) {
                if (parse_va_list(headers, *args2) < 0) return -1;
            }
        }
        else if (strcmp(argtype, "auth_token_enabled") == 0) {
            const char *flag = va_arg(args, const char *);
            // Guard against a NULL value: strcmp(NULL, ...) is undefined
            if (flag && strcmp(flag, "false") == 0)
                headers->auth_hdr_num = -3;
        }
        else if (strcmp(argtype, "redirect_callback") == 0) {
            headers->redirect = va_arg(args, const redirect_callback);
        }
        else if (strcmp(argtype, "redirect_callback_data") == 0) {
            headers->redirect_data = va_arg(args, void *);
        }
        else if (strcmp(argtype, "http_response_ptr") == 0) {
            headers->http_response_ptr = va_arg(args, long *);
        }
        else if (strcmp(argtype, "fail_on_error") == 0) {
            headers->fail_on_error = va_arg(args, int);
        }
        else { errno = EINVAL; return -1; }

    return 0;
}
1441
1442
/*
1443
  HTTP headers to be added to the request can be passed in as extra
1444
  arguments to hopen().  The headers can be specified as follows:
1445
1446
  * Single header:
1447
    hopen(url, mode, "httphdr", "X-Hdr-1: text", NULL);
1448
1449
  * Multiple headers in the argument list:
1450
    hopen(url, mode, "httphdr:l", "X-Hdr-1: text", "X-Hdr-2: text", NULL, NULL);
1451
1452
  * Multiple headers in a char* array:
1453
    hopen(url, mode, "httphdr:v", hdrs, NULL);
1454
    where `hdrs` is a char **.  The list ends with a NULL pointer.
1455
1456
  * A callback function:
1457
    hopen(url, mode, "httphdr_callback", func,
1458
                     "httphdr_callback_data", arg, NULL);
1459
    `func` has type
1460
         int (* hts_httphdr_callback) (void *cb_data, char ***hdrs);
1461
    `arg` is passed to the callback as a void *.
1462
1463
    The function is called at file open, and when attempting to seek (which
1464
    opens a new HTTP request).  This allows, for example, access tokens
1465
    that may have gone stale to be regenerated.  The function is also
1466
    called (with `hdrs` == NULL) on file close so that the callback can
1467
    free any memory that it needs to.
1468
1469
    The callback should return 0 on success, non-zero on failure.  It should
1470
    return in *hdrs a list of strings containing the new headers (terminated
1471
    with a NULL pointer).  These will replace any headers previously supplied
1472
    by the callback.  If no changes are necessary, it can return NULL
1473
    in *hdrs, in which case the previous headers will be left unchanged.
1474
1475
    Ownership of the strings in the header list passes to hfile_libcurl,
1476
    so the callback should not attempt to use or free them itself.  The memory
1477
    containing the array belongs to the callback and will not be freed by
1478
    hfile_libcurl.
1479
1480
    Headers supplied by the callback are appended after any specified
1481
    using the "httphdr", "httphdr:l" or "httphdr:v" methods.  No attempt
1482
    is made to replace these headers (even if a key is repeated) so anything
1483
    that is expected to vary needs to come from the callback.
1484
 */
1485
1486
static hFILE *vhopen_libcurl(const char *url, const char *modes, va_list args)
1487
0
{
1488
0
    hFILE *fp = NULL;
1489
0
    http_headers headers = { .fail_on_error = 1 };
1490
1491
0
    if (parse_va_list(&headers, args) == 0) {
1492
0
        fp = libcurl_open(url, modes, &headers);
1493
0
    }
1494
1495
0
    if (!fp) {
1496
0
        free_headers(&headers.fixed, 1);
1497
0
    }
1498
0
    return fp;
1499
0
}
1500
1501
int PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)(struct hFILE_plugin *self)
1502
1
{
1503
1
    static const struct hFILE_scheme_handler handler =
1504
1
        { hopen_libcurl, hfile_always_remote, "libcurl",
1505
1
          2000 + 50,
1506
1
          vhopen_libcurl };
1507
1508
#ifdef ENABLE_PLUGINS
1509
    // Embed version string for examination via strings(1) or what(1)
1510
    static const char id[] =
1511
        "@(#)hfile_libcurl plugin (htslib)\t" HTS_VERSION_TEXT;
1512
    const char *version = strchr(id, '\t')+1;
1513
#else
1514
1
    const char *version = hts_version();
1515
1
#endif
1516
1
    const curl_version_info_data *info;
1517
1
    const char * const *protocol;
1518
1
    const char *auth;
1519
1
    CURLcode err;
1520
1
    CURLSHcode errsh;
1521
1522
1
    err = curl_global_init(CURL_GLOBAL_ALL);
1523
1
    if (err != CURLE_OK) { errno = easy_errno(NULL, err); return -1; }
1524
1525
1
    curl.share = curl_share_init();
1526
1
    if (curl.share == NULL) { curl_global_cleanup(); errno = EIO; return -1; }
1527
1
    errsh = curl_share_setopt(curl.share, CURLSHOPT_LOCKFUNC, share_lock);
1528
1
    errsh |= curl_share_setopt(curl.share, CURLSHOPT_UNLOCKFUNC, share_unlock);
1529
1
    errsh |= curl_share_setopt(curl.share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS);
1530
1
    if (errsh != 0) {
1531
0
        curl_share_cleanup(curl.share);
1532
0
        curl_global_cleanup();
1533
0
        errno = EIO;
1534
0
        return -1;
1535
0
    }
1536
1537
1
    if ((auth = getenv("HTS_AUTH_LOCATION")) != NULL) {
1538
0
        curl.auth_path = strdup(auth);
1539
0
        curl.auth_map = kh_init(auth_map);
1540
0
        if (!curl.auth_path || !curl.auth_map) {
1541
0
            int save_errno = errno;
1542
0
            free(curl.auth_path);
1543
0
            kh_destroy(auth_map, curl.auth_map);
1544
0
            curl_share_cleanup(curl.share);
1545
0
            curl_global_cleanup();
1546
0
            errno = save_errno;
1547
0
            return -1;
1548
0
        }
1549
0
    }
1550
1
    if ((auth = getenv("HTS_ALLOW_UNENCRYPTED_AUTHORIZATION_HEADER")) != NULL
1551
0
        && strcmp(auth, "I understand the risks") == 0) {
1552
0
        curl.allow_unencrypted_auth_header = 1;
1553
0
    }
1554
1555
1
    info = curl_version_info(CURLVERSION_NOW);
1556
1
    ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version);
1557
1558
1
    self->name = "libcurl";
1559
1
    self->destroy = libcurl_exit;
1560
1561
24
    for (protocol = info->protocols; *protocol; protocol++)
1562
23
        hfile_add_scheme_handler(*protocol, &handler);
1563
1
    return 0;
1564
1
}