/src/htslib/hfile_libcurl.c
Line | Count | Source |
1 | | /* hfile_libcurl.c -- libcurl backend for low-level file streams. |
2 | | |
3 | | Copyright (C) 2015-2017, 2019-2020 Genome Research Ltd. |
4 | | |
5 | | Author: John Marshall <jm18@sanger.ac.uk> |
6 | | |
7 | | Permission is hereby granted, free of charge, to any person obtaining a copy |
8 | | of this software and associated documentation files (the "Software"), to deal |
9 | | in the Software without restriction, including without limitation the rights |
10 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
11 | | copies of the Software, and to permit persons to whom the Software is |
12 | | furnished to do so, subject to the following conditions: |
13 | | |
14 | | The above copyright notice and this permission notice shall be included in |
15 | | all copies or substantial portions of the Software. |
16 | | |
17 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
18 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
19 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
20 | | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
21 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
22 | | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
23 | | DEALINGS IN THE SOFTWARE. */ |
24 | | |
25 | | #define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h |
26 | | #include <config.h> |
27 | | |
28 | | #include <stdarg.h> |
29 | | #include <stdlib.h> |
30 | | #include <string.h> |
31 | | #include <strings.h> |
32 | | #include <errno.h> |
33 | | #include <pthread.h> |
34 | | #ifndef _WIN32 |
35 | | # include <sys/select.h> |
36 | | #endif |
37 | | #include <assert.h> |
38 | | #include <time.h> |
39 | | |
40 | | #include "hfile_internal.h" |
41 | | #ifdef ENABLE_PLUGINS |
42 | | #include "version.h" |
43 | | #endif |
44 | | #include "htslib/hts.h" // for hts_version() and hts_verbose |
45 | | #include "htslib/hts_alloc.h" |
46 | | #include "htslib/kstring.h" |
47 | | #include "htslib/khash.h" |
48 | | |
49 | | #include <curl/curl.h> |
50 | | |
// Number of seconds to take off auth_token expiry, to allow for clock skew
// and slow servers.  renew_auth_token() treats a token as still valid only
// while time(NULL) + AUTH_REFRESH_EARLY_SECS is before the stored expiry.
#define AUTH_REFRESH_EARLY_SECS 60

// Minimum number of bytes to skip when seeking forward.  Seeks less than
// this will just read the data and throw it away.  The optimal value
// depends on how long it takes to make a new connection compared
// to how fast the data arrives.
#define MIN_SEEK_FORWARD 1000000
60 | | |
// Cached authorization token; instances are stored in curl.auth_map and
// referenced from http_headers.auth.
typedef struct {
    char *path;             // Location the token is read from (given to hopen())
    char *token;            // Complete "Authorization: Bearer ..." header, or NULL
    time_t expiry;          // Time at which the token must be re-read;
                            // 0 means it is never refreshed
    int failed;             // Set when a refresh fails; later refreshes are refused
    pthread_mutex_t lock;   // Held by add_auth_header() while renewing/copying
} auth_token;
68 | | |
// For the authorization header cache: a string-keyed hash map whose keys are
// auth_token.path strings and whose values are the matching auth_token *.
KHASH_MAP_INIT_STR(auth_map, auth_token *)
71 | | |
// Curl-compatible header linked list.  The nodes are stored in one growable
// array, so they can be indexed directly as well as followed via ->next.
typedef struct {
    struct curl_slist *list;    // Array of nodes; entry i links to entry i + 1
    unsigned int num;           // Number of entries in use
    unsigned int size;          // Number of entries allocated
} hdrlist;
78 | | |
// Request header state for one handle.  When both lists are populated the
// last node of `fixed` is linked to the first node of `extra`, so libcurl
// sees a single list (see get_header_list()).
typedef struct {
    hdrlist fixed;                   // List of headers supplied at hopen()
    hdrlist extra;                   // List of headers from callback
                                     // (add_auth_header() also appends the
                                     // auth_token header here)
    hts_httphdr_callback callback;   // Callback to get more headers
    void *callback_data;             // Data to pass to httphdr callback
    auth_token *auth;                // Authentication token
    int auth_hdr_num;                // Location of auth_token in hdrlist extra
                                     // (1-based; 0 means no header present)
                                     // If -1, Authorization header is in fixed
                                     //    -2, it came from the callback
                                     //    -3, "auth_token_enabled", "false"
                                     //    passed to hopen()
    redirect_callback redirect;      // Callback to handle 3xx redirects
    void *redirect_data;             // Data to pass to redirect_callback
    long *http_response_ptr;         // Location to store http response code.
    int fail_on_error;               // Open fails on >400 response code
                                     //       (default true)
} http_headers;
96 | | |
// hFILE backend state for a libcurl transfer.  `base` must stay first so the
// structure can be used wherever an hFILE * is expected.
typedef struct {
    hFILE base;
    CURL *easy;
    CURLM *multi;
    off_t file_size;
    // Caller's buffer for the transfer in progress: recv_callback() copies
    // incoming data to ptr.rd and reduces len by the amount stored.
    struct {
        union { char *rd; const char *wr; } ptr;
        size_t len;
    } buffer;
    CURLcode final_result;  // easy result code for finished transfers
    // Flags for communicating with libcurl callbacks:
    unsigned paused : 1;    // callback tells us that it has paused transfer
    unsigned closing : 1;   // informs callback that hclose() has been invoked
    unsigned finished : 1;  // wait_perform() tells us transfer is complete
    unsigned perform_again : 1; // curl_multi_perform() asked to be called
                                // again at once; wait_perform() skips waiting
    unsigned is_read : 1;   // Opened in read mode
    unsigned can_seek : 1;  // Can (attempt to) seek on this handle
    unsigned is_recursive:1; // Opened by hfile_libcurl itself
    unsigned tried_seek : 1; // At least one seek has been attempted
    unsigned needs_reconnect : 1; // Deferred reconnect after retryable error
    int nrunning;           // Compared with the running-handle count from
                            // curl_multi_perform() to spot completed transfers
    http_headers headers;

    off_t delayed_seek;      // Location to seek to before reading
    off_t last_offset;       // Location we're seeking from
    off_t stream_pos;        // Current position in remote file for retry
    char *preserved;         // Preserved buffer content on seek
    size_t preserved_bytes;  // Number of preserved bytes
    size_t preserved_size;   // Size of preserved buffer
} hFILE_libcurl;
127 | | |
// Forward declarations of file-local functions.  restart_from_position() is
// called by retry_reconnect() and libcurl_read() before its definition.
static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence);
static int restart_from_position(hFILE_libcurl *fp, off_t pos);
130 | | |
/*
 * Translate an HTTP response status into an errno value.
 *
 * Statuses below 400 are not errors and give 0.  Statuses without a
 * dedicated mapping give EINVAL in the 4xx range and EIO from 500 upwards.
 */
static int http_status_errno(int status)
{
    if (status < 400)
        return 0;

    switch (status) {
    case 401:
    case 407:
        return EPERM;

    case 403:
        return EACCES;

    case 404:
    case 410:
        return ENOENT;

    case 405:
        return EROFS;

    case 408:
    case 504:
        return ETIMEDOUT;

    case 501:
        return ENOSYS;

    case 503:
        return EBUSY;

    default:
        return (status >= 500) ? EIO : EINVAL;
    }
}
153 | | |
154 | | static int easy_errno(CURL *easy, CURLcode err) |
155 | 0 | { |
156 | 0 | long lval; |
157 | |
|
158 | 0 | switch (err) { |
159 | 0 | case CURLE_OK: |
160 | 0 | return 0; |
161 | | |
162 | 0 | case CURLE_UNSUPPORTED_PROTOCOL: |
163 | 0 | case CURLE_URL_MALFORMAT: |
164 | 0 | return EINVAL; |
165 | | |
166 | 0 | #if LIBCURL_VERSION_NUM >= 0x071505 |
167 | 0 | case CURLE_NOT_BUILT_IN: |
168 | 0 | return ENOSYS; |
169 | 0 | #endif |
170 | | |
171 | 0 | case CURLE_COULDNT_RESOLVE_PROXY: |
172 | 0 | case CURLE_COULDNT_RESOLVE_HOST: |
173 | 0 | case CURLE_FTP_CANT_GET_HOST: |
174 | 0 | return EDESTADDRREQ; // Lookup failure |
175 | | |
176 | 0 | case CURLE_COULDNT_CONNECT: |
177 | 0 | case CURLE_SEND_ERROR: |
178 | 0 | case CURLE_RECV_ERROR: |
179 | 0 | if (curl_easy_getinfo(easy, CURLINFO_OS_ERRNO, &lval) == CURLE_OK) |
180 | 0 | return lval; |
181 | 0 | else |
182 | 0 | return ECONNABORTED; |
183 | | |
184 | 0 | case CURLE_REMOTE_ACCESS_DENIED: |
185 | 0 | case CURLE_LOGIN_DENIED: |
186 | 0 | case CURLE_TFTP_PERM: |
187 | 0 | return EACCES; |
188 | | |
189 | 0 | case CURLE_PARTIAL_FILE: |
190 | 0 | return EPIPE; |
191 | | |
192 | 0 | case CURLE_HTTP_RETURNED_ERROR: |
193 | 0 | if (curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &lval) == CURLE_OK) |
194 | 0 | return http_status_errno(lval); |
195 | 0 | else |
196 | 0 | return EIO; |
197 | | |
198 | 0 | case CURLE_OUT_OF_MEMORY: |
199 | 0 | return ENOMEM; |
200 | | |
201 | 0 | case CURLE_OPERATION_TIMEDOUT: |
202 | 0 | return ETIMEDOUT; |
203 | | |
204 | 0 | case CURLE_RANGE_ERROR: |
205 | 0 | return ESPIPE; |
206 | | |
207 | 0 | case CURLE_SSL_CONNECT_ERROR: |
208 | | // TODO return SSL error buffer messages |
209 | 0 | return ECONNABORTED; |
210 | | |
211 | 0 | case CURLE_FILE_COULDNT_READ_FILE: |
212 | 0 | case CURLE_TFTP_NOTFOUND: |
213 | 0 | return ENOENT; |
214 | | |
215 | 0 | case CURLE_TOO_MANY_REDIRECTS: |
216 | 0 | return ELOOP; |
217 | | |
218 | 0 | case CURLE_FILESIZE_EXCEEDED: |
219 | 0 | return EFBIG; |
220 | | |
221 | 0 | case CURLE_REMOTE_DISK_FULL: |
222 | 0 | return ENOSPC; |
223 | | |
224 | 0 | case CURLE_REMOTE_FILE_EXISTS: |
225 | 0 | return EEXIST; |
226 | | |
227 | 0 | default: |
228 | 0 | hts_log_error("Libcurl reported error %d (%s)", (int) err, |
229 | 0 | curl_easy_strerror(err)); |
230 | 0 | return EIO; |
231 | 0 | } |
232 | 0 | } |
233 | | |
234 | | static int is_retryable(CURL *easy, CURLcode err) |
235 | 0 | { |
236 | 0 | switch (err) { |
237 | 0 | case CURLE_COULDNT_CONNECT: |
238 | 0 | case CURLE_SEND_ERROR: |
239 | 0 | case CURLE_RECV_ERROR: |
240 | 0 | case CURLE_PARTIAL_FILE: |
241 | 0 | case CURLE_OPERATION_TIMEDOUT: |
242 | 0 | case CURLE_GOT_NOTHING: |
243 | 0 | case CURLE_SSL_CONNECT_ERROR: |
244 | | #ifdef CURLE_HTTP2 |
245 | | case CURLE_HTTP2: |
246 | | #endif |
247 | | #ifdef CURLE_HTTP2_STREAM |
248 | | case CURLE_HTTP2_STREAM: |
249 | | #endif |
250 | 0 | return 1; |
251 | | |
252 | 0 | case CURLE_HTTP_RETURNED_ERROR: { |
253 | 0 | long response = 0; |
254 | 0 | if (curl_easy_getinfo(easy, CURLINFO_RESPONSE_CODE, &response) |
255 | 0 | == CURLE_OK) { |
256 | 0 | switch (response) { |
257 | 0 | case 429: case 500: case 502: case 503: case 504: |
258 | 0 | return 1; |
259 | 0 | default: |
260 | 0 | break; |
261 | 0 | } |
262 | 0 | } |
263 | 0 | return 0; |
264 | 0 | } |
265 | | |
266 | 0 | default: |
267 | 0 | return 0; |
268 | 0 | } |
269 | 0 | } |
270 | | |
271 | | static int multi_errno(CURLMcode errm) |
272 | 0 | { |
273 | 0 | switch (errm) { |
274 | 0 | case CURLM_CALL_MULTI_PERFORM: |
275 | 0 | case CURLM_OK: |
276 | 0 | return 0; |
277 | | |
278 | 0 | case CURLM_BAD_HANDLE: |
279 | 0 | case CURLM_BAD_EASY_HANDLE: |
280 | 0 | case CURLM_BAD_SOCKET: |
281 | 0 | return EBADF; |
282 | | |
283 | 0 | case CURLM_OUT_OF_MEMORY: |
284 | 0 | return ENOMEM; |
285 | | |
286 | 0 | default: |
287 | 0 | hts_log_error("Libcurl reported error %d (%s)", (int) errm, |
288 | 0 | curl_multi_strerror(errm)); |
289 | 0 | return EIO; |
290 | 0 | } |
291 | 0 | } |
292 | | |
293 | | static struct { |
294 | | kstring_t useragent; |
295 | | CURLSH *share; |
296 | | char *auth_path; |
297 | | khash_t(auth_map) *auth_map; |
298 | | int allow_unencrypted_auth_header; |
299 | | int retry_max; // Max retry attempts (HTS_RETRY_MAX, default 3) |
300 | | long retry_delay_ms; // Initial retry delay in ms (HTS_RETRY_DELAY, default 500) |
301 | | long retry_max_delay_ms; // Max retry delay in ms (HTS_RETRY_MAX_DELAY, default 60000) |
302 | | long low_speed_limit; // Bytes/sec threshold (HTS_LOW_SPEED_LIMIT, default 1) |
303 | | long low_speed_time; // Seconds below threshold (HTS_LOW_SPEED_TIME, default 60) |
304 | | pthread_mutex_t auth_lock; |
305 | | pthread_mutex_t share_lock; |
306 | | } curl = { { 0, 0, NULL }, NULL, NULL, NULL, 0, |
307 | | 3, 500, 60000, 1, 60, |
308 | | PTHREAD_MUTEX_INITIALIZER, PTHREAD_MUTEX_INITIALIZER }; |
309 | | |
310 | | static void share_lock(CURL *handle, curl_lock_data data, |
311 | 1 | curl_lock_access access, void *userptr) { |
312 | 1 | pthread_mutex_lock(&curl.share_lock); |
313 | 1 | } |
314 | | |
315 | 1 | static void share_unlock(CURL *handle, curl_lock_data data, void *userptr) { |
316 | 1 | pthread_mutex_unlock(&curl.share_lock); |
317 | 1 | } |
318 | | |
319 | 0 | static void free_auth(auth_token *tok) { |
320 | 0 | if (!tok) return; |
321 | 0 | if (pthread_mutex_destroy(&tok->lock)) abort(); |
322 | 0 | free(tok->path); |
323 | 0 | free(tok->token); |
324 | 0 | free(tok); |
325 | 0 | } |
326 | | |
327 | | static void libcurl_exit(void) |
328 | 1 | { |
329 | 1 | if (curl_share_cleanup(curl.share) == CURLSHE_OK) |
330 | 1 | curl.share = NULL; |
331 | | |
332 | 1 | free(curl.useragent.s); |
333 | 1 | curl.useragent.l = curl.useragent.m = 0; curl.useragent.s = NULL; |
334 | | |
335 | 1 | free(curl.auth_path); |
336 | 1 | curl.auth_path = NULL; |
337 | | |
338 | 1 | if (curl.auth_map) { |
339 | 0 | khiter_t i; |
340 | 0 | for (i = kh_begin(curl.auth_map); i != kh_end(curl.auth_map); ++i) { |
341 | 0 | if (kh_exist(curl.auth_map, i)) { |
342 | 0 | free_auth(kh_value(curl.auth_map, i)); |
343 | 0 | kh_key(curl.auth_map, i) = NULL; |
344 | 0 | kh_value(curl.auth_map, i) = NULL; |
345 | 0 | } |
346 | 0 | } |
347 | 0 | kh_destroy(auth_map, curl.auth_map); |
348 | 0 | curl.auth_map = NULL; |
349 | 0 | } |
350 | | |
351 | 1 | curl_global_cleanup(); |
352 | 1 | } |
353 | | |
354 | 0 | static int append_header(hdrlist *hdrs, const char *data, int dup) { |
355 | 0 | if (hdrs->num == hdrs->size) { |
356 | 0 | unsigned int new_sz = hdrs->size ? hdrs->size * 2 : 4, i; |
357 | 0 | struct curl_slist *new_list = hts_realloc_p(hdrs->list, |
358 | 0 | sizeof(*new_list), new_sz); |
359 | 0 | if (!new_list) return -1; |
360 | 0 | hdrs->size = new_sz; |
361 | 0 | hdrs->list = new_list; |
362 | 0 | for (i = 1; i < hdrs->num; i++) hdrs->list[i-1].next = &hdrs->list[i]; |
363 | 0 | } |
364 | | // Annoyingly, libcurl doesn't declare the char * as const... |
365 | 0 | hdrs->list[hdrs->num].data = dup ? strdup(data) : (char *) data; |
366 | 0 | if (!hdrs->list[hdrs->num].data) return -1; |
367 | 0 | if (hdrs->num > 0) hdrs->list[hdrs->num - 1].next = &hdrs->list[hdrs->num]; |
368 | 0 | hdrs->list[hdrs->num].next = NULL; |
369 | 0 | hdrs->num++; |
370 | 0 | return 0; |
371 | 0 | } |
372 | | |
373 | 0 | static void free_headers(hdrlist *hdrs, int completely) { |
374 | 0 | unsigned int i; |
375 | 0 | for (i = 0; i < hdrs->num; i++) { |
376 | 0 | free(hdrs->list[i].data); |
377 | 0 | hdrs->list[i].data = NULL; |
378 | 0 | hdrs->list[i].next = NULL; |
379 | 0 | } |
380 | 0 | hdrs->num = 0; |
381 | 0 | if (completely) { |
382 | 0 | free(hdrs->list); |
383 | 0 | hdrs->size = 0; |
384 | 0 | hdrs->list = NULL; |
385 | 0 | } |
386 | 0 | } |
387 | | |
388 | 0 | static struct curl_slist * get_header_list(hFILE_libcurl *fp) { |
389 | 0 | if (fp->headers.fixed.num > 0) |
390 | 0 | return &fp->headers.fixed.list[0]; |
391 | 0 | if (fp->headers.extra.num > 0) |
392 | 0 | return &fp->headers.extra.list[0]; |
393 | 0 | return 0; |
394 | 0 | } |
395 | | |
// Return non-zero if hdr starts with "authorization:" (compared without
// regard to case), zero otherwise.
static inline int is_authorization(const char *hdr) {
    static const char prefix[] = "authorization:";

    return strncasecmp(prefix, hdr, sizeof(prefix) - 1) == 0;
}
399 | | |
400 | 0 | static int add_callback_headers(hFILE_libcurl *fp) { |
401 | 0 | char **hdrs = NULL, **hdr; |
402 | |
|
403 | 0 | if (!fp->headers.callback) |
404 | 0 | return 0; |
405 | | |
406 | | // Get the headers from the callback |
407 | 0 | if (fp->headers.callback(fp->headers.callback_data, &hdrs) != 0) { |
408 | 0 | return -1; |
409 | 0 | } |
410 | | |
411 | 0 | if (!hdrs) // No change |
412 | 0 | return 0; |
413 | | |
414 | | // Remove any old callback headers |
415 | 0 | if (fp->headers.fixed.num > 0) { |
416 | | // Unlink lists |
417 | 0 | fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL; |
418 | 0 | } |
419 | 0 | free_headers(&fp->headers.extra, 0); |
420 | |
|
421 | 0 | if (fp->headers.auth_hdr_num > 0 || fp->headers.auth_hdr_num == -2) |
422 | 0 | fp->headers.auth_hdr_num = 0; // Just removed it... |
423 | | |
424 | | // Convert to libcurl-suitable form |
425 | 0 | for (hdr = hdrs; *hdr; hdr++) { |
426 | 0 | if (append_header(&fp->headers.extra, *hdr, 0) < 0) { |
427 | 0 | goto cleanup; |
428 | 0 | } |
429 | 0 | if (is_authorization(*hdr) && !fp->headers.auth_hdr_num) |
430 | 0 | fp->headers.auth_hdr_num = -2; |
431 | 0 | } |
432 | 0 | for (hdr = hdrs; *hdr; hdr++) *hdr = NULL; |
433 | |
|
434 | 0 | if (fp->headers.fixed.num > 0 && fp->headers.extra.num > 0) { |
435 | | // Relink lists |
436 | 0 | fp->headers.fixed.list[fp->headers.fixed.num - 1].next |
437 | 0 | = &fp->headers.extra.list[0]; |
438 | 0 | } |
439 | 0 | return 0; |
440 | | |
441 | 0 | cleanup: |
442 | 0 | while (hdr && *hdr) { |
443 | 0 | free(*hdr); |
444 | 0 | *hdr = NULL; |
445 | 0 | } |
446 | 0 | return -1; |
447 | 0 | } |
448 | | |
449 | | /* |
450 | | * Read an OAUTH2-style Bearer access token (see |
451 | | * https://tools.ietf.org/html/rfc6750#section-4). |
452 | | * Returns 'v' for valid; 'i' for invalid (token missing or wrong sort); |
453 | | * '?' for a JSON parse error; 'm' if it runs out of memory. |
454 | | */ |
455 | 0 | static int read_auth_json(auth_token *tok, hFILE *auth_fp) { |
456 | 0 | hts_json_token *t = hts_json_alloc_token(); |
457 | 0 | kstring_t str = {0, 0, NULL}; |
458 | 0 | char *token = NULL, *type = NULL, *expiry = NULL; |
459 | 0 | int ret = 'i'; |
460 | |
|
461 | 0 | if (!t) goto error; |
462 | | |
463 | 0 | if ((ret = hts_json_fnext(auth_fp, t, &str)) != '{') goto error; |
464 | 0 | while (hts_json_fnext(auth_fp, t, &str) != '}') { |
465 | 0 | char *key; |
466 | 0 | if (hts_json_token_type(t) != 's') { |
467 | 0 | ret = '?'; |
468 | 0 | goto error; |
469 | 0 | } |
470 | 0 | key = hts_json_token_str(t); |
471 | 0 | if (!key) goto error; |
472 | 0 | if (strcmp(key, "access_token") == 0) { |
473 | 0 | if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error; |
474 | 0 | token = ks_release(&str); |
475 | 0 | } else if (strcmp(key, "token_type") == 0) { |
476 | 0 | if ((ret = hts_json_fnext(auth_fp, t, &str)) != 's') goto error; |
477 | 0 | type = ks_release(&str); |
478 | 0 | } else if (strcmp(key, "expires_in") == 0) { |
479 | 0 | if ((ret = hts_json_fnext(auth_fp, t, &str)) != 'n') goto error; |
480 | 0 | expiry = ks_release(&str); |
481 | 0 | } else if (hts_json_fskip_value(auth_fp, '\0') != 'v') { |
482 | 0 | ret = '?'; |
483 | 0 | goto error; |
484 | 0 | } |
485 | 0 | } |
486 | | |
487 | 0 | if (!token || (type && strcmp(type, "Bearer") != 0)) { |
488 | 0 | ret = 'i'; |
489 | 0 | goto error; |
490 | 0 | } |
491 | | |
492 | 0 | ret = 'm'; |
493 | 0 | str.l = 0; |
494 | 0 | if (kputs("Authorization: Bearer ", &str) < 0) goto error; |
495 | 0 | if (kputs(token, &str) < 0) goto error; |
496 | 0 | free(tok->token); |
497 | 0 | tok->token = ks_release(&str); |
498 | 0 | if (expiry) { |
499 | 0 | long exp = strtol(expiry, NULL, 10); |
500 | 0 | if (exp < 0) exp = 0; |
501 | 0 | tok->expiry = time(NULL) + exp; |
502 | 0 | } else { |
503 | 0 | tok->expiry = 0; |
504 | 0 | } |
505 | 0 | ret = 'v'; |
506 | |
|
507 | 0 | error: |
508 | 0 | free(token); |
509 | 0 | free(type); |
510 | 0 | free(expiry); |
511 | 0 | free(str.s); |
512 | 0 | hts_json_free_token(t); |
513 | 0 | return ret; |
514 | 0 | } |
515 | | |
516 | 0 | static int read_auth_plain(auth_token *tok, hFILE *auth_fp) { |
517 | 0 | kstring_t line = {0, 0, NULL}; |
518 | 0 | kstring_t token = {0, 0, NULL}; |
519 | 0 | const char *start, *end; |
520 | |
|
521 | 0 | if (khgetline(&line, auth_fp) < 0) goto error; |
522 | 0 | if (kputc('\0', &line) < 0) goto error; |
523 | | |
524 | 0 | for (start = line.s; *start && isspace_c(*start); start++) {} |
525 | 0 | for (end = start; *end && !isspace_c(*end); end++) {} |
526 | |
|
527 | 0 | if (end > start) { |
528 | 0 | if (kputs("Authorization: Bearer ", &token) < 0) goto error; |
529 | 0 | if (kputsn(start, end - start, &token) < 0) goto error; |
530 | 0 | } |
531 | | |
532 | 0 | free(tok->token); |
533 | 0 | tok->token = ks_release(&token); |
534 | 0 | tok->expiry = 0; |
535 | 0 | free(line.s); |
536 | 0 | return 0; |
537 | | |
538 | 0 | error: |
539 | 0 | free(line.s); |
540 | 0 | free(token.s); |
541 | 0 | return -1; |
542 | 0 | } |
543 | | |
544 | 0 | static int renew_auth_token(auth_token *tok, int *changed) { |
545 | 0 | hFILE *auth_fp = NULL; |
546 | 0 | char buffer[16]; |
547 | 0 | ssize_t len; |
548 | |
|
549 | 0 | *changed = 0; |
550 | 0 | if (tok->expiry == 0 || time(NULL) + AUTH_REFRESH_EARLY_SECS < tok->expiry) |
551 | 0 | return 0; // Still valid |
552 | | |
553 | 0 | if (tok->failed) |
554 | 0 | return -1; |
555 | | |
556 | 0 | *changed = 1; |
557 | 0 | auth_fp = hopen(tok->path, "rR"); |
558 | 0 | if (!auth_fp) { |
559 | | // Not worried about missing files; other errors are bad. |
560 | 0 | if (errno != ENOENT) |
561 | 0 | goto fail; |
562 | | |
563 | 0 | tok->expiry = 0; // Prevent retry |
564 | 0 | free(tok->token); // Just in case it was set |
565 | 0 | return 0; |
566 | 0 | } |
567 | | |
568 | 0 | len = hpeek(auth_fp, buffer, sizeof(buffer)); |
569 | 0 | if (len < 0) |
570 | 0 | goto fail; |
571 | | |
572 | 0 | if (memchr(buffer, '{', len) != NULL) { |
573 | 0 | if (read_auth_json(tok, auth_fp) != 'v') |
574 | 0 | goto fail; |
575 | 0 | } else { |
576 | 0 | if (read_auth_plain(tok, auth_fp) < 0) |
577 | 0 | goto fail; |
578 | 0 | } |
579 | | |
580 | 0 | return hclose(auth_fp) < 0 ? -1 : 0; |
581 | | |
582 | 0 | fail: |
583 | 0 | tok->failed = 1; |
584 | 0 | if (auth_fp) hclose_abruptly(auth_fp); |
585 | 0 | return -1; |
586 | 0 | } |
587 | | |
588 | 0 | static int add_auth_header(hFILE_libcurl *fp) { |
589 | 0 | int changed = 0; |
590 | |
|
591 | 0 | if (fp->headers.auth_hdr_num < 0) |
592 | 0 | return 0; // Have an Authorization header from open or header callback |
593 | | |
594 | 0 | if (!fp->headers.auth) |
595 | 0 | return 0; // Nothing to add |
596 | | |
597 | 0 | pthread_mutex_lock(&fp->headers.auth->lock); |
598 | 0 | if (renew_auth_token(fp->headers.auth, &changed) < 0) |
599 | 0 | goto unlock_fail; |
600 | | |
601 | 0 | if (!changed && fp->headers.auth_hdr_num > 0) { |
602 | 0 | pthread_mutex_unlock(&fp->headers.auth->lock); |
603 | 0 | return 0; |
604 | 0 | } |
605 | | |
606 | 0 | if (fp->headers.auth_hdr_num > 0) { |
607 | | // Had a previous header, so swap in the new one |
608 | 0 | char *header = fp->headers.auth->token; |
609 | 0 | char *header_copy = header ? strdup(header) : NULL; |
610 | 0 | int idx = fp->headers.auth_hdr_num - 1; |
611 | 0 | if (header && !header_copy) |
612 | 0 | goto unlock_fail; |
613 | | |
614 | 0 | if (header_copy) { |
615 | 0 | free(fp->headers.extra.list[idx].data); |
616 | 0 | fp->headers.extra.list[idx].data = header_copy; |
617 | 0 | } else { |
618 | 0 | unsigned int j; |
619 | | // More complicated case - need to get rid of the old header |
620 | | // and tidy up linked lists |
621 | 0 | free(fp->headers.extra.list[idx].data); |
622 | 0 | for (j = idx + 1; j < fp->headers.extra.num; j++) { |
623 | 0 | fp->headers.extra.list[j - 1] = fp->headers.extra.list[j]; |
624 | 0 | fp->headers.extra.list[j - 1].next = &fp->headers.extra.list[j]; |
625 | 0 | } |
626 | 0 | fp->headers.extra.num--; |
627 | 0 | if (fp->headers.extra.num > 0) { |
628 | 0 | fp->headers.extra.list[fp->headers.extra.num-1].next = NULL; |
629 | 0 | } else if (fp->headers.fixed.num > 0) { |
630 | 0 | fp->headers.fixed.list[fp->headers.fixed.num - 1].next = NULL; |
631 | 0 | } |
632 | 0 | fp->headers.auth_hdr_num = 0; |
633 | 0 | } |
634 | 0 | } else if (fp->headers.auth->token) { |
635 | | // Add new header and remember where it is |
636 | 0 | if (append_header(&fp->headers.extra, |
637 | 0 | fp->headers.auth->token, 1) < 0) { |
638 | 0 | goto unlock_fail; |
639 | 0 | } |
640 | 0 | fp->headers.auth_hdr_num = fp->headers.extra.num; |
641 | 0 | } |
642 | | |
643 | 0 | pthread_mutex_unlock(&fp->headers.auth->lock); |
644 | 0 | return 0; |
645 | | |
646 | 0 | unlock_fail: |
647 | 0 | pthread_mutex_unlock(&fp->headers.auth->lock); |
648 | 0 | return -1; |
649 | 0 | } |
650 | | |
651 | 0 | static int get_auth_token(hFILE_libcurl *fp, const char *url) { |
652 | 0 | const char *host = NULL, *p, *q; |
653 | 0 | kstring_t name = {0, 0, NULL}; |
654 | 0 | size_t host_len = 0; |
655 | 0 | khiter_t idx; |
656 | 0 | auth_token *tok = NULL; |
657 | | |
658 | | // Nothing to do if: |
659 | | // curl.auth_path has not been set |
660 | | // fp was made by hfile_libcurl (e.g. auth_path is a http:// url) |
661 | | // we already have an Authorization header |
662 | 0 | if (!curl.auth_path || fp->is_recursive || fp->headers.auth_hdr_num != 0) |
663 | 0 | return 0; |
664 | | |
665 | | // Insist on having a secure connection unless the user insists harder |
666 | 0 | if (!curl.allow_unencrypted_auth_header && strncmp(url, "https://", 8) != 0) |
667 | 0 | return 0; |
668 | | |
669 | 0 | host = strstr(url, "://"); |
670 | 0 | if (host) { |
671 | 0 | host += 3; |
672 | 0 | host_len = strcspn(host, "/"); |
673 | 0 | } |
674 | |
|
675 | 0 | p = curl.auth_path; |
676 | 0 | while ((q = strstr(p, "%h")) != NULL) { |
677 | 0 | if (q - p > INT_MAX || host_len > INT_MAX) goto error; |
678 | 0 | if (kputsn_(p, q - p, &name) < 0) goto error; |
679 | 0 | if (kputsn_(host, host_len, &name) < 0) goto error; |
680 | 0 | p = q + 2; |
681 | 0 | } |
682 | 0 | if (kputs(p, &name) < 0) goto error; |
683 | | |
684 | 0 | pthread_mutex_lock(&curl.auth_lock); |
685 | 0 | idx = kh_get(auth_map, curl.auth_map, name.s); |
686 | 0 | if (idx < kh_end(curl.auth_map)) { |
687 | 0 | tok = kh_value(curl.auth_map, idx); |
688 | 0 | } else { |
689 | 0 | tok = calloc(1, sizeof(*tok)); |
690 | 0 | if (tok && pthread_mutex_init(&tok->lock, NULL) != 0) { |
691 | 0 | free(tok); |
692 | 0 | tok = NULL; |
693 | 0 | } |
694 | 0 | if (tok) { |
695 | 0 | int ret = -1; |
696 | 0 | tok->path = ks_release(&name); |
697 | 0 | tok->token = NULL; |
698 | 0 | tok->expiry = 1; // Force refresh |
699 | 0 | idx = kh_put(auth_map, curl.auth_map, tok->path, &ret); |
700 | 0 | if (ret < 0) { |
701 | 0 | free_auth(tok); |
702 | 0 | tok = NULL; |
703 | 0 | } |
704 | 0 | kh_value(curl.auth_map, idx) = tok; |
705 | 0 | } |
706 | 0 | } |
707 | 0 | pthread_mutex_unlock(&curl.auth_lock); |
708 | |
|
709 | 0 | fp->headers.auth = tok; |
710 | 0 | free(name.s); |
711 | |
|
712 | 0 | return add_auth_header(fp); |
713 | | |
714 | 0 | error: |
715 | 0 | free(name.s); |
716 | 0 | return -1; |
717 | 0 | } |
718 | | |
719 | | static void process_messages(hFILE_libcurl *fp) |
720 | 0 | { |
721 | 0 | CURLMsg *msg; |
722 | 0 | int remaining; |
723 | |
|
724 | 0 | while ((msg = curl_multi_info_read(fp->multi, &remaining)) != NULL) { |
725 | 0 | switch (msg->msg) { |
726 | 0 | case CURLMSG_DONE: |
727 | 0 | fp->finished = 1; |
728 | 0 | fp->final_result = msg->data.result; |
729 | 0 | break; |
730 | | |
731 | 0 | default: |
732 | 0 | break; |
733 | 0 | } |
734 | 0 | } |
735 | 0 | } |
736 | | |
737 | | static int wait_perform(hFILE_libcurl *fp) |
738 | 0 | { |
739 | 0 | fd_set rd, wr, ex; |
740 | 0 | int maxfd, nrunning; |
741 | 0 | long timeout; |
742 | 0 | CURLMcode errm; |
743 | |
|
744 | 0 | if (!fp->perform_again) { |
745 | 0 | FD_ZERO(&rd); |
746 | 0 | FD_ZERO(&wr); |
747 | 0 | FD_ZERO(&ex); |
748 | 0 | if (curl_multi_fdset(fp->multi, &rd, &wr, &ex, &maxfd) != CURLM_OK) |
749 | 0 | maxfd = -1, timeout = 1000; |
750 | 0 | else { |
751 | 0 | if (curl_multi_timeout(fp->multi, &timeout) != CURLM_OK) |
752 | 0 | timeout = 1000; |
753 | 0 | else if (timeout < 0) { |
754 | 0 | timeout = 10000; // as recommended by curl_multi_timeout(3) |
755 | 0 | } |
756 | 0 | } |
757 | 0 | if (maxfd < 0) { |
758 | 0 | if (timeout > 100) |
759 | 0 | timeout = 100; // as recommended by curl_multi_fdset(3) |
760 | | #ifdef _WIN32 |
761 | | /* Windows ignores the first argument of select, so calling select |
762 | | * with maxfd=-1 does not give the expected result of sleeping for |
763 | | * timeout milliseconds in the conditional block below. |
764 | | * So sleep here and skip the next block. |
765 | | */ |
766 | | Sleep(timeout); |
767 | | timeout = 0; |
768 | | #endif |
769 | 0 | } |
770 | |
|
771 | 0 | if (timeout > 0) { |
772 | 0 | struct timeval tval; |
773 | 0 | tval.tv_sec = (timeout / 1000); |
774 | 0 | tval.tv_usec = (timeout % 1000) * 1000; |
775 | |
|
776 | 0 | if (select(maxfd + 1, &rd, &wr, &ex, &tval) < 0) return -1; |
777 | 0 | } |
778 | 0 | } |
779 | | |
780 | 0 | errm = curl_multi_perform(fp->multi, &nrunning); |
781 | 0 | fp->perform_again = 0; |
782 | 0 | if (errm == CURLM_CALL_MULTI_PERFORM) fp->perform_again = 1; |
783 | 0 | else if (errm != CURLM_OK) { errno = multi_errno(errm); return -1; } |
784 | | |
785 | 0 | if (nrunning < fp->nrunning) process_messages(fp); |
786 | 0 | return 0; |
787 | 0 | } |
788 | | |
789 | | |
790 | | static size_t recv_callback(char *ptr, size_t size, size_t nmemb, void *fpv) |
791 | 0 | { |
792 | 0 | hFILE_libcurl *fp = (hFILE_libcurl *) fpv; |
793 | 0 | size_t n = size * nmemb; |
794 | |
|
795 | 0 | if (n > fp->buffer.len) { |
796 | 0 | fp->paused = 1; |
797 | 0 | return CURL_WRITEFUNC_PAUSE; |
798 | 0 | } |
799 | 0 | else if (n == 0) return 0; |
800 | | |
801 | 0 | memcpy(fp->buffer.ptr.rd, ptr, n); |
802 | 0 | fp->buffer.ptr.rd += n; |
803 | 0 | fp->buffer.len -= n; |
804 | 0 | return n; |
805 | 0 | } |
806 | | |
807 | | |
808 | | static size_t header_callback(void *contents, size_t size, size_t nmemb, |
809 | | void *userp) |
810 | 0 | { |
811 | 0 | size_t realsize = size * nmemb; |
812 | 0 | kstring_t *resp = (kstring_t *)userp; |
813 | |
|
814 | 0 | if (kputsn((const char *)contents, realsize, resp) == EOF) { |
815 | 0 | return 0; |
816 | 0 | } |
817 | | |
818 | 0 | return realsize; |
819 | 0 | } |
820 | | |
821 | | |
822 | | static void refresh_retry_config(void) |
823 | 0 | { |
824 | 0 | const char *val; |
825 | 0 | if ((val = getenv("HTS_RETRY_MAX")) != NULL) |
826 | 0 | curl.retry_max = atoi(val); |
827 | 0 | if ((val = getenv("HTS_RETRY_DELAY")) != NULL) |
828 | 0 | curl.retry_delay_ms = atol(val); |
829 | 0 | if ((val = getenv("HTS_RETRY_MAX_DELAY")) != NULL) |
830 | 0 | curl.retry_max_delay_ms = atol(val); |
831 | 0 | if ((val = getenv("HTS_LOW_SPEED_LIMIT")) != NULL) |
832 | 0 | curl.low_speed_limit = atol(val); |
833 | 0 | if ((val = getenv("HTS_LOW_SPEED_TIME")) != NULL) |
834 | 0 | curl.low_speed_time = atol(val); |
835 | 0 | } |
836 | | |
/*
 * Sleep for delay_ms milliseconds before a retry.
 *
 * Zero or negative delays return at once: a negative value would otherwise
 * turn into a very long Sleep() on Windows and an invalid nanosleep()
 * request elsewhere.  The sleep may end early if interrupted by a signal.
 */
static void retry_sleep(long delay_ms)
{
    if (delay_ms <= 0)
        return;

#ifdef _WIN32
    Sleep(delay_ms);
#else
    struct timespec ts;
    ts.tv_sec = delay_ms / 1000;
    ts.tv_nsec = (delay_ms % 1000) * 1000000L;
    nanosleep(&ts, NULL);
#endif
}
848 | | |
849 | | static int retry_reconnect(hFILE_libcurl *fp, off_t pos) |
850 | 0 | { |
851 | 0 | int attempt; |
852 | 0 | long delay = curl.retry_delay_ms; |
853 | 0 | int save_can_seek; |
854 | |
|
855 | 0 | for (attempt = 0; attempt < curl.retry_max; attempt++) { |
856 | 0 | hts_log_warning("Retrying connection (attempt %d/%d) at offset %lld", |
857 | 0 | attempt + 1, curl.retry_max, (long long) pos); |
858 | 0 | retry_sleep(delay); |
859 | |
|
860 | 0 | save_can_seek = fp->can_seek; |
861 | 0 | if (restart_from_position(fp, pos) == 0) { |
862 | 0 | fp->needs_reconnect = 0; |
863 | 0 | return 0; |
864 | 0 | } |
865 | | // restart_from_position sets can_seek=0 on failure; restore it |
866 | 0 | fp->can_seek = save_can_seek; |
867 | | |
868 | | // Exponential backoff |
869 | 0 | delay *= 2; |
870 | 0 | if (delay > curl.retry_max_delay_ms) |
871 | 0 | delay = curl.retry_max_delay_ms; |
872 | 0 | } |
873 | | |
874 | 0 | return -1; |
875 | 0 | } |
876 | | |
877 | | static ssize_t libcurl_read(hFILE *fpv, void *bufferv, size_t nbytes) |
878 | 0 | { |
879 | 0 | hFILE_libcurl *fp = (hFILE_libcurl *) fpv; |
880 | 0 | char *buffer = (char *) bufferv; |
881 | 0 | off_t to_skip = -1; |
882 | 0 | ssize_t got = 0; |
883 | 0 | CURLcode err; |
884 | 0 | int retry_attempts = 0; |
885 | | |
886 | | // Handle deferred reconnection from a previous retryable error |
887 | 0 | if (fp->needs_reconnect) { |
888 | 0 | if (retry_reconnect(fp, fp->stream_pos) < 0) { |
889 | 0 | errno = EIO; |
890 | 0 | return -1; |
891 | 0 | } |
892 | 0 | } |
893 | | |
894 | 0 | retry: |
895 | 0 | if (fp->delayed_seek >= 0) { |
896 | 0 | assert(fp->base.offset == fp->delayed_seek); |
897 | |
|
898 | 0 | if (fp->preserved |
899 | 0 | && fp->last_offset > fp->delayed_seek |
900 | 0 | && fp->last_offset - fp->preserved_bytes <= fp->delayed_seek) { |
901 | | // Can use buffer contents copied when seeking started, to |
902 | | // avoid having to re-read data discarded by hseek(). |
903 | | // Note fp->last_offset is the offset of the *end* of the |
904 | | // preserved buffer. |
905 | 0 | size_t n = fp->last_offset - fp->delayed_seek; |
906 | 0 | char *start = fp->preserved + (fp->preserved_bytes - n); |
907 | 0 | size_t bytes = n <= nbytes ? n : nbytes; |
908 | 0 | memcpy(buffer, start, bytes); |
909 | 0 | if (bytes < n) { // Part of the preserved buffer still left |
910 | 0 | fp->delayed_seek += bytes; |
911 | 0 | } else { |
912 | 0 | fp->last_offset = fp->delayed_seek = -1; |
913 | 0 | } |
914 | 0 | fp->stream_pos += bytes; |
915 | 0 | return bytes; |
916 | 0 | } |
917 | | |
918 | 0 | if (fp->last_offset >= 0 |
919 | 0 | && fp->delayed_seek > fp->last_offset |
920 | 0 | && fp->delayed_seek - fp->last_offset < MIN_SEEK_FORWARD) { |
921 | | // If not seeking far, just read the data and throw it away. This |
922 | | // is likely to be quicker than opening a new stream |
923 | 0 | to_skip = fp->delayed_seek - fp->last_offset; |
924 | 0 | } else { |
925 | 0 | if (restart_from_position(fp, fp->delayed_seek) < 0) { |
926 | 0 | return -1; |
927 | 0 | } |
928 | 0 | } |
929 | 0 | fp->delayed_seek = -1; |
930 | 0 | fp->last_offset = -1; |
931 | 0 | fp->preserved_bytes = 0; |
932 | 0 | } |
933 | | |
934 | 0 | do { |
935 | 0 | fp->buffer.ptr.rd = buffer; |
936 | 0 | fp->buffer.len = nbytes; |
937 | 0 | fp->paused = 0; |
938 | 0 | if (!fp->finished) { |
939 | 0 | err = curl_easy_pause(fp->easy, CURLPAUSE_CONT); |
940 | 0 | if (err != CURLE_OK) { |
941 | 0 | errno = easy_errno(fp->easy, err); |
942 | 0 | return -1; |
943 | 0 | } |
944 | 0 | } |
945 | | |
946 | 0 | while (! fp->paused && ! fp->finished) { |
947 | 0 | if (wait_perform(fp) < 0) return -1; |
948 | 0 | } |
949 | | |
950 | 0 | got = fp->buffer.ptr.rd - buffer; |
951 | |
|
952 | 0 | if (to_skip >= 0) { // Skipping over a small seek |
953 | 0 | if (got <= to_skip) { // Need to skip more data |
954 | 0 | to_skip -= got; |
955 | 0 | } else { |
956 | 0 | got -= to_skip; |
957 | 0 | if (got > 0) { // If enough was skipped, return the rest |
958 | 0 | memmove(buffer, buffer + to_skip, got); |
959 | 0 | to_skip = -1; |
960 | 0 | } |
961 | 0 | } |
962 | 0 | } |
963 | 0 | } while (to_skip >= 0 && ! fp->finished); |
964 | 0 | fp->buffer.ptr.rd = NULL; |
965 | 0 | fp->buffer.len = 0; |
966 | |
|
967 | 0 | if (fp->finished && fp->final_result != CURLE_OK) { |
968 | 0 | if (is_retryable(fp->easy, fp->final_result) |
969 | 0 | && curl.retry_max > 0) { |
970 | 0 | if (got > 0) { |
971 | | // Return partial data; defer reconnection to next call |
972 | 0 | fp->needs_reconnect = 1; |
973 | 0 | fp->stream_pos += got; |
974 | 0 | return got; |
975 | 0 | } |
976 | | // No data; retry inline |
977 | 0 | if (retry_attempts < curl.retry_max) { |
978 | 0 | long delay = curl.retry_delay_ms; |
979 | 0 | int save_can_seek = fp->can_seek; |
980 | 0 | int i; |
981 | 0 | for (i = 0; i < retry_attempts; i++) { |
982 | 0 | delay *= 2; |
983 | 0 | if (delay > curl.retry_max_delay_ms) { |
984 | 0 | delay = curl.retry_max_delay_ms; |
985 | 0 | break; |
986 | 0 | } |
987 | 0 | } |
988 | 0 | hts_log_warning("Retrying read (attempt %d/%d) at offset %lld", |
989 | 0 | retry_attempts + 1, curl.retry_max, |
990 | 0 | (long long) fp->stream_pos); |
991 | 0 | retry_sleep(delay); |
992 | 0 | if (restart_from_position(fp, fp->stream_pos) == 0) { |
993 | 0 | retry_attempts++; |
994 | 0 | goto retry; |
995 | 0 | } |
996 | 0 | fp->can_seek = save_can_seek; |
997 | 0 | } |
998 | 0 | } |
999 | 0 | errno = easy_errno(fp->easy, fp->final_result); |
1000 | 0 | return -1; |
1001 | 0 | } |
1002 | | |
1003 | 0 | fp->stream_pos += got; |
1004 | 0 | return got; |
1005 | 0 | } |
1006 | | |
1007 | | static size_t send_callback(char *ptr, size_t size, size_t nmemb, void *fpv) |
1008 | 0 | { |
1009 | 0 | hFILE_libcurl *fp = (hFILE_libcurl *) fpv; |
1010 | 0 | size_t n = size * nmemb; |
1011 | |
|
1012 | 0 | if (fp->buffer.len == 0) { |
1013 | | // Send buffer is empty; normally pause, or signal EOF if we're closing |
1014 | 0 | if (fp->closing) return 0; |
1015 | 0 | else { fp->paused = 1; return CURL_READFUNC_PAUSE; } |
1016 | 0 | } |
1017 | | |
1018 | 0 | if (n > fp->buffer.len) n = fp->buffer.len; |
1019 | 0 | memcpy(ptr, fp->buffer.ptr.wr, n); |
1020 | 0 | fp->buffer.ptr.wr += n; |
1021 | 0 | fp->buffer.len -= n; |
1022 | 0 | return n; |
1023 | 0 | } |
1024 | | |
1025 | | static ssize_t libcurl_write(hFILE *fpv, const void *bufferv, size_t nbytes) |
1026 | 0 | { |
1027 | 0 | hFILE_libcurl *fp = (hFILE_libcurl *) fpv; |
1028 | 0 | const char *buffer = (const char *) bufferv; |
1029 | 0 | CURLcode err; |
1030 | |
|
1031 | 0 | fp->buffer.ptr.wr = buffer; |
1032 | 0 | fp->buffer.len = nbytes; |
1033 | 0 | fp->paused = 0; |
1034 | 0 | err = curl_easy_pause(fp->easy, CURLPAUSE_CONT); |
1035 | 0 | if (err != CURLE_OK) { errno = easy_errno(fp->easy, err); return -1; } |
1036 | | |
1037 | 0 | while (! fp->paused && ! fp->finished) |
1038 | 0 | if (wait_perform(fp) < 0) return -1; |
1039 | | |
1040 | 0 | nbytes = fp->buffer.ptr.wr - buffer; |
1041 | 0 | fp->buffer.ptr.wr = NULL; |
1042 | 0 | fp->buffer.len = 0; |
1043 | |
|
1044 | 0 | if (fp->finished && fp->final_result != CURLE_OK) { |
1045 | 0 | errno = easy_errno(fp->easy, fp->final_result); |
1046 | 0 | return -1; |
1047 | 0 | } |
1048 | | |
1049 | 0 | return nbytes; |
1050 | 0 | } |
1051 | | |
1052 | | static void preserve_buffer_content(hFILE_libcurl *fp) |
1053 | 0 | { |
1054 | 0 | if (fp->base.begin == fp->base.end) { |
1055 | 0 | fp->preserved_bytes = 0; |
1056 | 0 | return; |
1057 | 0 | } |
1058 | 0 | if (!fp->preserved |
1059 | 0 | || fp->preserved_size < fp->base.limit - fp->base.buffer) { |
1060 | 0 | fp->preserved = malloc(fp->base.limit - fp->base.buffer); |
1061 | 0 | if (!fp->preserved) return; |
1062 | 0 | fp->preserved_size = fp->base.limit - fp->base.buffer; |
1063 | 0 | } |
1064 | | |
1065 | 0 | assert(fp->base.end - fp->base.begin <= fp->preserved_size); |
1066 | |
|
1067 | 0 | memcpy(fp->preserved, fp->base.begin, fp->base.end - fp->base.begin); |
1068 | 0 | fp->preserved_bytes = fp->base.end - fp->base.begin; |
1069 | 0 | return; |
1070 | 0 | } |
1071 | | |
1072 | | static off_t libcurl_seek(hFILE *fpv, off_t offset, int whence) |
1073 | 0 | { |
1074 | 0 | hFILE_libcurl *fp = (hFILE_libcurl *) fpv; |
1075 | 0 | off_t origin, pos; |
1076 | |
|
1077 | 0 | if (!fp->is_read || !fp->can_seek) { |
1078 | | // Cowardly refuse to seek when writing or a previous seek failed. |
1079 | 0 | errno = ESPIPE; |
1080 | 0 | return -1; |
1081 | 0 | } |
1082 | | |
1083 | 0 | switch (whence) { |
1084 | 0 | case SEEK_SET: |
1085 | 0 | origin = 0; |
1086 | 0 | break; |
1087 | 0 | case SEEK_CUR: |
1088 | 0 | errno = ENOSYS; |
1089 | 0 | return -1; |
1090 | 0 | case SEEK_END: |
1091 | 0 | if (fp->file_size < 0) { errno = ESPIPE; return -1; } |
1092 | 0 | origin = fp->file_size; |
1093 | 0 | break; |
1094 | 0 | default: |
1095 | 0 | errno = EINVAL; |
1096 | 0 | return -1; |
1097 | 0 | } |
1098 | | |
1099 | | // Check 0 <= origin+offset < fp->file_size carefully, avoiding overflow |
1100 | 0 | if ((offset < 0)? origin + offset < 0 |
1101 | 0 | : (fp->file_size >= 0 && offset > fp->file_size - origin)) { |
1102 | 0 | errno = EINVAL; |
1103 | 0 | return -1; |
1104 | 0 | } |
1105 | | |
1106 | 0 | pos = origin + offset; |
1107 | |
|
1108 | 0 | if (fp->tried_seek) { |
1109 | | /* Seeking has worked at least once, so now we can delay doing |
1110 | | the actual work until the next read. This avoids lots of pointless |
1111 | | http or ftp reconnections if the caller does lots of seeks |
1112 | | without any intervening reads. */ |
1113 | 0 | if (fp->delayed_seek < 0) { |
1114 | 0 | fp->last_offset = fp->base.offset + (fp->base.end - fp->base.buffer); |
1115 | | // Stash the current hFILE buffer content in case it's useful later |
1116 | 0 | preserve_buffer_content(fp); |
1117 | 0 | } |
1118 | 0 | fp->delayed_seek = pos; |
1119 | 0 | fp->stream_pos = pos; |
1120 | 0 | return pos; |
1121 | 0 | } |
1122 | | |
1123 | 0 | if (restart_from_position(fp, pos) < 0) { |
1124 | | /* This value for errno may not be entirely true, but the caller may be |
1125 | | able to carry on with the existing handle. */ |
1126 | 0 | errno = ESPIPE; |
1127 | 0 | return -1; |
1128 | 0 | } |
1129 | | |
1130 | 0 | fp->tried_seek = 1; |
1131 | 0 | fp->stream_pos = pos; |
1132 | 0 | return pos; |
1133 | 0 | } |
1134 | | |
1135 | 0 | static int restart_from_position(hFILE_libcurl *fp, off_t pos) { |
1136 | 0 | hFILE_libcurl temp_fp; |
1137 | 0 | CURLcode err; |
1138 | 0 | CURLMcode errm; |
1139 | 0 | int update_headers = 0; |
1140 | 0 | int save_errno = 0; |
1141 | | |
1142 | | // TODO If we seem to be doing random access, use CURLOPT_RANGE to do |
1143 | | // limited reads (e.g. about a BAM block!) so seeking can reuse the |
1144 | | // existing connection more often. |
1145 | | |
1146 | | // Get new headers from the callback (if defined). This changes the |
1147 | | // headers in fp before it gets duplicated, but they should be have been |
1148 | | // sent by now. |
1149 | |
|
1150 | 0 | if (fp->headers.callback) { |
1151 | 0 | if (add_callback_headers(fp) != 0) |
1152 | 0 | return -1; |
1153 | 0 | update_headers = 1; |
1154 | 0 | } |
1155 | 0 | if (fp->headers.auth_hdr_num > 0 && fp->headers.auth) { |
1156 | 0 | if (add_auth_header(fp) != 0) |
1157 | 0 | return -1; |
1158 | 0 | update_headers = 1; |
1159 | 0 | } |
1160 | 0 | if (update_headers) { |
1161 | 0 | struct curl_slist *list = get_header_list(fp); |
1162 | 0 | if (list) { |
1163 | 0 | err = curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list); |
1164 | 0 | if (err != CURLE_OK) { |
1165 | 0 | errno = easy_errno(fp->easy,err); |
1166 | 0 | return -1; |
1167 | 0 | } |
1168 | 0 | } |
1169 | 0 | } |
1170 | | |
1171 | | /* |
1172 | | Duplicate the easy handle, and use CURLOPT_RESUME_FROM_LARGE to open |
1173 | | a new request to the server, reading from the location that we want |
1174 | | to seek to. If the new request works and returns the correct data, |
1175 | | the original easy handle in *fp is closed and replaced with the new |
1176 | | one. If not, we close the new handle and leave *fp unchanged. |
1177 | | */ |
1178 | | |
1179 | 0 | memcpy(&temp_fp, fp, sizeof(temp_fp)); |
1180 | 0 | temp_fp.buffer.len = 0; |
1181 | 0 | temp_fp.buffer.ptr.rd = NULL; |
1182 | 0 | temp_fp.easy = curl_easy_duphandle(fp->easy); |
1183 | 0 | if (!temp_fp.easy) |
1184 | 0 | goto early_error; |
1185 | | |
1186 | 0 | err = curl_easy_setopt(temp_fp.easy, CURLOPT_RESUME_FROM_LARGE,(curl_off_t)pos); |
1187 | 0 | err |= curl_easy_setopt(temp_fp.easy, CURLOPT_PRIVATE, &temp_fp); |
1188 | 0 | err |= curl_easy_setopt(temp_fp.easy, CURLOPT_WRITEDATA, &temp_fp); |
1189 | 0 | if (err != CURLE_OK) { |
1190 | 0 | save_errno = easy_errno(temp_fp.easy, err); |
1191 | 0 | goto error; |
1192 | 0 | } |
1193 | | |
1194 | 0 | temp_fp.buffer.len = 0; // Ensures we only read the response headers |
1195 | 0 | temp_fp.paused = temp_fp.finished = 0; |
1196 | | |
1197 | | // fp->multi and temp_fp.multi are the same. |
1198 | 0 | errm = curl_multi_add_handle(fp->multi, temp_fp.easy); |
1199 | 0 | if (errm != CURLM_OK) { |
1200 | 0 | save_errno = multi_errno(errm); |
1201 | 0 | goto error; |
1202 | 0 | } |
1203 | 0 | temp_fp.nrunning = ++fp->nrunning; |
1204 | |
|
1205 | 0 | while (! temp_fp.paused && ! temp_fp.finished) |
1206 | 0 | if (wait_perform(&temp_fp) < 0) { |
1207 | 0 | save_errno = errno; |
1208 | 0 | goto error_remove; |
1209 | 0 | } |
1210 | | |
1211 | 0 | if (temp_fp.finished && temp_fp.final_result != CURLE_OK) { |
1212 | 0 | save_errno = easy_errno(temp_fp.easy, temp_fp.final_result); |
1213 | 0 | goto error_remove; |
1214 | 0 | } |
1215 | | |
1216 | | // We've got a good response, close the original connection and |
1217 | | // replace it with the new one. |
1218 | | |
1219 | 0 | errm = curl_multi_remove_handle(fp->multi, fp->easy); |
1220 | 0 | if (errm != CURLM_OK) { |
1221 | | // Clean up as much as possible |
1222 | 0 | curl_easy_reset(temp_fp.easy); |
1223 | 0 | if (curl_multi_remove_handle(fp->multi, temp_fp.easy) == CURLM_OK) { |
1224 | 0 | fp->nrunning--; |
1225 | 0 | curl_easy_cleanup(temp_fp.easy); |
1226 | 0 | } |
1227 | 0 | save_errno = multi_errno(errm); |
1228 | 0 | goto early_error; |
1229 | 0 | } |
1230 | 0 | fp->nrunning--; |
1231 | |
|
1232 | 0 | curl_easy_cleanup(fp->easy); |
1233 | 0 | fp->easy = temp_fp.easy; |
1234 | 0 | err = curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp); |
1235 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp); |
1236 | 0 | if (err != CURLE_OK) { |
1237 | 0 | save_errno = easy_errno(fp->easy, err); |
1238 | 0 | curl_easy_reset(fp->easy); |
1239 | 0 | errno = save_errno; |
1240 | 0 | return -1; |
1241 | 0 | } |
1242 | 0 | fp->buffer.len = 0; |
1243 | 0 | fp->paused = temp_fp.paused; |
1244 | 0 | fp->finished = temp_fp.finished; |
1245 | 0 | fp->perform_again = temp_fp.perform_again; |
1246 | 0 | fp->final_result = temp_fp.final_result; |
1247 | |
|
1248 | 0 | return 0; |
1249 | | |
1250 | 0 | error_remove: |
1251 | 0 | curl_easy_reset(temp_fp.easy); // Ensure no pointers to on-stack temp_fp |
1252 | 0 | errm = curl_multi_remove_handle(fp->multi, temp_fp.easy); |
1253 | 0 | if (errm != CURLM_OK) { |
1254 | 0 | errno = multi_errno(errm); |
1255 | 0 | return -1; |
1256 | 0 | } |
1257 | 0 | fp->nrunning--; |
1258 | 0 | error: |
1259 | 0 | curl_easy_cleanup(temp_fp.easy); |
1260 | 0 | early_error: |
1261 | 0 | fp->can_seek = 0; // Don't try to seek again |
1262 | 0 | if (save_errno) |
1263 | 0 | errno = save_errno; |
1264 | 0 | return -1; |
1265 | 0 | } |
1266 | | |
1267 | | static int libcurl_close(hFILE *fpv) |
1268 | 0 | { |
1269 | 0 | hFILE_libcurl *fp = (hFILE_libcurl *) fpv; |
1270 | 0 | CURLcode err; |
1271 | 0 | CURLMcode errm; |
1272 | 0 | int save_errno = 0; |
1273 | | |
1274 | | // Before closing the file, unpause it and perform on it so that uploads |
1275 | | // have the opportunity to signal EOF to the server -- see send_callback(). |
1276 | |
|
1277 | 0 | fp->buffer.len = 0; |
1278 | 0 | fp->closing = 1; |
1279 | 0 | fp->paused = 0; |
1280 | 0 | if (!fp->finished) { |
1281 | 0 | err = curl_easy_pause(fp->easy, CURLPAUSE_CONT); |
1282 | 0 | if (err != CURLE_OK) save_errno = easy_errno(fp->easy, err); |
1283 | 0 | } |
1284 | |
|
1285 | 0 | while (save_errno == 0 && ! fp->paused && ! fp->finished) |
1286 | 0 | if (wait_perform(fp) < 0) save_errno = errno; |
1287 | |
|
1288 | 0 | if (fp->finished && fp->final_result != CURLE_OK) |
1289 | 0 | save_errno = easy_errno(fp->easy, fp->final_result); |
1290 | |
|
1291 | 0 | errm = curl_multi_remove_handle(fp->multi, fp->easy); |
1292 | 0 | if (errm != CURLM_OK && save_errno == 0) save_errno = multi_errno(errm); |
1293 | 0 | fp->nrunning--; |
1294 | |
|
1295 | 0 | curl_easy_cleanup(fp->easy); |
1296 | 0 | curl_multi_cleanup(fp->multi); |
1297 | |
|
1298 | 0 | if (fp->headers.callback) // Tell callback to free any data it needs to |
1299 | 0 | fp->headers.callback(fp->headers.callback_data, NULL); |
1300 | 0 | free_headers(&fp->headers.fixed, 1); |
1301 | 0 | free_headers(&fp->headers.extra, 1); |
1302 | |
|
1303 | 0 | free(fp->preserved); |
1304 | |
|
1305 | 0 | if (save_errno) { errno = save_errno; return -1; } |
1306 | 0 | else return 0; |
1307 | 0 | } |
1308 | | |
1309 | | static const struct hFILE_backend libcurl_backend = |
1310 | | { |
1311 | | libcurl_read, libcurl_write, libcurl_seek, NULL, libcurl_close |
1312 | | }; |
1313 | | |
1314 | | static hFILE * |
1315 | | libcurl_open(const char *url, const char *modes, http_headers *headers) |
1316 | 0 | { |
1317 | 0 | hFILE_libcurl *fp; |
1318 | 0 | struct curl_slist *list; |
1319 | 0 | char mode; |
1320 | 0 | const char *s; |
1321 | 0 | CURLcode err; |
1322 | 0 | CURLMcode errm; |
1323 | 0 | int save, is_recursive; |
1324 | 0 | kstring_t in_header = {0, 0, NULL}; |
1325 | 0 | long response; |
1326 | |
|
1327 | 0 | refresh_retry_config(); |
1328 | |
|
1329 | 0 | is_recursive = strchr(modes, 'R') != NULL; |
1330 | |
|
1331 | 0 | if ((s = strpbrk(modes, "rwa+")) != NULL) { |
1332 | 0 | mode = *s; |
1333 | 0 | if (strpbrk(&s[1], "rwa+")) mode = 'e'; |
1334 | 0 | } |
1335 | 0 | else mode = '\0'; |
1336 | |
|
1337 | 0 | if (mode != 'r' && mode != 'w') { errno = EINVAL; goto early_error; } |
1338 | | |
1339 | 0 | fp = (hFILE_libcurl *) hfile_init(sizeof (hFILE_libcurl), modes, 0); |
1340 | 0 | if (fp == NULL) goto early_error; |
1341 | | |
1342 | 0 | if (headers) { |
1343 | 0 | fp->headers = *headers; |
1344 | 0 | } else { |
1345 | 0 | memset(&fp->headers, 0, sizeof(fp->headers)); |
1346 | 0 | fp->headers.fail_on_error = 1; |
1347 | 0 | } |
1348 | |
|
1349 | 0 | fp->file_size = -1; |
1350 | 0 | fp->buffer.ptr.rd = NULL; |
1351 | 0 | fp->buffer.len = 0; |
1352 | 0 | fp->final_result = (CURLcode) -1; |
1353 | 0 | fp->paused = fp->closing = fp->finished = fp->perform_again = 0; |
1354 | 0 | fp->can_seek = 1; |
1355 | 0 | fp->tried_seek = 0; |
1356 | 0 | fp->needs_reconnect = 0; |
1357 | 0 | fp->delayed_seek = fp->last_offset = -1; |
1358 | 0 | fp->stream_pos = 0; |
1359 | 0 | fp->preserved = NULL; |
1360 | 0 | fp->preserved_bytes = fp->preserved_size = 0; |
1361 | 0 | fp->is_recursive = is_recursive; |
1362 | 0 | fp->nrunning = 0; |
1363 | 0 | fp->easy = NULL; |
1364 | |
|
1365 | 0 | fp->multi = curl_multi_init(); |
1366 | 0 | if (fp->multi == NULL) { errno = ENOMEM; goto error; } |
1367 | | |
1368 | 0 | fp->easy = curl_easy_init(); |
1369 | 0 | if (fp->easy == NULL) { errno = ENOMEM; goto error; } |
1370 | | |
1371 | | // Make a route to the hFILE_libcurl* given just a CURL* easy handle |
1372 | 0 | err = curl_easy_setopt(fp->easy, CURLOPT_PRIVATE, fp); |
1373 | | |
1374 | | // Avoid many repeated CWD calls with FTP, instead requesting the filename |
1375 | | // by full path (but not strictly compliant with RFC1738). |
1376 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_FTP_FILEMETHOD, |
1377 | 0 | (long) CURLFTPMETHOD_NOCWD); |
1378 | |
|
1379 | 0 | if (mode == 'r') { |
1380 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEFUNCTION, recv_callback); |
1381 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_WRITEDATA, fp); |
1382 | 0 | fp->is_read = 1; |
1383 | 0 | } |
1384 | 0 | else { |
1385 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_READFUNCTION, send_callback); |
1386 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_READDATA, fp); |
1387 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_UPLOAD, 1L); |
1388 | 0 | if (append_header(&fp->headers.fixed, |
1389 | 0 | "Transfer-Encoding: chunked", 1) < 0) |
1390 | 0 | goto error; |
1391 | 0 | fp->is_read = 0; |
1392 | 0 | } |
1393 | | |
1394 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_SHARE, curl.share); |
1395 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_URL, url); |
1396 | 0 | { |
1397 | 0 | char* env_curl_ca_bundle = getenv("CURL_CA_BUNDLE"); |
1398 | 0 | if (env_curl_ca_bundle) { |
1399 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_CAINFO, env_curl_ca_bundle); |
1400 | 0 | } |
1401 | 0 | } |
1402 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_USERAGENT, curl.useragent.s); |
1403 | 0 | if (curl.low_speed_limit > 0 && curl.low_speed_time > 0) { |
1404 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_LOW_SPEED_LIMIT, |
1405 | 0 | curl.low_speed_limit); |
1406 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_LOW_SPEED_TIME, |
1407 | 0 | curl.low_speed_time); |
1408 | 0 | } |
1409 | 0 | if (fp->headers.callback) { |
1410 | 0 | if (add_callback_headers(fp) != 0) goto error; |
1411 | 0 | } |
1412 | 0 | if (get_auth_token(fp, url) < 0) |
1413 | 0 | goto error; |
1414 | 0 | if ((list = get_header_list(fp)) != NULL) |
1415 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_HTTPHEADER, list); |
1416 | |
|
1417 | 0 | if (hts_verbose <= 8 && fp->headers.fail_on_error) |
1418 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_FAILONERROR, 1L); |
1419 | 0 | if (hts_verbose >= 8) |
1420 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_VERBOSE, 1L); |
1421 | |
|
1422 | 0 | if (fp->headers.redirect) { |
1423 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, header_callback); |
1424 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, (void *)&in_header); |
1425 | 0 | } else { |
1426 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_FOLLOWLOCATION, 1L); |
1427 | 0 | } |
1428 | |
|
1429 | 0 | if (err != 0) { errno = ENOSYS; goto error; } |
1430 | | |
1431 | 0 | errm = curl_multi_add_handle(fp->multi, fp->easy); |
1432 | 0 | if (errm != CURLM_OK) { errno = multi_errno(errm); goto error; } |
1433 | 0 | fp->nrunning++; |
1434 | |
|
1435 | 0 | while (! fp->paused && ! fp->finished) { |
1436 | 0 | if (wait_perform(fp) < 0) goto error_remove; |
1437 | 0 | } |
1438 | | |
1439 | 0 | curl_easy_getinfo(fp->easy, CURLINFO_RESPONSE_CODE, &response); |
1440 | 0 | if (fp->headers.http_response_ptr) { |
1441 | 0 | *fp->headers.http_response_ptr = response; |
1442 | 0 | } |
1443 | |
|
1444 | 0 | if (fp->finished && fp->final_result != CURLE_OK) { |
1445 | 0 | if (is_retryable(fp->easy, fp->final_result) |
1446 | 0 | && curl.retry_max > 0) { |
1447 | 0 | long delay = curl.retry_delay_ms; |
1448 | 0 | int attempt, save_can_seek; |
1449 | 0 | for (attempt = 0; attempt < curl.retry_max; attempt++) { |
1450 | 0 | hts_log_warning("Retrying open (attempt %d/%d)", |
1451 | 0 | attempt + 1, curl.retry_max); |
1452 | 0 | retry_sleep(delay); |
1453 | 0 | save_can_seek = fp->can_seek; |
1454 | 0 | if (restart_from_position(fp, 0) == 0) { |
1455 | 0 | if (!fp->finished || fp->final_result == CURLE_OK) |
1456 | 0 | goto open_ok; |
1457 | 0 | if (!is_retryable(fp->easy, fp->final_result)) |
1458 | 0 | break; |
1459 | 0 | } |
1460 | 0 | fp->can_seek = save_can_seek; |
1461 | 0 | delay *= 2; |
1462 | 0 | if (delay > curl.retry_max_delay_ms) |
1463 | 0 | delay = curl.retry_max_delay_ms; |
1464 | 0 | } |
1465 | 0 | } |
1466 | 0 | errno = easy_errno(fp->easy, fp->final_result); |
1467 | 0 | goto error_remove; |
1468 | 0 | } |
1469 | | |
1470 | 0 | open_ok: |
1471 | 0 | if (fp->headers.redirect) { |
1472 | 0 | if (response >= 300 && response < 400) { // redirection |
1473 | 0 | kstring_t new_url = {0, 0, NULL}; |
1474 | |
|
1475 | 0 | if (fp->headers.redirect(fp->headers.redirect_data, response, |
1476 | 0 | &in_header, &new_url)) { |
1477 | 0 | errno = ENOSYS; |
1478 | 0 | goto error; |
1479 | 0 | } |
1480 | | |
1481 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_URL, new_url.s); |
1482 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, NULL); |
1483 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, NULL); |
1484 | 0 | free(ks_release(&in_header)); |
1485 | |
|
1486 | 0 | if (err != 0) { errno = ENOSYS; goto error; } |
1487 | 0 | free(ks_release(&new_url)); |
1488 | |
|
1489 | 0 | if (restart_from_position(fp, 0) < 0) { |
1490 | 0 | goto error_remove; |
1491 | 0 | } |
1492 | | |
1493 | 0 | if (fp->headers.http_response_ptr) { |
1494 | 0 | curl_easy_getinfo(fp->easy, CURLINFO_RESPONSE_CODE, |
1495 | 0 | fp->headers.http_response_ptr); |
1496 | 0 | } |
1497 | |
|
1498 | 0 | if (fp->finished && fp->final_result != CURLE_OK) { |
1499 | 0 | errno = easy_errno(fp->easy, fp->final_result); |
1500 | 0 | goto error_remove; |
1501 | 0 | } |
1502 | 0 | } else { |
1503 | | // we no longer need to look at the headers |
1504 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERFUNCTION, NULL); |
1505 | 0 | err |= curl_easy_setopt(fp->easy, CURLOPT_HEADERDATA, NULL); |
1506 | 0 | free(ks_release(&in_header)); |
1507 | |
|
1508 | 0 | if (err != 0) { errno = ENOSYS; goto error; } |
1509 | 0 | } |
1510 | 0 | } |
1511 | | |
1512 | 0 | if (mode == 'r') { |
1513 | 0 | #if LIBCURL_VERSION_NUM >= 0x073700 // 7.55.0 |
1514 | 0 | curl_off_t offset; |
1515 | |
|
1516 | 0 | if (curl_easy_getinfo(fp->easy, CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, |
1517 | 0 | &offset) == CURLE_OK && offset > 0) |
1518 | 0 | fp->file_size = (off_t) offset; |
1519 | | #else |
1520 | | double dval; |
1521 | | |
1522 | | if (curl_easy_getinfo(fp->easy, CURLINFO_CONTENT_LENGTH_DOWNLOAD, |
1523 | | &dval) == CURLE_OK && dval >= 0.0) |
1524 | | fp->file_size = (off_t) (dval + 0.1); |
1525 | | #endif |
1526 | 0 | } |
1527 | 0 | fp->base.backend = &libcurl_backend; |
1528 | 0 | return &fp->base; |
1529 | | |
1530 | 0 | error_remove: |
1531 | 0 | save = errno; |
1532 | 0 | (void) curl_multi_remove_handle(fp->multi, fp->easy); |
1533 | 0 | fp->nrunning--; |
1534 | 0 | errno = save; |
1535 | |
|
1536 | 0 | error: |
1537 | 0 | if (fp->headers.redirect) free(in_header.s); |
1538 | 0 | save = errno; |
1539 | 0 | if (fp->easy) curl_easy_cleanup(fp->easy); |
1540 | 0 | if (fp->multi) curl_multi_cleanup(fp->multi); |
1541 | 0 | free_headers(&fp->headers.extra, 1); |
1542 | 0 | hfile_destroy((hFILE *) fp); |
1543 | 0 | errno = save; |
1544 | 0 | return NULL; |
1545 | | |
1546 | 0 | early_error: |
1547 | 0 | return NULL; |
1548 | 0 | } |
1549 | | |
1550 | | static hFILE *hopen_libcurl(const char *url, const char *modes) |
1551 | 0 | { |
1552 | 0 | return libcurl_open(url, modes, NULL); |
1553 | 0 | } |
1554 | | |
1555 | | static int parse_va_list(http_headers *headers, va_list args) |
1556 | 0 | { |
1557 | 0 | const char *argtype; |
1558 | |
|
1559 | 0 | while ((argtype = va_arg(args, const char *)) != NULL) |
1560 | 0 | if (strcmp(argtype, "httphdr:v") == 0) { |
1561 | 0 | const char **hdr; |
1562 | 0 | for (hdr = va_arg(args, const char **); *hdr; hdr++) { |
1563 | 0 | if (append_header(&headers->fixed, *hdr, 1) < 0) |
1564 | 0 | return -1; |
1565 | 0 | if (is_authorization(*hdr)) |
1566 | 0 | headers->auth_hdr_num = -1; |
1567 | 0 | } |
1568 | 0 | } |
1569 | 0 | else if (strcmp(argtype, "httphdr:l") == 0) { |
1570 | 0 | const char *hdr; |
1571 | 0 | while ((hdr = va_arg(args, const char *)) != NULL) { |
1572 | 0 | if (append_header(&headers->fixed, hdr, 1) < 0) |
1573 | 0 | return -1; |
1574 | 0 | if (is_authorization(hdr)) |
1575 | 0 | headers->auth_hdr_num = -1; |
1576 | 0 | } |
1577 | 0 | } |
1578 | 0 | else if (strcmp(argtype, "httphdr") == 0) { |
1579 | 0 | const char *hdr = va_arg(args, const char *); |
1580 | 0 | if (hdr) { |
1581 | 0 | if (append_header(&headers->fixed, hdr, 1) < 0) |
1582 | 0 | return -1; |
1583 | 0 | if (is_authorization(hdr)) |
1584 | 0 | headers->auth_hdr_num = -1; |
1585 | 0 | } |
1586 | 0 | } |
1587 | 0 | else if (strcmp(argtype, "httphdr_callback") == 0) { |
1588 | 0 | headers->callback = va_arg(args, const hts_httphdr_callback); |
1589 | 0 | } |
1590 | 0 | else if (strcmp(argtype, "httphdr_callback_data") == 0) { |
1591 | 0 | headers->callback_data = va_arg(args, void *); |
1592 | 0 | } |
1593 | 0 | else if (strcmp(argtype, "va_list") == 0) { |
1594 | 0 | va_list *args2 = va_arg(args, va_list *); |
1595 | 0 | if (args2) { |
1596 | 0 | if (parse_va_list(headers, *args2) < 0) return -1; |
1597 | 0 | } |
1598 | 0 | } |
1599 | 0 | else if (strcmp(argtype, "auth_token_enabled") == 0) { |
1600 | 0 | const char *flag = va_arg(args, const char *); |
1601 | 0 | if (strcmp(flag, "false") == 0) |
1602 | 0 | headers->auth_hdr_num = -3; |
1603 | 0 | } |
1604 | 0 | else if (strcmp(argtype, "redirect_callback") == 0) { |
1605 | 0 | headers->redirect = va_arg(args, const redirect_callback); |
1606 | 0 | } |
1607 | 0 | else if (strcmp(argtype, "redirect_callback_data") == 0) { |
1608 | 0 | headers->redirect_data = va_arg(args, void *); |
1609 | 0 | } |
1610 | 0 | else if (strcmp(argtype, "http_response_ptr") == 0) { |
1611 | 0 | headers->http_response_ptr = va_arg(args, long *); |
1612 | 0 | } |
1613 | 0 | else if (strcmp(argtype, "fail_on_error") == 0) { |
1614 | 0 | headers->fail_on_error = va_arg(args, int); |
1615 | 0 | } |
1616 | 0 | else { errno = EINVAL; return -1; } |
1617 | | |
1618 | 0 | return 0; |
1619 | 0 | } |
1620 | | |
1621 | | /* |
1622 | | HTTP headers to be added to the request can be passed in as extra |
1623 | | arguments to hopen(). The headers can be specified as follows: |
1624 | | |
1625 | | * Single header: |
1626 | | hopen(url, mode, "httphdr", "X-Hdr-1: text", NULL); |
1627 | | |
1628 | | * Multiple headers in the argument list: |
1629 | | hopen(url, mode, "httphdr:l", "X-Hdr-1: text", "X-Hdr-2: text", NULL, NULL); |
1630 | | |
1631 | | * Multiple headers in a char* array: |
1632 | | hopen(url, mode, "httphdr:v", hdrs, NULL); |
1633 | | where `hdrs` is a char **. The list ends with a NULL pointer. |
1634 | | |
1635 | | * A callback function |
1636 | | hopen(url, mode, "httphdr_callback", func, |
1637 | | "httphdr_callback_data", arg, NULL); |
1638 | | `func` has type |
1639 | | int (* hts_httphdr_callback) (void *cb_data, char ***hdrs); |
1640 | | `arg` is passed to the callback as a void *. |
1641 | | |
1642 | | The function is called at file open, and when attempting to seek (which |
1643 | | opens a new HTTP request). This allows, for example, access tokens |
1644 | | that may have gone stale to be regenerated. The function is also |
1645 | | called (with `hdrs` == NULL) on file close so that the callback can |
1646 | | free any memory that it needs to. |
1647 | | |
1648 | | The callback should return 0 on success, non-zero on failure. It should |
1649 | | return in *hdrs a list of strings containing the new headers (terminated |
1650 | | with a NULL pointer). These will replace any headers previously supplied |
1651 | | by the callback. If no changes are necessary, it can return NULL |
1652 | | in *hdrs, in which case the previous headers will be left unchanged. |
1653 | | |
1654 | | Ownership of the strings in the header list passes to hfile_libcurl, |
1655 | | so the callback should not attempt to use or free them itself. The memory |
1656 | | containing the array belongs to the callback and will not be freed by |
1657 | | hfile_libcurl. |
1658 | | |
1659 | | Headers supplied by the callback are appended after any specified |
1660 | | using the "httphdr", "httphdr:l" or "httphdr:v" methods. No attempt |
1661 | | is made to replace these headers (even if a key is repeated) so anything |
1662 | | that is expected to vary needs to come from the callback. |
1663 | | */ |
1664 | | |
1665 | | static hFILE *vhopen_libcurl(const char *url, const char *modes, va_list args) |
1666 | 0 | { |
1667 | 0 | hFILE *fp = NULL; |
1668 | 0 | http_headers headers = { .fail_on_error = 1 }; |
1669 | |
|
1670 | 0 | if (parse_va_list(&headers, args) == 0) { |
1671 | 0 | fp = libcurl_open(url, modes, &headers); |
1672 | 0 | } |
1673 | |
|
1674 | 0 | if (!fp) { |
1675 | 0 | free_headers(&headers.fixed, 1); |
1676 | 0 | } |
1677 | 0 | return fp; |
1678 | 0 | } |
1679 | | |
1680 | | int PLUGIN_GLOBAL(hfile_plugin_init,_libcurl)(struct hFILE_plugin *self) |
1681 | 1 | { |
1682 | 1 | static const struct hFILE_scheme_handler handler = |
1683 | 1 | { hopen_libcurl, hfile_always_remote, "libcurl", |
1684 | 1 | 2000 + 50, |
1685 | 1 | vhopen_libcurl }; |
1686 | | |
1687 | | #ifdef ENABLE_PLUGINS |
1688 | | // Embed version string for examination via strings(1) or what(1) |
1689 | | static const char id[] = |
1690 | | "@(#)hfile_libcurl plugin (htslib)\t" HTS_VERSION_TEXT; |
1691 | | const char *version = strchr(id, '\t')+1; |
1692 | | #else |
1693 | 1 | const char *version = hts_version(); |
1694 | 1 | #endif |
1695 | 1 | const curl_version_info_data *info; |
1696 | 1 | const char * const *protocol; |
1697 | 1 | const char *auth; |
1698 | 1 | CURLcode err; |
1699 | 1 | CURLSHcode errsh; |
1700 | | |
1701 | 1 | err = curl_global_init(CURL_GLOBAL_ALL); |
1702 | 1 | if (err != CURLE_OK) { errno = easy_errno(NULL, err); return -1; } |
1703 | | |
1704 | 1 | curl.share = curl_share_init(); |
1705 | 1 | if (curl.share == NULL) { curl_global_cleanup(); errno = EIO; return -1; } |
1706 | 1 | errsh = curl_share_setopt(curl.share, CURLSHOPT_LOCKFUNC, share_lock); |
1707 | 1 | errsh |= curl_share_setopt(curl.share, CURLSHOPT_UNLOCKFUNC, share_unlock); |
1708 | 1 | errsh |= curl_share_setopt(curl.share, CURLSHOPT_SHARE, CURL_LOCK_DATA_DNS); |
1709 | 1 | if (errsh != 0) { |
1710 | 0 | curl_share_cleanup(curl.share); |
1711 | 0 | curl_global_cleanup(); |
1712 | 0 | errno = EIO; |
1713 | 0 | return -1; |
1714 | 0 | } |
1715 | | |
1716 | 1 | if ((auth = getenv("HTS_AUTH_LOCATION")) != NULL) { |
1717 | 0 | curl.auth_path = strdup(auth); |
1718 | 0 | curl.auth_map = kh_init(auth_map); |
1719 | 0 | if (!curl.auth_path || !curl.auth_map) { |
1720 | 0 | int save_errno = errno; |
1721 | 0 | free(curl.auth_path); |
1722 | 0 | kh_destroy(auth_map, curl.auth_map); |
1723 | 0 | curl_share_cleanup(curl.share); |
1724 | 0 | curl_global_cleanup(); |
1725 | 0 | errno = save_errno; |
1726 | 0 | return -1; |
1727 | 0 | } |
1728 | 0 | } |
1729 | 1 | if ((auth = getenv("HTS_ALLOW_UNENCRYPTED_AUTHORIZATION_HEADER")) != NULL |
1730 | 0 | && strcmp(auth, "I understand the risks") == 0) { |
1731 | 0 | curl.allow_unencrypted_auth_header = 1; |
1732 | 0 | } |
1733 | | |
1734 | 1 | { |
1735 | 1 | const char *val; |
1736 | 1 | if ((val = getenv("HTS_RETRY_MAX")) != NULL) |
1737 | 0 | curl.retry_max = atoi(val); |
1738 | 1 | if ((val = getenv("HTS_RETRY_DELAY")) != NULL) |
1739 | 0 | curl.retry_delay_ms = atol(val); |
1740 | 1 | if ((val = getenv("HTS_RETRY_MAX_DELAY")) != NULL) |
1741 | 0 | curl.retry_max_delay_ms = atol(val); |
1742 | 1 | if ((val = getenv("HTS_LOW_SPEED_LIMIT")) != NULL) |
1743 | 0 | curl.low_speed_limit = atol(val); |
1744 | 1 | if ((val = getenv("HTS_LOW_SPEED_TIME")) != NULL) |
1745 | 0 | curl.low_speed_time = atol(val); |
1746 | 1 | } |
1747 | | |
1748 | 1 | info = curl_version_info(CURLVERSION_NOW); |
1749 | 1 | ksprintf(&curl.useragent, "htslib/%s libcurl/%s", version, info->version); |
1750 | | |
1751 | 1 | self->name = "libcurl"; |
1752 | 1 | self->destroy = libcurl_exit; |
1753 | | |
1754 | 24 | for (protocol = info->protocols; *protocol; protocol++) |
1755 | 23 | hfile_add_scheme_handler(*protocol, &handler); |
1756 | 1 | return 0; |
1757 | 1 | } |