Line | Count | Source |
1 | | /* hfile.c -- buffered low-level input/output streams. |
2 | | |
3 | | Copyright (C) 2013-2021, 2023-2024 Genome Research Ltd. |
4 | | |
5 | | Author: John Marshall <jm18@sanger.ac.uk> |
6 | | |
7 | | Permission is hereby granted, free of charge, to any person obtaining a copy |
8 | | of this software and associated documentation files (the "Software"), to deal |
9 | | in the Software without restriction, including without limitation the rights |
10 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
11 | | copies of the Software, and to permit persons to whom the Software is |
12 | | furnished to do so, subject to the following conditions: |
13 | | |
14 | | The above copyright notice and this permission notice shall be included in |
15 | | all copies or substantial portions of the Software. |
16 | | |
17 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
18 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
19 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
20 | | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
21 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
22 | | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
23 | | DEALINGS IN THE SOFTWARE. */ |
24 | | |
25 | | #define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h |
26 | | #include <config.h> |
27 | | |
28 | | #include <stdio.h> |
29 | | #include <stdlib.h> |
30 | | #include <stddef.h> |
31 | | #include <string.h> |
32 | | #include <errno.h> |
33 | | #include <limits.h> |
34 | | |
35 | | #include <pthread.h> |
36 | | |
37 | | #ifdef ENABLE_PLUGINS |
38 | | #if defined(_WIN32) || defined(__CYGWIN__) || defined(__MSYS__) |
39 | | #define USING_WINDOWS_PLUGIN_DLLS |
40 | | #include <dlfcn.h> |
41 | | #endif |
42 | | #endif |
43 | | |
44 | | #include "htslib/hfile.h" |
45 | | #include "hfile_internal.h" |
46 | | #include "htslib/kstring.h" |
47 | | |
48 | | #ifndef ENOTSUP |
49 | | #define ENOTSUP EINVAL |
50 | | #endif |
51 | | #ifndef EOVERFLOW |
52 | | #define EOVERFLOW ERANGE |
53 | | #endif |
54 | | #ifndef EPROTONOSUPPORT |
55 | | #define EPROTONOSUPPORT ENOSYS |
56 | | #endif |
57 | | |
58 | | #ifndef SSIZE_MAX /* SSIZE_MAX is POSIX 1 */ |
59 | | #define SSIZE_MAX LONG_MAX |
60 | | #endif |
61 | | |
62 | | /* hFILE fields are used as follows: |
63 | | |
64 | | char *buffer; // Pointer to the start of the I/O buffer |
65 | | char *begin; // First not-yet-read character / unused position |
66 | | char *end; // First unfilled/unfillable position |
67 | | char *limit; // Pointer to the first position past the buffer |
68 | | |
69 | | const hFILE_backend *backend; // Methods to refill/flush I/O buffer |
70 | | |
71 | | off_t offset; // Offset within the stream of buffer position 0 |
72 | | unsigned at_eof:1;// For reading, whether EOF has been seen |
73 | | unsigned mobile:1;// Buffer is a mobile window or fixed full contents |
74 | | unsigned readonly:1;// Whether opened as "r" rather than "r+"/"w"/"a" |
75 | | int has_errno; // Error number from the last failure on this stream |
76 | | |
77 | | For reading, begin is the first unread character in the buffer and end is the |
78 | | first unfilled position: |
79 | | |
80 | | -----------ABCDEFGHIJKLMNO--------------- |
81 | | ^buffer ^begin ^end ^limit |
82 | | |
83 | | For writing, begin is the first unused position and end is unused so remains |
84 | | equal to buffer: |
85 | | |
86 | | ABCDEFGHIJKLMNOPQRSTUVWXYZ--------------- |
87 | | ^buffer ^begin ^limit |
88 | | ^end |
89 | | |
90 | | Thus if begin > end then there is a non-empty write buffer, if begin < end |
91 | | then there is a non-empty read buffer, and if begin == end then both buffers |
92 | | are empty. In all cases, the stream's file position indicator corresponds |
93 | | to the position pointed to by begin. |
94 | | |
95 | | The above is the normal scenario of a mobile window. For in-memory |
96 | | streams (eg via hfile_init_fixed) the buffer can be used as the full |
97 | | contents without any separate backend behind it. These always have at_eof |
98 | | set, offset set to 0, need no read() method, and should just return EINVAL |
99 | | for seek(): |
100 | | |
101 | | abcdefghijkLMNOPQRSTUVWXYZ------ |
102 | | ^buffer ^begin ^end ^limit |
103 | | */ |
104 | | HTSLIB_EXPORT |
105 | | hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity) |
106 | 14.6k | { |
107 | 14.6k | hFILE *fp = (hFILE *) malloc(struct_size); |
108 | 14.6k | if (fp == NULL) goto error; |
109 | | |
110 | 14.6k | const int maxcap = 128*1024; |
111 | | |
112 | 14.6k | if (capacity == 0) capacity = maxcap; |
113 | | // FIXME For now, clamp input buffer sizes so mpileup doesn't eat memory |
114 | 14.6k | if (strchr(mode, 'r') && capacity > maxcap) capacity = maxcap; |
115 | | |
116 | 14.6k | #ifdef HAVE_POSIX_MEMALIGN |
117 | 14.6k | fp->buffer = NULL; |
118 | 14.6k | if (posix_memalign((void **)&fp->buffer, 256, capacity) < 0) |
119 | 0 | goto error; |
120 | | #else |
121 | | fp->buffer = (char *) malloc(capacity); |
122 | | if (fp->buffer == NULL) goto error; |
123 | | #endif |
124 | | |
125 | 14.6k | fp->begin = fp->end = fp->buffer; |
126 | 14.6k | fp->limit = &fp->buffer[capacity]; |
127 | | |
128 | 14.6k | fp->offset = 0; |
129 | 14.6k | fp->at_eof = 0; |
130 | 14.6k | fp->mobile = 1; |
131 | 14.6k | fp->readonly = (strchr(mode, 'r') && ! strchr(mode, '+')); |
132 | 14.6k | fp->preserve = 0; |
133 | 14.6k | fp->has_errno = 0; |
134 | 14.6k | return fp; |
135 | | |
136 | 0 | error: |
137 | 0 | hfile_destroy(fp); |
138 | 0 | return NULL; |
139 | 14.6k | } |
140 | | |
141 | | hFILE *hfile_init_fixed(size_t struct_size, const char *mode, |
142 | | char *buffer, size_t buf_filled, size_t buf_size) |
143 | 20.4k | { |
144 | 20.4k | hFILE *fp = (hFILE *) malloc(struct_size); |
145 | 20.4k | if (fp == NULL) return NULL; |
146 | | |
147 | 20.4k | fp->buffer = fp->begin = buffer; |
148 | 20.4k | fp->end = &fp->buffer[buf_filled]; |
149 | 20.4k | fp->limit = &fp->buffer[buf_size]; |
150 | | |
151 | 20.4k | fp->offset = 0; |
152 | 20.4k | fp->at_eof = 1; |
153 | 20.4k | fp->mobile = 0; |
154 | 20.4k | fp->readonly = (strchr(mode, 'r') && ! strchr(mode, '+')); |
155 | 20.4k | fp->preserve = 0; |
156 | 20.4k | fp->has_errno = 0; |
157 | 20.4k | return fp; |
158 | 20.4k | } |
159 | | |
160 | | static const struct hFILE_backend mem_backend; |
161 | | |
162 | | HTSLIB_EXPORT |
163 | | void hfile_destroy(hFILE *fp) |
164 | 35.2k | { |
165 | 35.2k | int save = errno; |
166 | 35.2k | if (fp) free(fp->buffer); |
167 | 35.2k | free(fp); |
168 | 35.2k | errno = save; |
169 | 35.2k | } |
170 | | |
171 | | static inline int writebuffer_is_nonempty(hFILE *fp) |
172 | 1.42M | { |
173 | 1.42M | return fp->begin > fp->end; |
174 | 1.42M | } |
175 | | |
176 | | /* Refills the read buffer from the backend (once, so may only partially |
177 | | fill the buffer), returning the number of additional characters read |
178 | | (which might be 0), or negative when an error occurred. */ |
179 | | static ssize_t refill_buffer(hFILE *fp) |
180 | 30.1k | { |
181 | 30.1k | ssize_t n; |
182 | | |
183 | | // Move any unread characters to the start of the buffer |
184 | 30.1k | if (fp->mobile && fp->begin > fp->buffer) { |
185 | 0 | fp->offset += fp->begin - fp->buffer; |
186 | 0 | memmove(fp->buffer, fp->begin, fp->end - fp->begin); |
187 | 0 | fp->end = &fp->buffer[fp->end - fp->begin]; |
188 | 0 | fp->begin = fp->buffer; |
189 | 0 | } |
190 | | |
191 | | // Read into the available buffer space at fp->[end,limit) |
192 | 30.1k | if (fp->at_eof || fp->end == fp->limit) n = 0; |
193 | 9 | else { |
194 | 9 | n = fp->backend->read(fp, fp->end, fp->limit - fp->end); |
195 | 9 | if (n < 0) { fp->has_errno = errno; return n; } |
196 | 6 | else if (n == 0) fp->at_eof = 1; |
197 | 9 | } |
198 | | |
199 | 30.1k | fp->end += n; |
200 | 30.1k | return n; |
201 | 30.1k | } |
202 | | |
203 | | /* |
204 | | * Changes the buffer size for an hFILE. Ideally this is done |
205 | | * immediately after opening. If performed later, this function may |
206 | | * fail if we are reducing the buffer size and the current offset into |
207 | | * the buffer is beyond the new capacity. |
208 | | * |
209 | | * Returns 0 on success; |
210 | | * -1 on failure. |
211 | | */ |
212 | | HTSLIB_EXPORT |
213 | 0 | int hfile_set_blksize(hFILE *fp, size_t bufsiz) { |
214 | 0 | char *buffer; |
215 | 0 | ptrdiff_t curr_used; |
216 | 0 | if (!fp) return -1; |
217 | 0 | curr_used = (fp->begin > fp->end ? fp->begin : fp->end) - fp->buffer; |
218 | 0 | if (bufsiz == 0) bufsiz = 32768; |
219 | | |
220 | | // Ensure buffer resize will not erase live data |
221 | 0 | if (bufsiz < curr_used) |
222 | 0 | return -1; |
223 | | |
224 | 0 | if (!(buffer = (char *) realloc(fp->buffer, bufsiz))) return -1; |
225 | | |
226 | 0 | fp->begin = buffer + (fp->begin - fp->buffer); |
227 | 0 | fp->end = buffer + (fp->end - fp->buffer); |
228 | 0 | fp->buffer = buffer; |
229 | 0 | fp->limit = &fp->buffer[bufsiz]; |
230 | |
|
231 | 0 | return 0; |
232 | 0 | } |
233 | | |
234 | | /* Called only from hgetc(), when our buffer is empty. */ |
235 | | HTSLIB_EXPORT |
236 | | int hgetc2(hFILE *fp) |
237 | 205 | { |
238 | 205 | return (refill_buffer(fp) > 0)? (unsigned char) *(fp->begin++) : EOF; |
239 | 205 | } |
240 | | |
241 | | ssize_t hgetdelim(char *buffer, size_t size, int delim, hFILE *fp) |
242 | 1.38M | { |
243 | 1.38M | char *found; |
244 | 1.38M | size_t n, copied = 0; |
245 | 1.38M | ssize_t got; |
246 | | |
247 | 1.38M | if (size < 1 || size > SSIZE_MAX) { |
248 | 0 | fp->has_errno = errno = EINVAL; |
249 | 0 | return -1; |
250 | 0 | } |
251 | 1.38M | if (writebuffer_is_nonempty(fp)) { |
252 | 0 | fp->has_errno = errno = EBADF; |
253 | 0 | return -1; |
254 | 0 | } |
255 | | |
256 | 1.38M | --size; /* to allow space for the NUL terminator */ |
257 | | |
258 | 1.38M | do { |
259 | 1.38M | n = fp->end - fp->begin; |
260 | 1.38M | if (n > size - copied) n = size - copied; |
261 | | |
262 | | /* Look in the hFILE buffer for the delimiter */ |
263 | 1.38M | found = memchr(fp->begin, delim, n); |
264 | 1.38M | if (found != NULL) { |
265 | 1.34M | n = found - fp->begin + 1; |
266 | 1.34M | memcpy(buffer + copied, fp->begin, n); |
267 | 1.34M | buffer[n + copied] = '\0'; |
268 | 1.34M | fp->begin += n; |
269 | 1.34M | return n + copied; |
270 | 1.34M | } |
271 | | |
272 | | /* No delimiter yet, copy as much as we can and refill if necessary */ |
273 | 37.5k | memcpy(buffer + copied, fp->begin, n); |
274 | 37.5k | fp->begin += n; |
275 | 37.5k | copied += n; |
276 | | |
277 | 37.5k | if (copied == size) { /* Output buffer full */ |
278 | 20.2k | buffer[copied] = '\0'; |
279 | 20.2k | return copied; |
280 | 20.2k | } |
281 | | |
282 | 17.3k | got = refill_buffer(fp); |
283 | 17.3k | } while (got > 0); |
284 | | |
285 | 17.3k | if (got < 0) return -1; /* Error on refill. */ |
286 | | |
287 | 17.3k | buffer[copied] = '\0'; /* EOF, return anything that was copied. */ |
288 | 17.3k | return copied; |
289 | 17.3k | } |
290 | | |
291 | | char *hgets(char *buffer, int size, hFILE *fp) |
292 | 0 | { |
293 | 0 | if (size < 1) { |
294 | 0 | fp->has_errno = errno = EINVAL; |
295 | 0 | return NULL; |
296 | 0 | } |
297 | 0 | return hgetln(buffer, size, fp) > 0 ? buffer : NULL; |
298 | 0 | } |
299 | | |
300 | | ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes) |
301 | 62.6k | { |
302 | 62.6k | size_t n = fp->end - fp->begin; |
303 | 62.6k | while (n < nbytes) { |
304 | 12.6k | ssize_t ret = refill_buffer(fp); |
305 | 12.6k | if (ret < 0) return ret; |
306 | 12.6k | else if (ret == 0) break; |
307 | 0 | else n += ret; |
308 | 12.6k | } |
309 | | |
310 | 62.6k | if (n > nbytes) n = nbytes; |
311 | 62.6k | memcpy(buffer, fp->begin, n); |
312 | 62.6k | return n; |
313 | 62.6k | } |
314 | | |
315 | | /* Called only from hread(); when called, our buffer is empty and nread bytes |
316 | | have already been placed in the destination buffer. */ |
317 | | HTSLIB_EXPORT |
318 | | ssize_t hread2(hFILE *fp, void *destv, size_t nbytes, size_t nread) |
319 | 10 | { |
320 | 10 | const size_t capacity = fp->limit - fp->buffer; |
321 | 10 | int buffer_invalidated = 0; |
322 | 10 | char *dest = (char *) destv; |
323 | 10 | dest += nread, nbytes -= nread; |
324 | | |
325 | | // Read large requests directly into the destination buffer |
326 | 10 | while (nbytes * 2 >= capacity && !fp->at_eof) { |
327 | 4 | ssize_t n = fp->backend->read(fp, dest, nbytes); |
328 | 4 | if (n < 0) { fp->has_errno = errno; return n; } |
329 | 0 | else if (n == 0) fp->at_eof = 1; |
330 | 0 | else buffer_invalidated = 1; |
331 | 0 | fp->offset += n; |
332 | 0 | dest += n, nbytes -= n; |
333 | 0 | nread += n; |
334 | 0 | } |
335 | | |
336 | 6 | if (buffer_invalidated) { |
337 | | // Our unread buffer is empty, so begin == end, but our already-read |
338 | | // buffer [buffer,begin) is likely non-empty and is no longer valid as |
339 | | // its contents are no longer adjacent to the file position indicator. |
340 | | // Discard it so that hseek() can't try to take advantage of it. |
341 | 0 | fp->offset += fp->begin - fp->buffer; |
342 | 0 | fp->begin = fp->end = fp->buffer; |
343 | 0 | } |
344 | | |
345 | 6 | while (nbytes > 0 && !fp->at_eof) { |
346 | 0 | size_t n; |
347 | 0 | ssize_t ret = refill_buffer(fp); |
348 | 0 | if (ret < 0) return ret; |
349 | | |
350 | 0 | n = fp->end - fp->begin; |
351 | 0 | if (n > nbytes) n = nbytes; |
352 | 0 | memcpy(dest, fp->begin, n); |
353 | 0 | fp->begin += n; |
354 | 0 | dest += n, nbytes -= n; |
355 | 0 | nread += n; |
356 | 0 | } |
357 | | |
358 | 6 | return nread; |
359 | 6 | } |
360 | | |
361 | | /* Flushes the write buffer, fp->[buffer,begin), out through the backend |
362 | | returning 0 on success or negative if an error occurred. */ |
363 | | static ssize_t flush_buffer(hFILE *fp) |
364 | 38.7k | { |
365 | 38.7k | const char *buffer = fp->buffer; |
366 | 74.6k | while (buffer < fp->begin) { |
367 | 35.9k | ssize_t n = fp->backend->write(fp, buffer, fp->begin - buffer); |
368 | 35.9k | if (n < 0) { fp->has_errno = errno; return n; } |
369 | 35.9k | buffer += n; |
370 | 35.9k | fp->offset += n; |
371 | 35.9k | } |
372 | | |
373 | 38.7k | fp->begin = fp->buffer; // Leave the buffer empty |
374 | 38.7k | return 0; |
375 | 38.7k | } |
376 | | |
377 | | int hflush(hFILE *fp) |
378 | 18.1k | { |
379 | 18.1k | if (flush_buffer(fp) < 0) return EOF; |
380 | 18.1k | if (fp->backend->flush) { |
381 | 18.1k | if (fp->backend->flush(fp) < 0) { fp->has_errno = errno; return EOF; } |
382 | 18.1k | } |
383 | 18.1k | return 0; |
384 | 18.1k | } |
385 | | |
386 | | /* Called only from hputc(), when our buffer is already full. */ |
387 | | HTSLIB_EXPORT |
388 | | int hputc2(int c, hFILE *fp) |
389 | 71 | { |
390 | 71 | if (flush_buffer(fp) < 0) return EOF; |
391 | 71 | *(fp->begin++) = c; |
392 | 71 | return c; |
393 | 71 | } |
394 | | |
395 | | /* Called only from hwrite() and hputs2(); when called, our buffer is either |
396 | | full and ncopied bytes from the source have already been copied to our |
397 | | buffer; or completely empty, ncopied is zero and totalbytes is greater than |
398 | | the buffer size. */ |
399 | | HTSLIB_EXPORT |
400 | | ssize_t hwrite2(hFILE *fp, const void *srcv, size_t totalbytes, size_t ncopied) |
401 | 20.5k | { |
402 | 20.5k | const char *src = (const char *) srcv; |
403 | 20.5k | ssize_t ret; |
404 | 20.5k | const size_t capacity = fp->limit - fp->buffer; |
405 | 20.5k | size_t remaining = totalbytes - ncopied; |
406 | 20.5k | src += ncopied; |
407 | | |
408 | 20.5k | ret = flush_buffer(fp); |
409 | 20.5k | if (ret < 0) return ret; |
410 | | |
411 | | // Write large blocks out directly from the source buffer |
412 | 22.8k | while (remaining * 2 >= capacity) { |
413 | 2.30k | ssize_t n = fp->backend->write(fp, src, remaining); |
414 | 2.30k | if (n < 0) { fp->has_errno = errno; return n; } |
415 | 2.30k | fp->offset += n; |
416 | 2.30k | src += n, remaining -= n; |
417 | 2.30k | } |
418 | | |
419 | | // Just buffer any remaining characters |
420 | 20.5k | memcpy(fp->begin, src, remaining); |
421 | 20.5k | fp->begin += remaining; |
422 | | |
423 | 20.5k | return totalbytes; |
424 | 20.5k | } |
425 | | |
426 | | /* Called only from hputs(), when our buffer is already full. */ |
427 | | HTSLIB_EXPORT |
428 | | int hputs2(const char *text, size_t totalbytes, size_t ncopied, hFILE *fp) |
429 | 0 | { |
430 | 0 | return (hwrite2(fp, text, totalbytes, ncopied) >= 0)? 0 : EOF; |
431 | 0 | } |
432 | | |
433 | | off_t hseek(hFILE *fp, off_t offset, int whence) |
434 | 1.21k | { |
435 | 1.21k | off_t curpos, pos; |
436 | | |
437 | 1.21k | if (writebuffer_is_nonempty(fp) && fp->mobile) { |
438 | 0 | int ret = flush_buffer(fp); |
439 | 0 | if (ret < 0) return ret; |
440 | 0 | } |
441 | | |
442 | 1.21k | curpos = htell(fp); |
443 | | |
444 | | // Relative offsets are given relative to the hFILE's stream position, |
445 | | // which may differ from the backend's physical position due to buffering |
446 | | // read-ahead. Correct for this by converting to an absolute position. |
447 | 1.21k | if (whence == SEEK_CUR) { |
448 | 0 | if (curpos + offset < 0) { |
449 | | // Either a negative offset resulted in a position before the |
450 | | // start of the file, or we overflowed when given a positive offset |
451 | 0 | fp->has_errno = errno = (offset < 0)? EINVAL : EOVERFLOW; |
452 | 0 | return -1; |
453 | 0 | } |
454 | | |
455 | 0 | whence = SEEK_SET; |
456 | 0 | offset = curpos + offset; |
457 | 0 | } |
458 | | // For fixed immobile buffers, convert everything else to SEEK_SET too |
459 | | // so that seeking can be avoided for all (within range) requests. |
460 | 1.21k | else if (! fp->mobile && whence == SEEK_END) { |
461 | 614 | size_t length = fp->end - fp->buffer; |
462 | 614 | if (offset > 0 || -offset > length) { |
463 | 0 | fp->has_errno = errno = EINVAL; |
464 | 0 | return -1; |
465 | 0 | } |
466 | | |
467 | 614 | whence = SEEK_SET; |
468 | 614 | offset = length + offset; |
469 | 614 | } |
470 | | |
471 | | // Avoid seeking if the desired position is within our read buffer. |
472 | | // (But not when the next operation may be a write on a mobile buffer.) |
473 | 1.21k | if (whence == SEEK_SET && (! fp->mobile || fp->readonly) && |
474 | 1.21k | offset >= fp->offset && offset - fp->offset <= fp->end - fp->buffer) { |
475 | 1.21k | fp->begin = &fp->buffer[offset - fp->offset]; |
476 | 1.21k | return offset; |
477 | 1.21k | } |
478 | | |
479 | 0 | pos = fp->backend->seek(fp, offset, whence); |
480 | 0 | if (pos < 0) { fp->has_errno = errno; return pos; } |
481 | | |
482 | | // Seeking succeeded, so discard any non-empty read buffer |
483 | 0 | fp->begin = fp->end = fp->buffer; |
484 | 0 | fp->at_eof = 0; |
485 | |
|
486 | 0 | fp->offset = pos; |
487 | 0 | return pos; |
488 | 0 | } |
489 | | |
490 | | int hclose(hFILE *fp) |
491 | 34.8k | { |
492 | 34.8k | int err = fp->has_errno; |
493 | | |
494 | 34.8k | if (writebuffer_is_nonempty(fp) && hflush(fp) < 0) err = fp->has_errno; |
495 | 34.8k | if (!fp->preserve) { |
496 | 34.8k | if (fp->backend->close(fp) < 0) err = errno; |
497 | 34.8k | hfile_destroy(fp); |
498 | 34.8k | } |
499 | | |
500 | 34.8k | if (err) { |
501 | 0 | errno = err; |
502 | 0 | return EOF; |
503 | 0 | } |
504 | 34.8k | else return 0; |
505 | 34.8k | } |
506 | | |
507 | | void hclose_abruptly(hFILE *fp) |
508 | 7 | { |
509 | 7 | int save = errno; |
510 | 7 | if (fp->preserve) |
511 | 0 | return; |
512 | 7 | if (fp->backend->close(fp) < 0) { /* Ignore subsequent errors */ } |
513 | 7 | hfile_destroy(fp); |
514 | 7 | errno = save; |
515 | 7 | } |
516 | | |
517 | | |
518 | | /*************************** |
519 | | * File descriptor backend * |
520 | | ***************************/ |
521 | | |
522 | | #ifndef _WIN32 |
523 | | #include <sys/socket.h> |
524 | | #include <sys/stat.h> |
525 | | #define HAVE_STRUCT_STAT_ST_BLKSIZE |
526 | | #else |
527 | | #include <winsock2.h> |
528 | | #define HAVE_CLOSESOCKET |
529 | | #define HAVE_SETMODE |
530 | | #endif |
531 | | #include <fcntl.h> |
532 | | #include <unistd.h> |
533 | | |
534 | | /* For Unix, it doesn't matter whether a file descriptor is a socket. |
535 | | However Windows insists on send()/recv() and its own closesocket() |
536 | | being used when fd happens to be a socket. */ |
537 | | |
538 | | typedef struct { |
539 | | hFILE base; |
540 | | int fd; |
541 | | unsigned is_socket:1, is_shared:1; |
542 | | } hFILE_fd; |
543 | | |
544 | | static ssize_t fd_read(hFILE *fpv, void *buffer, size_t nbytes) |
545 | 13 | { |
546 | 13 | hFILE_fd *fp = (hFILE_fd *) fpv; |
547 | 13 | ssize_t n; |
548 | 13 | do { |
549 | 13 | n = fp->is_socket? recv(fp->fd, buffer, nbytes, 0) |
550 | 13 | : read(fp->fd, buffer, nbytes); |
551 | 13 | } while (n < 0 && errno == EINTR); |
552 | 13 | return n; |
553 | 13 | } |
554 | | |
555 | | static ssize_t fd_write(hFILE *fpv, const void *buffer, size_t nbytes) |
556 | 38.2k | { |
557 | 38.2k | hFILE_fd *fp = (hFILE_fd *) fpv; |
558 | 38.2k | ssize_t n; |
559 | 38.2k | do { |
560 | 38.2k | n = fp->is_socket? send(fp->fd, buffer, nbytes, 0) |
561 | 38.2k | : write(fp->fd, buffer, nbytes); |
562 | 38.2k | } while (n < 0 && errno == EINTR); |
563 | | #ifdef _WIN32 |
564 | | // On windows we have no SIGPIPE. Instead write returns |
565 | | // EINVAL. We check for this and our fd being a pipe. |
566 | | // If so, we raise SIGTERM instead of SIGPIPE. It's not |
567 | | // ideal, but I think the only alternative is extra checking |
568 | | // in every single piece of code. |
569 | | if (n < 0 && errno == EINVAL && |
570 | | GetLastError() == ERROR_NO_DATA && |
571 | | GetFileType((HANDLE)_get_osfhandle(fp->fd)) == FILE_TYPE_PIPE) { |
572 | | raise(SIGTERM); |
573 | | } |
574 | | #endif |
575 | 38.2k | return n; |
576 | 38.2k | } |
577 | | |
578 | | static off_t fd_seek(hFILE *fpv, off_t offset, int whence) |
579 | 0 | { |
580 | 0 | hFILE_fd *fp = (hFILE_fd *) fpv; |
581 | | #ifdef _WIN32 |
582 | | // On windows lseek can return non-zero values even on a pipe. Instead |
583 | | // it's likely to seek somewhere within the pipe memory buffer. |
584 | | // This breaks bgzf_check_EOF among other things. |
585 | | if (GetFileType((HANDLE)_get_osfhandle(fp->fd)) == FILE_TYPE_PIPE) { |
586 | | errno = ESPIPE; |
587 | | return -1; |
588 | | } |
589 | | #endif |
590 | |
|
591 | 0 | return lseek(fp->fd, offset, whence); |
592 | 0 | } |
593 | | |
594 | | static int fd_flush(hFILE *fpv) |
595 | 18.1k | { |
596 | 18.1k | int ret = 0; |
597 | 18.1k | do { |
598 | 18.1k | #ifdef HAVE_FDATASYNC |
599 | 18.1k | hFILE_fd *fp = (hFILE_fd *) fpv; |
600 | 18.1k | ret = fdatasync(fp->fd); |
601 | | #elif defined(HAVE_FSYNC) |
602 | | hFILE_fd *fp = (hFILE_fd *) fpv; |
603 | | ret = fsync(fp->fd); |
604 | | #endif |
605 | | // Ignore invalid-for-fsync(2) errors due to being, e.g., a pipe, |
606 | | // and operation-not-supported errors (Mac OS X) |
607 | 18.1k | if (ret < 0 && (errno == EINVAL || errno == ENOTSUP)) ret = 0; |
608 | 18.1k | } while (ret < 0 && errno == EINTR); |
609 | 18.1k | return ret; |
610 | 18.1k | } |
611 | | |
612 | | static int fd_close(hFILE *fpv) |
613 | 14.3k | { |
614 | 14.3k | hFILE_fd *fp = (hFILE_fd *) fpv; |
615 | 14.3k | int ret; |
616 | | |
617 | | // If we don't own the fd, return successfully without actually closing it |
618 | 14.3k | if (fp->is_shared) return 0; |
619 | | |
620 | 14.3k | do { |
621 | | #ifdef HAVE_CLOSESOCKET |
622 | | ret = fp->is_socket? closesocket(fp->fd) : close(fp->fd); |
623 | | #else |
624 | 14.3k | ret = close(fp->fd); |
625 | 14.3k | #endif |
626 | 14.3k | } while (ret < 0 && errno == EINTR); |
627 | 14.3k | return ret; |
628 | 14.3k | } |
629 | | |
630 | | static const struct hFILE_backend fd_backend = |
631 | | { |
632 | | fd_read, fd_write, fd_seek, fd_flush, fd_close |
633 | | }; |
634 | | |
/* Suggests an I/O buffer size for fd: 128KiB for FIFOs, otherwise the
   st_blksize reported by fstat().  Returns 0 (meaning "no suggestion") if
   fstat() fails or st_blksize is not available in this build. */
static size_t blksize(int fd)
{
#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
    struct stat sbuf;

    if (fstat(fd, &sbuf) != 0)
        return 0;

    // Linux often reports 4Kb for pipes/FIFOs, which is much too small for
    // performant I/O.
    if (S_ISFIFO(sbuf.st_mode))
        return 128*1024;

    return sbuf.st_blksize;
#else
    (void) fd;
    return 0;
#endif
}
650 | | |
651 | | static hFILE *hopen_fd(const char *filename, const char *mode) |
652 | 14.5k | { |
653 | 14.5k | hFILE_fd *fp = NULL; |
654 | 14.5k | int fd = open(filename, hfile_oflags(mode), 0666); |
655 | 14.5k | if (fd < 0) goto error; |
656 | | |
657 | 14.3k | fp = (hFILE_fd *) hfile_init(sizeof (hFILE_fd), mode, blksize(fd)); |
658 | 14.3k | if (fp == NULL) goto error; |
659 | | |
660 | 14.3k | fp->fd = fd; |
661 | 14.3k | fp->is_socket = 0; |
662 | 14.3k | fp->is_shared = 0; |
663 | 14.3k | fp->base.backend = &fd_backend; |
664 | 14.3k | return &fp->base; |
665 | | |
666 | 155 | error: |
667 | 155 | if (fd >= 0) { int save = errno; (void) close(fd); errno = save; } |
668 | 155 | hfile_destroy((hFILE *) fp); |
669 | 155 | return NULL; |
670 | 14.3k | } |
671 | | |
672 | | // Loads the contents of filename to produced a read-only, in memory, |
673 | | // immobile hfile. fp is the already opened file. We always close this |
674 | | // input fp, irrespective of whether we error or whether we return a new |
675 | | // immobile hfile. |
676 | 34 | static hFILE *hpreload(hFILE *fp) { |
677 | 34 | hFILE *mem_fp; |
678 | 34 | char *buf = NULL; |
679 | 34 | off_t buf_sz = 0, buf_a = 0, buf_inc = 8192, len; |
680 | | |
681 | 104 | for (;;) { |
682 | 104 | if (buf_a - buf_sz < 5000) { |
683 | 74 | buf_a += buf_inc; |
684 | 74 | char *t = realloc(buf, buf_a); |
685 | 74 | if (!t) goto err; |
686 | 74 | buf = t; |
687 | 74 | if (buf_inc < 1000000) buf_inc *= 1.3; |
688 | 74 | } |
689 | 104 | len = hread(fp, buf+buf_sz, buf_a-buf_sz); |
690 | 104 | if (len > 0) |
691 | 70 | buf_sz += len; |
692 | 34 | else |
693 | 34 | break; |
694 | 104 | } |
695 | | |
696 | 34 | if (len < 0) goto err; |
697 | 30 | mem_fp = hfile_init_fixed(sizeof(hFILE), "r", buf, buf_sz, buf_a); |
698 | 30 | if (!mem_fp) goto err; |
699 | 30 | mem_fp->backend = &mem_backend; |
700 | | |
701 | 30 | if (hclose(fp) < 0) { |
702 | 0 | hclose_abruptly(mem_fp); |
703 | 0 | goto err; |
704 | 0 | } |
705 | 30 | return mem_fp; |
706 | | |
707 | 4 | err: |
708 | 4 | free(buf); |
709 | 4 | hclose_abruptly(fp); |
710 | 4 | return NULL; |
711 | 30 | } |
712 | | |
// Reports whether the URL wrapped by a "preload:" URL is itself remote.
static int is_preload_url_remote(const char *url){
    const char *wrapped = url + 8; // len("preload:") = 8
    return hisremote(wrapped);
}
716 | | |
717 | 34 | static hFILE *hopen_preload(const char *url, const char *mode){ |
718 | 34 | hFILE* fp = hopen(url + 8, mode); |
719 | 34 | return fp ? hpreload(fp) : NULL; |
720 | 34 | } |
721 | | |
722 | | hFILE *hdopen(int fd, const char *mode) |
723 | 6 | { |
724 | 6 | hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd)); |
725 | 6 | if (fp == NULL) return NULL; |
726 | | |
727 | 6 | fp->fd = fd; |
728 | 6 | fp->is_socket = (strchr(mode, 's') != NULL); |
729 | 6 | fp->is_shared = (strchr(mode, 'S') != NULL); |
730 | 6 | fp->base.backend = &fd_backend; |
731 | 6 | return &fp->base; |
732 | 6 | } |
733 | | |
734 | | static hFILE *hopen_fd_fileuri(const char *url, const char *mode) |
735 | 1 | { |
736 | 1 | if (strncmp(url, "file://localhost/", 17) == 0) url += 16; |
737 | 1 | else if (strncmp(url, "file:///", 8) == 0) url += 7; |
738 | 1 | else { errno = EPROTONOSUPPORT; return NULL; } |
739 | | |
740 | | #if defined(_WIN32) || defined(__MSYS__) |
741 | | // For cases like C:/foo |
742 | | if (url[0] == '/' && url[1] && url[2] == ':' && url[3] == '/') url++; |
743 | | #endif |
744 | | |
745 | 0 | return hopen_fd(url, mode); |
746 | 1 | } |
747 | | |
748 | | static hFILE *hopen_fd_stdinout(const char *mode) |
749 | 6 | { |
750 | 6 | int fd = (strchr(mode, 'r') != NULL)? STDIN_FILENO : STDOUT_FILENO; |
751 | 6 | char mode_shared[101]; |
752 | 6 | snprintf(mode_shared, sizeof mode_shared, "S%s", mode); |
753 | | #if defined HAVE_SETMODE && defined O_BINARY |
754 | | if (setmode(fd, O_BINARY) < 0) return NULL; |
755 | | #endif |
756 | 6 | return hdopen(fd, mode_shared); |
757 | 6 | } |
758 | | |
759 | | HTSLIB_EXPORT |
/* Translates an fopen()-style mode string into open(2) flags.
   'r', 'w', 'a' and '+' select the access mode (the last one seen wins);
   'w' and 'a' also add O_CREAT plus O_TRUNC or O_APPEND respectively; 'e' and
   'x' add O_CLOEXEC and O_EXCL where those exist.  Other characters are
   ignored.  O_BINARY is always added on platforms that define it. */
int hfile_oflags(const char *mode)
{
    int access = 0, extra = 0;
    const char *p = mode;

    while (*p != '\0') {
        const char c = *p++;

        if (c == 'r') {
            access = O_RDONLY;
        }
        else if (c == 'w') {
            access = O_WRONLY;
            extra |= O_CREAT | O_TRUNC;
        }
        else if (c == 'a') {
            access = O_WRONLY;
            extra |= O_CREAT | O_APPEND;
        }
        else if (c == '+') {
            access = O_RDWR;
        }
#ifdef O_CLOEXEC
        else if (c == 'e') {
            extra |= O_CLOEXEC;
        }
#endif
#ifdef O_EXCL
        else if (c == 'x') {
            extra |= O_EXCL;
        }
#endif
    }

#ifdef O_BINARY
    extra |= O_BINARY;
#endif

    return access | extra;
}
785 | | |
786 | | |
787 | | /********************* |
788 | | * In-memory backend * |
789 | | *********************/ |
790 | | |
791 | | #include "hts_internal.h" |
792 | | |
793 | | typedef struct { |
794 | | hFILE base; |
795 | | } hFILE_mem; |
796 | | |
797 | | static off_t mem_seek(hFILE *fpv, off_t offset, int whence) |
798 | 0 | { |
799 | 0 | errno = EINVAL; |
800 | 0 | return -1; |
801 | 0 | } |
802 | | |
803 | | static int mem_close(hFILE *fpv) |
804 | 20.4k | { |
805 | 20.4k | return 0; |
806 | 20.4k | } |
807 | | |
808 | | static const struct hFILE_backend mem_backend = |
809 | | { |
810 | | NULL, NULL, mem_seek, NULL, mem_close |
811 | | }; |
812 | | |
/* Case-insensitively checks whether s starts with key, which must already be
   lower case.  Returns 0 when it does and +1 when it does not. */
static int cmp_prefix(const char *key, const char *s)
{
    for (; *key != '\0'; key++, s++) {
        if (tolower_c(*s) != *key)
            return +1;
    }

    return 0;
}
821 | | |
822 | | static hFILE *create_hfile_mem(char* buffer, const char* mode, size_t buf_filled, size_t buf_size) |
823 | 20.4k | { |
824 | 20.4k | hFILE_mem *fp = (hFILE_mem *) hfile_init_fixed(sizeof(hFILE_mem), mode, buffer, buf_filled, buf_size); |
825 | 20.4k | if (fp == NULL) |
826 | 0 | return NULL; |
827 | | |
828 | 20.4k | fp->base.backend = &mem_backend; |
829 | 20.4k | return &fp->base; |
830 | 20.4k | } |
831 | | |
832 | | static hFILE *hopen_mem(const char *url, const char *mode) |
833 | 380 | { |
834 | 380 | size_t length, size; |
835 | 380 | char *buffer; |
836 | 380 | const char *data, *comma = strchr(url, ','); |
837 | 380 | if (comma == NULL) { errno = EINVAL; return NULL; } |
838 | 341 | data = comma+1; |
839 | | |
840 | | // TODO Implement write modes |
841 | 341 | if (strchr(mode, 'r') == NULL) { errno = EROFS; return NULL; } |
842 | | |
843 | 291 | if (comma - url >= 7 && cmp_prefix(";base64", &comma[-7]) == 0) { |
844 | 0 | size = hts_base64_decoded_length(strlen(data)); |
845 | 0 | buffer = malloc(size); |
846 | 0 | if (buffer == NULL) return NULL; |
847 | 0 | hts_decode_base64(buffer, &length, data); |
848 | 0 | } |
849 | 291 | else { |
850 | 291 | size = strlen(data) + 1; |
851 | 291 | buffer = malloc(size); |
852 | 291 | if (buffer == NULL) return NULL; |
853 | 291 | hts_decode_percent(buffer, &length, data); |
854 | 291 | } |
855 | 291 | hFILE* hf; |
856 | | |
857 | 291 | if(!(hf = create_hfile_mem(buffer, mode, length, size))){ |
858 | 0 | free(buffer); |
859 | 0 | return NULL; |
860 | 0 | } |
861 | | |
862 | 291 | return hf; |
863 | 291 | } |
864 | | |
865 | | static hFILE *hopenv_mem(const char *filename, const char *mode, va_list args) |
866 | 20.1k | { |
867 | 20.1k | char* buffer = va_arg(args, char*); |
868 | 20.1k | size_t sz = va_arg(args, size_t); |
869 | 20.1k | va_end(args); |
870 | | |
871 | 20.1k | hFILE* hf; |
872 | | |
873 | 20.1k | if(!(hf = create_hfile_mem(buffer, mode, sz, sz))){ |
874 | 0 | free(buffer); |
875 | 0 | return NULL; |
876 | 0 | } |
877 | | |
878 | 20.1k | return hf; |
879 | 20.1k | } |
880 | | |
881 | 0 | char *hfile_mem_get_buffer(hFILE *file, size_t *length) { |
882 | 0 | if (file->backend != &mem_backend) { |
883 | 0 | errno = EINVAL; |
884 | 0 | return NULL; |
885 | 0 | } |
886 | | |
887 | 0 | if (length) |
888 | 0 | *length = file->buffer - file->limit; |
889 | |
|
890 | 0 | return file->buffer; |
891 | 0 | } |
892 | | |
893 | 0 | char *hfile_mem_steal_buffer(hFILE *file, size_t *length) { |
894 | 0 | char *buf = hfile_mem_get_buffer(file, length); |
895 | 0 | if (buf) |
896 | 0 | file->buffer = NULL; |
897 | 0 | return buf; |
898 | 0 | } |
899 | | |
900 | | // open() stub for mem: which only works with the vopen() interface |
901 | | // Use 'data:,' for data encoded in the URL |
902 | 0 | static hFILE *hopen_not_supported(const char *fname, const char *mode) { |
903 | 0 | errno = EINVAL; |
904 | 0 | return NULL; |
905 | 0 | } |
906 | | |
907 | | int hfile_plugin_init_mem(struct hFILE_plugin *self) |
908 | 1 | { |
909 | | // mem files are declared remote so they work with a tabix index |
910 | 1 | static const struct hFILE_scheme_handler handler = |
911 | 1 | {hopen_not_supported, hfile_always_remote, "mem", 2000 + 50, hopenv_mem}; |
912 | 1 | self->name = "mem"; |
913 | 1 | hfile_add_scheme_handler("mem", &handler); |
914 | 1 | return 0; |
915 | 1 | } |
916 | | |
917 | | /********************************************************************** |
918 | | * Dummy crypt4gh plug-in. Does nothing apart from advise how to get * |
919 | | * the real one. It will be overridden by the actual plug-in. * |
920 | | **********************************************************************/ |
921 | | |
922 | | static hFILE *crypt4gh_needed(const char *url, const char *mode) |
923 | 0 | { |
924 | 0 | const char *u = strncmp(url, "crypt4gh:", 9) == 0 ? url + 9 : url; |
925 | | #if defined(ENABLE_PLUGINS) |
926 | | const char *enable_plugins = ""; |
927 | | #else |
928 | 0 | const char *enable_plugins = "You also need to rebuild HTSlib with plug-ins enabled.\n"; |
929 | 0 | #endif |
930 | |
|
931 | 0 | hts_log_error("Accessing \"%s\" needs the crypt4gh plug-in.\n" |
932 | 0 | "It can be found at " |
933 | 0 | "https://github.com/samtools/htslib-crypt4gh\n" |
934 | 0 | "%s" |
935 | 0 | "If you have the plug-in, please ensure it can be " |
936 | 0 | "found on your HTS_PATH.", |
937 | 0 | u, enable_plugins); |
938 | |
|
939 | 0 | errno = EPROTONOSUPPORT; |
940 | 0 | return NULL; |
941 | 0 | } |
942 | | |
943 | | int hfile_plugin_init_crypt4gh_needed(struct hFILE_plugin *self) |
944 | 1 | { |
945 | 1 | static const struct hFILE_scheme_handler handler = |
946 | 1 | { crypt4gh_needed, hfile_always_local, "crypt4gh-needed", 0, NULL }; |
947 | 1 | self->name = "crypt4gh-needed"; |
948 | 1 | hfile_add_scheme_handler("crypt4gh", &handler); |
949 | 1 | return 0; |
950 | 1 | } |
951 | | |
952 | | |
953 | | /***************************************** |
954 | | * Plugin and hopen() backend dispatcher * |
955 | | *****************************************/ |
956 | | |
957 | | #include "htslib/khash.h" |
958 | | |
// Hash table type mapping a scheme name string to its registered handler.
KHASH_MAP_INIT_STR(scheme_string, const struct hFILE_scheme_handler *)
// The scheme table itself; NULL until load_hfile_plugins() creates it and
// again after hfile_shutdown() destroys it.
static khash_t(scheme_string) *schemes = NULL;

// Node of the singly-linked list of initialised plugins.
struct hFILE_plugin_list {
    struct hFILE_plugin plugin;
    struct hFILE_plugin_list *next;
};

// Head of the plugin list; init_add_plugin() pushes new nodes at the front.
static struct hFILE_plugin_list *plugins = NULL;
// Held while the scheme table and plugin list are set up or torn down.
static pthread_mutex_t plugins_lock = PTHREAD_MUTEX_INITIALIZER;
969 | | |
970 | | void hfile_shutdown(int do_close_plugin) |
971 | 1 | { |
972 | 1 | pthread_mutex_lock(&plugins_lock); |
973 | | |
974 | 1 | if (schemes) { |
975 | 1 | kh_destroy(scheme_string, schemes); |
976 | 1 | schemes = NULL; |
977 | 1 | } |
978 | | |
979 | 6 | while (plugins != NULL) { |
980 | 5 | struct hFILE_plugin_list *p = plugins; |
981 | 5 | if (p->plugin.destroy) p->plugin.destroy(); |
982 | | #ifdef ENABLE_PLUGINS |
983 | | if (p->plugin.obj && do_close_plugin) close_plugin(p->plugin.obj); |
984 | | #endif |
985 | 5 | plugins = p->next; |
986 | 5 | free(p); |
987 | 5 | } |
988 | | |
989 | 1 | pthread_mutex_unlock(&plugins_lock); |
990 | 1 | } |
991 | | |
992 | | static void hfile_exit(void) |
993 | 1 | { |
994 | 1 | hfile_shutdown(0); |
995 | 1 | pthread_mutex_destroy(&plugins_lock); |
996 | 1 | } |
997 | | |
998 | | static inline int priority(const struct hFILE_scheme_handler *handler) |
999 | 2 | { |
1000 | 2 | return handler->priority % 1000; |
1001 | 2 | } |
1002 | | |
#ifdef USING_WINDOWS_PLUGIN_DLLS
/*
 * Work-around for Windows plug-in dlls where the plug-in could be
 * using a different HTSlib library to the executable (for example
 * because the latter was built against a static libhts.a).  When this
 * happens, the plug-in can call the wrong copy of hfile_add_scheme_handler().
 * If this is detected, it calls this function which attempts to fix the
 * problem by redirecting to the hfile_add_scheme_handler() in the main
 * executable.
 *
 * Returns 0 if the call was forwarded, -1 otherwise.
 */
static int try_exe_add_scheme_handler(const char *scheme,
                                      const struct hFILE_scheme_handler *handler)
{
    // Resolved once and cached for subsequent calls
    static void (*exe_add)(const char *scheme,
                           const struct hFILE_scheme_handler *handler);

    if (exe_add == NULL) {
        // dlopen the main executable and resolve hfile_add_scheme_handler
        void *exe = dlopen(NULL, RTLD_LAZY);
        if (exe == NULL)
            return -1;
        *(void **) (&exe_add) = dlsym(exe, "hfile_add_scheme_handler");
        dlclose(exe);
    }

    // Check that the symbol was obtained and isn't the one in this copy
    // of the library (to avoid infinite recursion)
    if (exe_add == NULL || exe_add == hfile_add_scheme_handler)
        return -1;

    exe_add(scheme, handler);
    return 0;
}
#else
/* No redirection is available in this configuration: always reports failure. */
static int try_exe_add_scheme_handler(const char *scheme,
                                      const struct hFILE_scheme_handler *handler)
{
    (void) scheme;
    (void) handler;
    return -1;
}
#endif
1039 | | |
1040 | | HTSLIB_EXPORT |
1041 | | void hfile_add_scheme_handler(const char *scheme, |
1042 | | const struct hFILE_scheme_handler *handler) |
1043 | 34 | { |
1044 | 34 | int absent; |
1045 | 34 | if (handler->open == NULL || handler->isremote == NULL) { |
1046 | 0 | hts_log_warning("Couldn't register scheme handler for %s: missing method", scheme); |
1047 | 0 | return; |
1048 | 0 | } |
1049 | 34 | if (!schemes) { |
1050 | 0 | if (try_exe_add_scheme_handler(scheme, handler) != 0) { |
1051 | 0 | hts_log_warning("Couldn't register scheme handler for %s", scheme); |
1052 | 0 | } |
1053 | 0 | return; |
1054 | 0 | } |
1055 | 34 | khint_t k = kh_put(scheme_string, schemes, scheme, &absent); |
1056 | 34 | if (absent < 0) { |
1057 | 0 | hts_log_warning("Couldn't register scheme handler for %s : %s", |
1058 | 0 | scheme, strerror(errno)); |
1059 | 0 | return; |
1060 | 0 | } |
1061 | 34 | if (absent || priority(handler) > priority(kh_value(schemes, k))) { |
1062 | 33 | kh_value(schemes, k) = handler; |
1063 | 33 | } |
1064 | 34 | } |
1065 | | |
1066 | | static int init_add_plugin(void *obj, int (*init)(struct hFILE_plugin *), |
1067 | | const char *pluginname) |
1068 | 5 | { |
1069 | 5 | struct hFILE_plugin_list *p = malloc (sizeof (struct hFILE_plugin_list)); |
1070 | 5 | if (p == NULL) { |
1071 | 0 | hts_log_debug("Failed to allocate memory for plugin \"%s\"", pluginname); |
1072 | 0 | return -1; |
1073 | 0 | } |
1074 | | |
1075 | 5 | p->plugin.api_version = 1; |
1076 | 5 | p->plugin.obj = obj; |
1077 | 5 | p->plugin.name = NULL; |
1078 | 5 | p->plugin.destroy = NULL; |
1079 | | |
1080 | 5 | int ret = (*init)(&p->plugin); |
1081 | | |
1082 | 5 | if (ret != 0) { |
1083 | 0 | hts_log_debug("Initialisation failed for plugin \"%s\": %d", pluginname, ret); |
1084 | 0 | free(p); |
1085 | 0 | return ret; |
1086 | 0 | } |
1087 | | |
1088 | 5 | hts_log_debug("Loaded \"%s\"", pluginname); |
1089 | | |
1090 | 5 | p->next = plugins, plugins = p; |
1091 | 5 | return 0; |
1092 | 5 | } |
1093 | | |
1094 | | /* |
1095 | | * Returns 0 on success, |
1096 | | * <0 on failure |
1097 | | */ |
1098 | | static int load_hfile_plugins(void) |
1099 | 1 | { |
1100 | 1 | static const struct hFILE_scheme_handler |
1101 | 1 | data = { hopen_mem, hfile_always_local, "built-in", 80 }, |
1102 | 1 | file = { hopen_fd_fileuri, hfile_always_local, "built-in", 80 }, |
1103 | 1 | preload = { hopen_preload, is_preload_url_remote, "built-in", 80 }; |
1104 | | |
1105 | 1 | schemes = kh_init(scheme_string); |
1106 | 1 | if (schemes == NULL) |
1107 | 0 | return -1; |
1108 | | |
1109 | 1 | hfile_add_scheme_handler("data", &data); |
1110 | 1 | hfile_add_scheme_handler("file", &file); |
1111 | 1 | hfile_add_scheme_handler("preload", &preload); |
1112 | 1 | init_add_plugin(NULL, hfile_plugin_init_mem, "mem"); |
1113 | 1 | init_add_plugin(NULL, hfile_plugin_init_crypt4gh_needed, "crypt4gh-needed"); |
1114 | | |
1115 | | #ifdef ENABLE_PLUGINS |
1116 | | struct hts_path_itr path; |
1117 | | const char *pluginname; |
1118 | | hts_path_itr_setup(&path, NULL, NULL, "hfile_", 6, NULL, 0); |
1119 | | while ((pluginname = hts_path_itr_next(&path)) != NULL) { |
1120 | | void *obj; |
1121 | | int (*init)(struct hFILE_plugin *) = (int (*)(struct hFILE_plugin *)) |
1122 | | load_plugin(&obj, pluginname, "hfile_plugin_init"); |
1123 | | |
1124 | | if (init) { |
1125 | | if (init_add_plugin(obj, init, pluginname) != 0) |
1126 | | close_plugin(obj); |
1127 | | } |
1128 | | } |
1129 | | #else |
1130 | | |
1131 | 1 | #ifdef HAVE_LIBCURL |
1132 | 1 | init_add_plugin(NULL, hfile_plugin_init_libcurl, "libcurl"); |
1133 | 1 | #endif |
1134 | 1 | #ifdef ENABLE_GCS |
1135 | 1 | init_add_plugin(NULL, hfile_plugin_init_gcs, "gcs"); |
1136 | 1 | #endif |
1137 | 1 | #ifdef ENABLE_S3 |
1138 | 1 | init_add_plugin(NULL, hfile_plugin_init_s3, "s3"); |
1139 | 1 | #endif |
1140 | | |
1141 | 1 | #endif |
1142 | | |
1143 | | // In the unlikely event atexit() fails, it's better to succeed here and |
1144 | | // carry on; then eventually when the program exits, we'll merely close |
1145 | | // down the plugins uncleanly, as if we had aborted. |
1146 | 1 | (void) atexit(hfile_exit); |
1147 | | |
1148 | 1 | return 0; |
1149 | 1 | } |
1150 | | |
1151 | | /* A filename like "foo:bar" in which we don't recognise the scheme is |
1152 | | either an ordinary file or an indication of a missing or broken plugin. |
1153 | | Try to open it as an ordinary file; but if there's no such file, set |
1154 | | errno distinctively to make the plugin issue apparent. */ |
1155 | | static hFILE *hopen_unknown_scheme(const char *fname, const char *mode) |
1156 | 41 | { |
1157 | 41 | hFILE *fp = hopen_fd(fname, mode); |
1158 | 41 | if (fp == NULL && errno == ENOENT) errno = EPROTONOSUPPORT; |
1159 | 41 | return fp; |
1160 | 41 | } |
1161 | | |
1162 | | /* Returns the appropriate handler, or NULL if the string isn't an URL. */ |
1163 | | static const struct hFILE_scheme_handler *find_scheme_handler(const char *s) |
1164 | 41.6k | { |
1165 | 41.6k | static const struct hFILE_scheme_handler unknown_scheme = |
1166 | 41.6k | { hopen_unknown_scheme, hfile_always_local, "built-in", 0 }; |
1167 | | |
1168 | 41.6k | char scheme[12]; |
1169 | 41.6k | int i; |
1170 | | |
1171 | 129k | for (i = 0; i < sizeof scheme; i++) |
1172 | 129k | if (isalnum_c(s[i]) || s[i] == '+' || s[i] == '-' || s[i] == '.') |
1173 | 88.0k | scheme[i] = tolower_c(s[i]); |
1174 | 41.6k | else if (s[i] == ':') break; |
1175 | 19.9k | else return NULL; |
1176 | | |
1177 | | // 1 byte schemes are likely windows C:/foo pathnames |
1178 | 21.6k | if (i <= 1 || i >= sizeof scheme) return NULL; |
1179 | 21.6k | scheme[i] = '\0'; |
1180 | | |
1181 | 21.6k | pthread_mutex_lock(&plugins_lock); |
1182 | 21.6k | if (!schemes && load_hfile_plugins() < 0) { |
1183 | 0 | pthread_mutex_unlock(&plugins_lock); |
1184 | 0 | return NULL; |
1185 | 0 | } |
1186 | 21.6k | pthread_mutex_unlock(&plugins_lock); |
1187 | | |
1188 | 21.6k | khint_t k = kh_get(scheme_string, schemes, scheme); |
1189 | 21.6k | return (k != kh_end(schemes))? kh_value(schemes, k) : &unknown_scheme; |
1190 | 21.6k | } |
1191 | | |
1192 | | |
1193 | | /*************************** |
1194 | | * Library introspection functions |
1195 | | ***************************/ |
1196 | | |
1197 | | /* |
1198 | | * Fills out sc_list[] with the list of known URL schemes. |
1199 | | * This can be restricted to just ones from a specific plugin, |
1200 | | * or all (plugin == NULL). |
1201 | | * |
1202 | | * Returns number of schemes found on success; |
1203 | | * -1 on failure. |
1204 | | */ |
1205 | | HTSLIB_EXPORT |
1206 | | int hfile_list_schemes(const char *plugin, const char *sc_list[], int *nschemes) |
1207 | 0 | { |
1208 | 0 | pthread_mutex_lock(&plugins_lock); |
1209 | 0 | if (!schemes && load_hfile_plugins() < 0) { |
1210 | 0 | pthread_mutex_unlock(&plugins_lock); |
1211 | 0 | return -1; |
1212 | 0 | } |
1213 | 0 | pthread_mutex_unlock(&plugins_lock); |
1214 | |
|
1215 | 0 | khiter_t k; |
1216 | 0 | int ns = 0; |
1217 | |
|
1218 | 0 | for (k = kh_begin(schemes); k != kh_end(schemes); k++) { |
1219 | 0 | if (!kh_exist(schemes, k)) |
1220 | 0 | continue; |
1221 | | |
1222 | 0 | const struct hFILE_scheme_handler *s = kh_value(schemes, k); |
1223 | 0 | if (plugin && strcmp(s->provider, plugin) != 0) |
1224 | 0 | continue; |
1225 | | |
1226 | 0 | if (ns < *nschemes) |
1227 | 0 | sc_list[ns] = kh_key(schemes, k); |
1228 | 0 | ns++; |
1229 | 0 | } |
1230 | |
|
1231 | 0 | if (*nschemes > ns) |
1232 | 0 | *nschemes = ns; |
1233 | |
|
1234 | 0 | return ns; |
1235 | 0 | } |
1236 | | |
1237 | | |
1238 | | /* |
1239 | | * Fills out plist[] with the list of known hFILE plugins. |
1240 | | * |
1241 | | * Returns number of schemes found on success; |
1242 | | * -1 on failure |
1243 | | */ |
1244 | | HTSLIB_EXPORT |
1245 | | int hfile_list_plugins(const char *plist[], int *nplugins) |
1246 | 0 | { |
1247 | 0 | pthread_mutex_lock(&plugins_lock); |
1248 | 0 | if (!schemes && load_hfile_plugins() < 0) { |
1249 | 0 | pthread_mutex_unlock(&plugins_lock); |
1250 | 0 | return -1; |
1251 | 0 | } |
1252 | 0 | pthread_mutex_unlock(&plugins_lock); |
1253 | |
|
1254 | 0 | int np = 0; |
1255 | 0 | if (*nplugins) |
1256 | 0 | plist[np++] = "built-in"; |
1257 | |
|
1258 | 0 | struct hFILE_plugin_list *p = plugins; |
1259 | 0 | while (p) { |
1260 | 0 | if (np < *nplugins) |
1261 | 0 | plist[np] = p->plugin.name; |
1262 | |
|
1263 | 0 | p = p->next; |
1264 | 0 | np++; |
1265 | 0 | } |
1266 | |
|
1267 | 0 | if (*nplugins > np) |
1268 | 0 | *nplugins = np; |
1269 | |
|
1270 | 0 | return np; |
1271 | 0 | } |
1272 | | |
1273 | | |
1274 | | /* |
1275 | | * Tests for the presence of a specific hFILE plugin. |
1276 | | * |
1277 | | * Returns 1 if true |
1278 | | * 0 otherwise |
1279 | | */ |
1280 | | HTSLIB_EXPORT |
1281 | | int hfile_has_plugin(const char *name) |
1282 | 0 | { |
1283 | 0 | pthread_mutex_lock(&plugins_lock); |
1284 | 0 | if (!schemes && load_hfile_plugins() < 0) { |
1285 | 0 | pthread_mutex_unlock(&plugins_lock); |
1286 | 0 | return -1; |
1287 | 0 | } |
1288 | 0 | pthread_mutex_unlock(&plugins_lock); |
1289 | |
|
1290 | 0 | struct hFILE_plugin_list *p = plugins; |
1291 | 0 | while (p) { |
1292 | 0 | if (strcmp(p->plugin.name, name) == 0) |
1293 | 0 | return 1; |
1294 | 0 | p = p->next; |
1295 | 0 | } |
1296 | | |
1297 | 0 | return 0; |
1298 | 0 | } |
1299 | | |
1300 | | /*************************** |
1301 | | * hFILE interface proper |
1302 | | ***************************/ |
1303 | | |
1304 | | hFILE *hopen(const char *fname, const char *mode, ...) |
1305 | 35.4k | { |
1306 | 35.4k | const struct hFILE_scheme_handler *handler = find_scheme_handler(fname); |
1307 | 35.4k | if (handler) { |
1308 | 20.9k | if (strchr(mode, ':') == NULL |
1309 | 20.1k | || handler->priority < 2000 |
1310 | 20.1k | || handler->vopen == NULL) { |
1311 | 815 | return handler->open(fname, mode); |
1312 | 815 | } |
1313 | 20.1k | else { |
1314 | 20.1k | hFILE *fp; |
1315 | 20.1k | va_list arg; |
1316 | 20.1k | va_start(arg, mode); |
1317 | 20.1k | fp = handler->vopen(fname, mode, arg); |
1318 | 20.1k | va_end(arg); |
1319 | 20.1k | return fp; |
1320 | 20.1k | } |
1321 | 20.9k | } |
1322 | 14.4k | else if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode); |
1323 | 14.4k | else return hopen_fd(fname, mode); |
1324 | 35.4k | } |
1325 | | |
1326 | | HTSLIB_EXPORT |
/* isremote() implementation that reports every name as local (returns 0). */
int hfile_always_local (const char *fname)
{
    (void) fname;
    return 0;
}
1328 | | |
1329 | | HTSLIB_EXPORT |
/* isremote() implementation that reports every name as remote (returns 1). */
int hfile_always_remote(const char *fname)
{
    (void) fname;
    return 1;
}
1331 | | |
1332 | | int hisremote(const char *fname) |
1333 | 6.22k | { |
1334 | 6.22k | const struct hFILE_scheme_handler *handler = find_scheme_handler(fname); |
1335 | 6.22k | return handler? handler->isremote(fname) : 0; |
1336 | 6.22k | } |
1337 | | |
// Locate the extension of the basename part of [start,limit).
// Scans backwards from limit: returns a pointer to the last '.' found before
// any '/' is met, or limit itself when the basename has no extension.
// Note: Doesn't notice percent-encoded '.' and '/' characters. Don't do that.
static const char *strip_extension(const char *start, const char *limit)
{
    for (const char *p = limit; p > start; ) {
        char c = *--p;
        if (c == '.')
            return p;
        if (c == '/')
            break;
    }
    return limit;
}
1350 | | |
1351 | | char *haddextension(struct kstring_t *buffer, const char *filename, |
1352 | | int replace, const char *new_extension) |
1353 | 0 | { |
1354 | 0 | const char *trailing, *end; |
1355 | |
|
1356 | 0 | if (find_scheme_handler(filename)) { |
1357 | | // URL, so alter extensions before any trailing query or fragment parts |
1358 | | // Allow # symbols in s3 URLs |
1359 | 0 | trailing = filename + ((strncmp(filename, "s3://", 5) && strncmp(filename, "s3+http://", 10) && strncmp(filename, "s3+https://", 11)) ? strcspn(filename, "?#") : strcspn(filename, "?")); |
1360 | 0 | } |
1361 | 0 | else { |
1362 | | // Local path, so alter extensions at the end of the filename |
1363 | 0 | trailing = strchr(filename, '\0'); |
1364 | 0 | } |
1365 | |
|
1366 | 0 | end = replace? strip_extension(filename, trailing) : trailing; |
1367 | |
|
1368 | 0 | buffer->l = 0; |
1369 | 0 | if (kputsn(filename, end - filename, buffer) >= 0 && |
1370 | 0 | kputs(new_extension, buffer) >= 0 && |
1371 | 0 | kputs(trailing, buffer) >= 0) return buffer->s; |
1372 | 0 | else return NULL; |
1373 | 0 | } |
1374 | | |
1375 | | |
1376 | | /* |
1377 | | * ---------------------------------------------------------------------- |
1378 | | * Minimal stub functions for knet, added after the removal of |
1379 | | * hfile_net.c and knetfile.c. |
1380 | | * |
1381 | | * They exist purely for ABI compatibility, but are simply wrappers to |
1382 | | * hFILE. API should be compatible except knet_fileno (unused?). |
1383 | | * |
1384 | | * CULL THESE and knetfile.h at the next .so version bump. |
1385 | | */ |
1386 | | typedef struct knetFile_s { |
1387 | | // As per htslib/knetfile.h. Duplicated here as we don't wish to |
1388 | | // have any dependence on the deprecated knetfile.h interface, plus |
1389 | | // it's hopefully only temporary. |
1390 | | int type, fd; |
1391 | | int64_t offset; |
1392 | | char *host, *port; |
1393 | | int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready; |
1394 | | char *response, *retr, *size_cmd; |
1395 | | int64_t seek_offset; |
1396 | | int64_t file_size; |
1397 | | char *path, *http_host; |
1398 | | |
1399 | | // Our local addition |
1400 | | hFILE *hf; |
1401 | | } knetFile; |
1402 | | |
1403 | | HTSLIB_EXPORT |
1404 | 0 | knetFile *knet_open(const char *fn, const char *mode) { |
1405 | 0 | knetFile *fp = calloc(1, sizeof(*fp)); |
1406 | 0 | if (!fp) return NULL; |
1407 | 0 | if (!(fp->hf = hopen(fn, mode))) { |
1408 | 0 | free(fp); |
1409 | 0 | return NULL; |
1410 | 0 | } |
1411 | | |
1412 | | // FD backend is the only one implementing knet_fileno |
1413 | 0 | fp->fd = fp->hf->backend == &fd_backend |
1414 | 0 | ? ((hFILE_fd *)fp->hf)->fd |
1415 | 0 | : -1; |
1416 | |
|
1417 | 0 | return fp; |
1418 | 0 | } |
1419 | | |
1420 | | HTSLIB_EXPORT |
1421 | 0 | knetFile *knet_dopen(int fd, const char *mode) { |
1422 | 0 | knetFile *fp = calloc(1, sizeof(*fp)); |
1423 | 0 | if (!fp) return NULL; |
1424 | 0 | if (!(fp->hf = hdopen(fd, mode))) { |
1425 | 0 | free(fp); |
1426 | 0 | return NULL; |
1427 | 0 | } |
1428 | 0 | fp->fd = fd; |
1429 | 0 | return fp; |
1430 | 0 | } |
1431 | | |
1432 | | HTSLIB_EXPORT |
1433 | 0 | ssize_t knet_read(knetFile *fp, void *buf, size_t len) { |
1434 | 0 | ssize_t r = hread(fp->hf, buf, len); |
1435 | 0 | fp->offset += r>0?r:0; |
1436 | 0 | return r; |
1437 | 0 | } |
1438 | | |
1439 | | HTSLIB_EXPORT |
1440 | 0 | off_t knet_seek(knetFile *fp, off_t off, int whence) { |
1441 | 0 | off_t r = hseek(fp->hf, off, whence); |
1442 | 0 | if (r >= 0) |
1443 | 0 | fp->offset = r; |
1444 | 0 | return r; |
1445 | 0 | } |
1446 | | |
1447 | | HTSLIB_EXPORT |
1448 | 0 | int knet_close(knetFile *fp) { |
1449 | 0 | int r = hclose(fp->hf); |
1450 | 0 | free(fp); |
1451 | 0 | return r; |
1452 | 0 | } |