Coverage Report

Created: 2026-02-14 06:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/htslib/hfile.c
Line
Count
Source
1
/*  hfile.c -- buffered low-level input/output streams.
2
3
    Copyright (C) 2013-2021, 2023-2024 Genome Research Ltd.
4
5
    Author: John Marshall <jm18@sanger.ac.uk>
6
7
Permission is hereby granted, free of charge, to any person obtaining a copy
8
of this software and associated documentation files (the "Software"), to deal
9
in the Software without restriction, including without limitation the rights
10
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11
copies of the Software, and to permit persons to whom the Software is
12
furnished to do so, subject to the following conditions:
13
14
The above copyright notice and this permission notice shall be included in
15
all copies or substantial portions of the Software.
16
17
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
23
DEALINGS IN THE SOFTWARE.  */
24
25
#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
26
#include <config.h>
27
28
#include <stdio.h>
29
#include <stdlib.h>
30
#include <stddef.h>
31
#include <string.h>
32
#include <errno.h>
33
#include <limits.h>
34
35
#include <pthread.h>
36
37
#ifdef ENABLE_PLUGINS
38
#if defined(_WIN32) || defined(__CYGWIN__) || defined(__MSYS__)
39
#define USING_WINDOWS_PLUGIN_DLLS
40
#include <dlfcn.h>
41
#endif
42
#endif
43
44
#include "htslib/hfile.h"
45
#include "hfile_internal.h"
46
#include "htslib/kstring.h"
47
48
#ifndef ENOTSUP
49
#define ENOTSUP EINVAL
50
#endif
51
#ifndef EOVERFLOW
52
#define EOVERFLOW ERANGE
53
#endif
54
#ifndef EPROTONOSUPPORT
55
#define EPROTONOSUPPORT ENOSYS
56
#endif
57
58
#ifndef SSIZE_MAX /* SSIZE_MAX is POSIX 1 */
59
#define SSIZE_MAX LONG_MAX
60
#endif
61
62
/* hFILE fields are used as follows:
63
64
   char *buffer;     // Pointer to the start of the I/O buffer
65
   char *begin;      // First not-yet-read character / unused position
66
   char *end;        // First unfilled/unfillable position
67
   char *limit;      // Pointer to the first position past the buffer
68
69
   const hFILE_backend *backend;  // Methods to refill/flush I/O buffer
70
71
   off_t offset;     // Offset within the stream of buffer position 0
72
   unsigned at_eof:1;// For reading, whether EOF has been seen
73
   unsigned mobile:1;// Buffer is a mobile window or fixed full contents
74
   unsigned readonly:1;// Whether opened as "r" rather than "r+"/"w"/"a"
75
   int has_errno;    // Error number from the last failure on this stream
76
77
For reading, begin is the first unread character in the buffer and end is the
78
first unfilled position:
79
80
   -----------ABCDEFGHIJKLMNO---------------
81
   ^buffer    ^begin         ^end           ^limit
82
83
For writing, begin is the first unused position and end is unused so remains
84
equal to buffer:
85
86
   ABCDEFGHIJKLMNOPQRSTUVWXYZ---------------
87
   ^buffer                   ^begin         ^limit
88
   ^end
89
90
Thus if begin > end then there is a non-empty write buffer, if begin < end
91
then there is a non-empty read buffer, and if begin == end then both buffers
92
are empty.  In all cases, the stream's file position indicator corresponds
93
to the position pointed to by begin.
94
95
The above is the normal scenario of a mobile window.  For in-memory
96
streams (eg via hfile_init_fixed) the buffer can be used as the full
97
contents without any separate backend behind it.  These always have at_eof
98
set, offset set to 0, need no read() method, and should just return EINVAL
99
for seek():
100
101
   abcdefghijkLMNOPQRSTUVWXYZ------
102
   ^buffer    ^begin         ^end  ^limit
103
*/
104
HTSLIB_EXPORT
105
hFILE *hfile_init(size_t struct_size, const char *mode, size_t capacity)
106
14.6k
{
107
14.6k
    hFILE *fp = (hFILE *) malloc(struct_size);
108
14.6k
    if (fp == NULL) goto error;
109
110
14.6k
    const int maxcap = 128*1024;
111
112
14.6k
    if (capacity == 0) capacity = maxcap;
113
    // FIXME For now, clamp input buffer sizes so mpileup doesn't eat memory
114
14.6k
    if (strchr(mode, 'r') && capacity > maxcap) capacity = maxcap;
115
116
14.6k
#ifdef HAVE_POSIX_MEMALIGN
117
14.6k
    fp->buffer = NULL;
118
14.6k
    if (posix_memalign((void **)&fp->buffer, 256, capacity) < 0)
119
0
        goto error;
120
#else
121
    fp->buffer = (char *) malloc(capacity);
122
    if (fp->buffer == NULL) goto error;
123
#endif
124
125
14.6k
    fp->begin = fp->end = fp->buffer;
126
14.6k
    fp->limit = &fp->buffer[capacity];
127
128
14.6k
    fp->offset = 0;
129
14.6k
    fp->at_eof = 0;
130
14.6k
    fp->mobile = 1;
131
14.6k
    fp->readonly = (strchr(mode, 'r') && ! strchr(mode, '+'));
132
14.6k
    fp->preserve = 0;
133
14.6k
    fp->has_errno = 0;
134
14.6k
    return fp;
135
136
0
error:
137
0
    hfile_destroy(fp);
138
0
    return NULL;
139
14.6k
}
140
141
hFILE *hfile_init_fixed(size_t struct_size, const char *mode,
142
                        char *buffer, size_t buf_filled, size_t buf_size)
143
20.4k
{
144
20.4k
    hFILE *fp = (hFILE *) malloc(struct_size);
145
20.4k
    if (fp == NULL) return NULL;
146
147
20.4k
    fp->buffer = fp->begin = buffer;
148
20.4k
    fp->end = &fp->buffer[buf_filled];
149
20.4k
    fp->limit = &fp->buffer[buf_size];
150
151
20.4k
    fp->offset = 0;
152
20.4k
    fp->at_eof = 1;
153
20.4k
    fp->mobile = 0;
154
20.4k
    fp->readonly = (strchr(mode, 'r') && ! strchr(mode, '+'));
155
20.4k
    fp->preserve = 0;
156
20.4k
    fp->has_errno = 0;
157
20.4k
    return fp;
158
20.4k
}
159
160
// Forward declaration: the in-memory backend's method table is defined
// further down this file, but is referenced before that point.
static const struct hFILE_backend mem_backend;
161
162
HTSLIB_EXPORT
163
void hfile_destroy(hFILE *fp)
164
35.2k
{
165
35.2k
    int save = errno;
166
35.2k
    if (fp) free(fp->buffer);
167
35.2k
    free(fp);
168
35.2k
    errno = save;
169
35.2k
}
170
171
/* Non-zero when unflushed output is pending, i.e. begin lies beyond end.  */
static inline int writebuffer_is_nonempty(hFILE *fp)
{
    return fp->end < fp->begin;
}
175
176
/* Refills the read buffer from the backend (once, so may only partially
177
   fill the buffer), returning the number of additional characters read
178
   (which might be 0), or negative when an error occurred.  */
179
static ssize_t refill_buffer(hFILE *fp)
180
30.1k
{
181
30.1k
    ssize_t n;
182
183
    // Move any unread characters to the start of the buffer
184
30.1k
    if (fp->mobile && fp->begin > fp->buffer) {
185
0
        fp->offset += fp->begin - fp->buffer;
186
0
        memmove(fp->buffer, fp->begin, fp->end - fp->begin);
187
0
        fp->end = &fp->buffer[fp->end - fp->begin];
188
0
        fp->begin = fp->buffer;
189
0
    }
190
191
    // Read into the available buffer space at fp->[end,limit)
192
30.1k
    if (fp->at_eof || fp->end == fp->limit) n = 0;
193
9
    else {
194
9
        n = fp->backend->read(fp, fp->end, fp->limit - fp->end);
195
9
        if (n < 0) { fp->has_errno = errno; return n; }
196
6
        else if (n == 0) fp->at_eof = 1;
197
9
    }
198
199
30.1k
    fp->end += n;
200
30.1k
    return n;
201
30.1k
}
202
203
/*
204
 * Changes the buffer size for an hFILE.  Ideally this is done
205
 * immediately after opening.  If performed later, this function may
206
 * fail if we are reducing the buffer size and the current offset into
207
 * the buffer is beyond the new capacity.
208
 *
209
 * Returns 0 on success;
210
 *        -1 on failure.
211
 */
212
HTSLIB_EXPORT
213
0
int hfile_set_blksize(hFILE *fp, size_t bufsiz) {
214
0
    char *buffer;
215
0
    ptrdiff_t curr_used;
216
0
    if (!fp) return -1;
217
0
    curr_used = (fp->begin > fp->end ? fp->begin : fp->end) - fp->buffer;
218
0
    if (bufsiz == 0) bufsiz = 32768;
219
220
    // Ensure buffer resize will not erase live data
221
0
    if (bufsiz < curr_used)
222
0
        return -1;
223
224
0
    if (!(buffer = (char *) realloc(fp->buffer, bufsiz))) return -1;
225
226
0
    fp->begin  = buffer + (fp->begin - fp->buffer);
227
0
    fp->end    = buffer + (fp->end   - fp->buffer);
228
0
    fp->buffer = buffer;
229
0
    fp->limit  = &fp->buffer[bufsiz];
230
231
0
    return 0;
232
0
}
233
234
/* Called only from hgetc(), when our buffer is empty.  */
235
HTSLIB_EXPORT
236
int hgetc2(hFILE *fp)
237
205
{
238
205
    return (refill_buffer(fp) > 0)? (unsigned char) *(fp->begin++) : EOF;
239
205
}
240
241
/* Reads characters from fp into buffer up to and including the first
   occurrence of delim, stopping early once size-1 characters have been
   stored or EOF is reached.  The result is always NUL-terminated.

   Returns the number of characters stored (excluding the NUL; 0 at EOF),
   or -1 on error.  EINVAL is reported for an unusable size and EBADF when
   the stream has pending written data.  */
ssize_t hgetdelim(char *buffer, size_t size, int delim, hFILE *fp)
{
    size_t copied = 0;
    ssize_t got;

    if (size < 1 || size > SSIZE_MAX) {
        fp->has_errno = errno = EINVAL;
        return -1;
    }
    if (writebuffer_is_nonempty(fp)) {
        fp->has_errno = errno = EBADF;
        return -1;
    }

    /* One byte of the output is reserved for the NUL terminator */
    const size_t room = size - 1;

    for (;;) {
        /* Consider no more than fits in the remaining output space */
        const size_t avail = fp->end - fp->begin;
        const size_t space = room - copied;
        size_t n = (avail > space) ? space : avail;

        /* Look in the hFILE buffer for the delimiter */
        const char *hit = memchr(fp->begin, delim, n);
        if (hit != NULL) n = hit - fp->begin + 1;

        memcpy(buffer + copied, fp->begin, n);
        fp->begin += n;
        copied += n;

        /* Finished if the delimiter was found or the output is full */
        if (hit != NULL || copied == room) {
            buffer[copied] = '\0';
            return copied;
        }

        got = refill_buffer(fp);
        if (got <= 0) break;
    }

    if (got < 0) return -1; /* Error on refill. */

    buffer[copied] = '\0';  /* EOF, return anything that was copied. */
    return copied;
}
290
291
/* fgets()-like wrapper around hgetln(): returns buffer when at least one
   character was read, or NULL at EOF or on error.  A size below 1 is
   rejected with EINVAL.  */
char *hgets(char *buffer, int size, hFILE *fp)
{
    if (size >= 1) {
        ssize_t nread = hgetln(buffer, size, fp);
        return (nread > 0) ? buffer : NULL;
    }

    fp->has_errno = errno = EINVAL;
    return NULL;
}
299
300
/* Copies up to nbytes of upcoming data into buffer without consuming it,
   refilling the read buffer as needed.  Returns the number of bytes copied
   (fewer than nbytes if no more could be buffered), or negative if a
   refill failed.  */
ssize_t hpeek(hFILE *fp, void *buffer, size_t nbytes)
{
    size_t avail = fp->end - fp->begin;

    while (avail < nbytes) {
        ssize_t got = refill_buffer(fp);
        if (got < 0) return got;
        if (got == 0) break;
        avail += got;
    }

    const size_t ncopy = (avail > nbytes) ? nbytes : avail;
    memcpy(buffer, fp->begin, ncopy);
    return ncopy;
}
314
315
/* Called only from hread(); when called, our buffer is empty and nread bytes
316
   have already been placed in the destination buffer.  */
317
HTSLIB_EXPORT
318
ssize_t hread2(hFILE *fp, void *destv, size_t nbytes, size_t nread)
319
10
{
320
10
    const size_t capacity = fp->limit - fp->buffer;
321
10
    int buffer_invalidated = 0;
322
10
    char *dest = (char *) destv;
323
10
    dest += nread, nbytes -= nread;
324
325
    // Read large requests directly into the destination buffer
326
10
    while (nbytes * 2 >= capacity && !fp->at_eof) {
327
4
        ssize_t n = fp->backend->read(fp, dest, nbytes);
328
4
        if (n < 0) { fp->has_errno = errno; return n; }
329
0
        else if (n == 0) fp->at_eof = 1;
330
0
        else buffer_invalidated = 1;
331
0
        fp->offset += n;
332
0
        dest += n, nbytes -= n;
333
0
        nread += n;
334
0
    }
335
336
6
    if (buffer_invalidated) {
337
        // Our unread buffer is empty, so begin == end, but our already-read
338
        // buffer [buffer,begin) is likely non-empty and is no longer valid as
339
        // its contents are no longer adjacent to the file position indicator.
340
        // Discard it so that hseek() can't try to take advantage of it.
341
0
        fp->offset += fp->begin - fp->buffer;
342
0
        fp->begin = fp->end = fp->buffer;
343
0
    }
344
345
6
    while (nbytes > 0 && !fp->at_eof) {
346
0
        size_t n;
347
0
        ssize_t ret = refill_buffer(fp);
348
0
        if (ret < 0) return ret;
349
350
0
        n = fp->end - fp->begin;
351
0
        if (n > nbytes) n = nbytes;
352
0
        memcpy(dest, fp->begin, n);
353
0
        fp->begin += n;
354
0
        dest += n, nbytes -= n;
355
0
        nread += n;
356
0
    }
357
358
6
    return nread;
359
6
}
360
361
/* Flushes the write buffer, fp->[buffer,begin), out through the backend
362
   returning 0 on success or negative if an error occurred.  */
363
static ssize_t flush_buffer(hFILE *fp)
364
38.7k
{
365
38.7k
    const char *buffer = fp->buffer;
366
74.6k
    while (buffer < fp->begin) {
367
35.9k
        ssize_t n = fp->backend->write(fp, buffer, fp->begin - buffer);
368
35.9k
        if (n < 0) { fp->has_errno = errno; return n; }
369
35.9k
        buffer += n;
370
35.9k
        fp->offset += n;
371
35.9k
    }
372
373
38.7k
    fp->begin = fp->buffer;  // Leave the buffer empty
374
38.7k
    return 0;
375
38.7k
}
376
377
/* Flushes buffered output to the backend and then invokes the backend's own
   flush method, if it has one.  Returns 0 on success or EOF on failure.  */
int hflush(hFILE *fp)
{
    if (flush_buffer(fp) < 0) return EOF;

    // A backend without a flush method has nothing further to do
    if (fp->backend->flush == NULL) return 0;

    if (fp->backend->flush(fp) < 0) {
        fp->has_errno = errno;
        return EOF;
    }
    return 0;
}
385
386
/* Called only from hputc(), when our buffer is already full.  */
387
HTSLIB_EXPORT
388
int hputc2(int c, hFILE *fp)
389
71
{
390
71
    if (flush_buffer(fp) < 0) return EOF;
391
71
    *(fp->begin++) = c;
392
71
    return c;
393
71
}
394
395
/* Called only from hwrite() and hputs2(); when called, our buffer is either
396
   full and ncopied bytes from the source have already been copied to our
397
   buffer; or completely empty, ncopied is zero and totalbytes is greater than
398
   the buffer size.  */
399
HTSLIB_EXPORT
400
ssize_t hwrite2(hFILE *fp, const void *srcv, size_t totalbytes, size_t ncopied)
401
20.5k
{
402
20.5k
    const char *src = (const char *) srcv;
403
20.5k
    ssize_t ret;
404
20.5k
    const size_t capacity = fp->limit - fp->buffer;
405
20.5k
    size_t remaining = totalbytes - ncopied;
406
20.5k
    src += ncopied;
407
408
20.5k
    ret = flush_buffer(fp);
409
20.5k
    if (ret < 0) return ret;
410
411
    // Write large blocks out directly from the source buffer
412
22.8k
    while (remaining * 2 >= capacity) {
413
2.30k
        ssize_t n = fp->backend->write(fp, src, remaining);
414
2.30k
        if (n < 0) { fp->has_errno = errno; return n; }
415
2.30k
        fp->offset += n;
416
2.30k
        src += n, remaining -= n;
417
2.30k
    }
418
419
    // Just buffer any remaining characters
420
20.5k
    memcpy(fp->begin, src, remaining);
421
20.5k
    fp->begin += remaining;
422
423
20.5k
    return totalbytes;
424
20.5k
}
425
426
/* Called only from hputs(), when our buffer is already full.  */
427
HTSLIB_EXPORT
428
int hputs2(const char *text, size_t totalbytes, size_t ncopied, hFILE *fp)
429
0
{
430
0
    return (hwrite2(fp, text, totalbytes, ncopied) >= 0)? 0 : EOF;
431
0
}
432
433
/* Repositions the stream.  Pending written data on a mobile buffer is
   flushed first.  When the target lies within data already held in the
   buffer (and the stream is immobile or read-only) only begin is moved;
   otherwise the backend's seek method is used and the buffer is discarded.

   Returns the resulting absolute position, or negative on error.  */
off_t hseek(hFILE *fp, off_t offset, int whence)
{
    if (writebuffer_is_nonempty(fp) && fp->mobile) {
        int ret = flush_buffer(fp);
        if (ret < 0) return ret;
    }

    const off_t curpos = htell(fp);

    if (whence == SEEK_CUR) {
        // A relative offset is relative to the hFILE's stream position, which
        // read-ahead buffering can leave different from the backend's
        // physical position, so turn it into an absolute position.
        if (curpos + offset < 0) {
            // Either a negative offset took us before the start of the
            // file, or a positive offset overflowed
            fp->has_errno = errno = (offset < 0)? EINVAL : EOVERFLOW;
            return -1;
        }

        offset = curpos + offset;
        whence = SEEK_SET;
    }
    else if (whence == SEEK_END && ! fp->mobile) {
        // Fixed immobile buffers get SEEK_END converted to SEEK_SET as well,
        // so every in-range request can be satisfied without the backend.
        size_t length = fp->end - fp->buffer;
        if (offset > 0 || -offset > length) {
            fp->has_errno = errno = EINVAL;
            return -1;
        }

        offset = length + offset;
        whence = SEEK_SET;
    }

    // Skip the backend when the target is inside our read buffer.
    // (But not when the next operation may be a write on a mobile buffer.)
    const int buffer_reusable =
        (whence == SEEK_SET) && (! fp->mobile || fp->readonly);

    if (buffer_reusable &&
        offset >= fp->offset && offset - fp->offset <= fp->end - fp->buffer) {
        fp->begin = &fp->buffer[offset - fp->offset];
        return offset;
    }

    const off_t pos = fp->backend->seek(fp, offset, whence);
    if (pos < 0) {
        fp->has_errno = errno;
        return pos;
    }

    // The backend has moved, so any buffered read data is now stale
    fp->begin = fp->end = fp->buffer;
    fp->at_eof = 0;

    fp->offset = pos;
    return pos;
}
489
490
/* Flushes any pending output and, unless the stream is marked preserve,
   closes the backend and frees the hFILE.  An error recorded earlier on the
   stream, or one arising during the flush or close, is reported.

   Returns 0 on success, or EOF with errno set on failure.  */
int hclose(hFILE *fp)
{
    int err = fp->has_errno;

    if (writebuffer_is_nonempty(fp)) {
        if (hflush(fp) < 0) err = fp->has_errno;
    }

    if (!fp->preserve) {
        if (fp->backend->close(fp) < 0) err = errno;
        hfile_destroy(fp);
    }

    if (err == 0) return 0;

    errno = err;
    return EOF;
}
506
507
/* Closes the backend and frees the hFILE without flushing, ignoring any
   close error and leaving errno unchanged.  Streams marked preserve are
   left untouched.  */
void hclose_abruptly(hFILE *fp)
{
    if (fp->preserve) return;

    const int saved_errno = errno;
    (void) fp->backend->close(fp);  /* Ignore subsequent errors */
    hfile_destroy(fp);
    errno = saved_errno;
}
516
517
518
/***************************
519
 * File descriptor backend *
520
 ***************************/
521
522
#ifndef _WIN32
523
#include <sys/socket.h>
524
#include <sys/stat.h>
525
#define HAVE_STRUCT_STAT_ST_BLKSIZE
526
#else
527
#include <winsock2.h>
528
#define HAVE_CLOSESOCKET
529
#define HAVE_SETMODE
530
#endif
531
#include <fcntl.h>
532
#include <unistd.h>
533
534
/* For Unix, it doesn't matter whether a file descriptor is a socket.
535
   However Windows insists on send()/recv() and its own closesocket()
536
   being used when fd happens to be a socket.  */
537
538
typedef struct {
539
    hFILE base;
540
    int fd;
541
    unsigned is_socket:1, is_shared:1;
542
} hFILE_fd;
543
544
/* Backend read method: reads up to nbytes from the descriptor (via recv()
   for sockets), retrying while the call is interrupted by a signal.  */
static ssize_t fd_read(hFILE *fpv, void *buffer, size_t nbytes)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;

    for (;;) {
        ssize_t got;
        if (fp->is_socket) got = recv(fp->fd, buffer, nbytes, 0);
        else got = read(fp->fd, buffer, nbytes);

        if (got >= 0 || errno != EINTR) return got;
    }
}
554
555
/* Backend write method: writes up to nbytes to the descriptor (via send()
   for sockets), retrying while the call is interrupted by a signal.  */
static ssize_t fd_write(hFILE *fpv, const void *buffer, size_t nbytes)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;
    ssize_t sent;

    for (;;) {
        if (fp->is_socket) sent = send(fp->fd, buffer, nbytes, 0);
        else sent = write(fp->fd, buffer, nbytes);

        if (sent >= 0 || errno != EINTR) break;
    }
#ifdef _WIN32
        // Windows has no SIGPIPE; write fails with EINVAL instead.  When
        // that happens and our fd is a pipe, raise SIGTERM in place of
        // SIGPIPE.  Not ideal, but the only alternative seems to be extra
        // checking in every single piece of code.
        if (sent < 0 && errno == EINVAL &&
            GetLastError() == ERROR_NO_DATA &&
            GetFileType((HANDLE)_get_osfhandle(fp->fd)) == FILE_TYPE_PIPE) {
            raise(SIGTERM);
        }
#endif
    return sent;
}
577
578
/* Backend seek method: a thin wrapper around lseek(), except that on
   Windows pipes are refused with ESPIPE.  */
static off_t fd_seek(hFILE *fpv, off_t offset, int whence)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;
#ifdef _WIN32
    // lseek on a Windows pipe can "succeed" by seeking somewhere inside the
    // pipe's memory buffer, which breaks bgzf_check_EOF among other things.
    const int is_pipe =
        (GetFileType((HANDLE)_get_osfhandle(fp->fd)) == FILE_TYPE_PIPE);
    if (is_pipe) {
        errno = ESPIPE;
        return -1;
    }
#endif

    return lseek(fp->fd, offset, whence);
}
593
594
/* Backend flush method: synchronises the descriptor with fdatasync() or
   fsync(), whichever the build provides (a no-op if neither), retrying on
   EINTR.  EINVAL and ENOTSUP failures are treated as success.  */
static int fd_flush(hFILE *fpv)
{
#if defined(HAVE_FDATASYNC) || defined(HAVE_FSYNC)
    hFILE_fd *fp = (hFILE_fd *) fpv;
#endif
    int status = 0;

    do {
#ifdef HAVE_FDATASYNC
        status = fdatasync(fp->fd);
#elif defined(HAVE_FSYNC)
        status = fsync(fp->fd);
#endif
        // Descriptors that cannot be synced (e.g. a pipe) report EINVAL, and
        // Mac OS X may report operation-not-supported; neither is an error
        if (status < 0 && (errno == EINVAL || errno == ENOTSUP)) status = 0;
    } while (status < 0 && errno == EINTR);

    return status;
}
611
612
/* Backend close method: closes the descriptor (with closesocket() for
   sockets where available), retrying on EINTR.  Shared descriptors are not
   ours to close, so for those success is reported without doing anything.  */
static int fd_close(hFILE *fpv)
{
    hFILE_fd *fp = (hFILE_fd *) fpv;
    int status;

    if (fp->is_shared) return 0;

    for (;;) {
#ifdef HAVE_CLOSESOCKET
        status = fp->is_socket? closesocket(fp->fd) : close(fp->fd);
#else
        status = close(fp->fd);
#endif
        if (status >= 0 || errno != EINTR) break;
    }

    return status;
}
629
630
static const struct hFILE_backend fd_backend =
631
{
632
    fd_read, fd_write, fd_seek, fd_flush, fd_close
633
};
634
635
/* Suggests an I/O buffer size for fd: 128KiB for FIFOs, otherwise the
   st_blksize reported by fstat().  Returns 0 when fstat() fails or when
   st_blksize is unavailable in this build.  */
static size_t blksize(int fd)
{
#ifdef HAVE_STRUCT_STAT_ST_BLKSIZE
    struct stat sbuf;
    if (fstat(fd, &sbuf) != 0) return 0;

    // Linux often reports 4Kb for pipes/FIFOs, which is far too small for
    // performant I/O.
    if (S_ISFIFO(sbuf.st_mode)) return 128*1024;

    return sbuf.st_blksize;
#else
    return 0;
#endif
}
650
651
static hFILE *hopen_fd(const char *filename, const char *mode)
652
14.5k
{
653
14.5k
    hFILE_fd *fp = NULL;
654
14.5k
    int fd = open(filename, hfile_oflags(mode), 0666);
655
14.5k
    if (fd < 0) goto error;
656
657
14.3k
    fp = (hFILE_fd *) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
658
14.3k
    if (fp == NULL) goto error;
659
660
14.3k
    fp->fd = fd;
661
14.3k
    fp->is_socket = 0;
662
14.3k
    fp->is_shared = 0;
663
14.3k
    fp->base.backend = &fd_backend;
664
14.3k
    return &fp->base;
665
666
155
error:
667
155
    if (fd >= 0) { int save = errno; (void) close(fd); errno = save; }
668
155
    hfile_destroy((hFILE *) fp);
669
155
    return NULL;
670
14.3k
}
671
672
// Loads the contents of filename to produced a read-only, in memory,
673
// immobile hfile.  fp is the already opened file.  We always close this
674
// input fp, irrespective of whether we error or whether we return a new
675
// immobile hfile.
676
34
static hFILE *hpreload(hFILE *fp) {
677
34
    hFILE *mem_fp;
678
34
    char *buf = NULL;
679
34
    off_t buf_sz = 0, buf_a = 0, buf_inc = 8192, len;
680
681
104
    for (;;) {
682
104
        if (buf_a - buf_sz < 5000) {
683
74
            buf_a += buf_inc;
684
74
            char *t = realloc(buf, buf_a);
685
74
            if (!t) goto err;
686
74
            buf = t;
687
74
            if (buf_inc < 1000000) buf_inc *= 1.3;
688
74
        }
689
104
        len = hread(fp, buf+buf_sz, buf_a-buf_sz);
690
104
        if (len > 0)
691
70
            buf_sz += len;
692
34
        else
693
34
            break;
694
104
    }
695
696
34
    if (len < 0) goto err;
697
30
    mem_fp = hfile_init_fixed(sizeof(hFILE), "r", buf, buf_sz, buf_a);
698
30
    if (!mem_fp) goto err;
699
30
    mem_fp->backend = &mem_backend;
700
701
30
    if (hclose(fp) < 0) {
702
0
        hclose_abruptly(mem_fp);
703
0
        goto err;
704
0
    }
705
30
    return mem_fp;
706
707
4
 err:
708
4
    free(buf);
709
4
    hclose_abruptly(fp);
710
4
    return NULL;
711
30
}
712
713
34
/* A "preload:" URL is remote exactly when the URL it wraps is.  */
static int is_preload_url_remote(const char *url){
    const char *wrapped = url + 8; // len("preload:") = 8
    return hisremote(wrapped);
}
716
717
34
/* Opens the URL that follows the 8-character "preload:" prefix and loads
   its contents into memory.  Returns NULL if either step fails.  */
static hFILE *hopen_preload(const char *url, const char *mode){
    hFILE *inner = hopen(url + 8, mode);
    if (inner == NULL) return NULL;
    return hpreload(inner);
}
721
722
hFILE *hdopen(int fd, const char *mode)
723
6
{
724
6
    hFILE_fd *fp = (hFILE_fd*) hfile_init(sizeof (hFILE_fd), mode, blksize(fd));
725
6
    if (fp == NULL) return NULL;
726
727
6
    fp->fd = fd;
728
6
    fp->is_socket = (strchr(mode, 's') != NULL);
729
6
    fp->is_shared = (strchr(mode, 'S') != NULL);
730
6
    fp->base.backend = &fd_backend;
731
6
    return &fp->base;
732
6
}
733
734
/* Opens a "file://localhost/..." or "file:///..." URL as a local file.
   Any other form fails with EPROTONOSUPPORT.  */
static hFILE *hopen_fd_fileuri(const char *url, const char *mode)
{
    const char *path;

    // Keep the final '/' of the prefix as the start of the absolute path
    if (strncmp(url, "file://localhost/", 17) == 0) path = url + 16;
    else if (strncmp(url, "file:///", 8) == 0) path = url + 7;
    else { errno = EPROTONOSUPPORT; return NULL; }

#if defined(_WIN32) || defined(__MSYS__)
    // For cases like C:/foo
    if (path[0] == '/' && path[1] && path[2] == ':' && path[3] == '/') path++;
#endif

    return hopen_fd(path, mode);
}
747
748
/* Opens standard input (when mode contains 'r') or standard output as a
   shared-descriptor hFILE, so that closing the hFILE leaves the underlying
   descriptor open.  */
static hFILE *hopen_fd_stdinout(const char *mode)
{
    char mode_shared[101];
    int fd;

    if (strchr(mode, 'r') != NULL) fd = STDIN_FILENO;
    else fd = STDOUT_FILENO;

    // Prefix the caller's mode with 'S' to mark the descriptor as shared
    snprintf(mode_shared, sizeof mode_shared, "S%s", mode);
#if defined HAVE_SETMODE && defined O_BINARY
    if (setmode(fd, O_BINARY) < 0) return NULL;
#endif
    return hdopen(fd, mode_shared);
}
758
759
HTSLIB_EXPORT
760
/* Translates an fopen()-style mode string into open(2) flags.  Recognised
   characters are 'r', 'w', 'a' and '+', plus 'e' (O_CLOEXEC) and 'x'
   (O_EXCL) where the platform defines those flags; any other character is
   ignored.  O_BINARY is always added on platforms that have it.  */
int hfile_oflags(const char *mode)
{
    int access = 0, extra = 0;
    const char *p = mode;

    while (*p != '\0') {
        const char ch = *p++;

        if (ch == 'r') access = O_RDONLY;
        else if (ch == 'w') { access = O_WRONLY; extra |= O_CREAT | O_TRUNC; }
        else if (ch == 'a') { access = O_WRONLY; extra |= O_CREAT | O_APPEND; }
        else if (ch == '+') access = O_RDWR;
#ifdef O_CLOEXEC
        else if (ch == 'e') extra |= O_CLOEXEC;
#endif
#ifdef O_EXCL
        else if (ch == 'x') extra |= O_EXCL;
#endif
    }

#ifdef O_BINARY
    extra |= O_BINARY;
#endif

    return access | extra;
}
785
786
787
/*********************
788
 * In-memory backend *
789
 *********************/
790
791
#include "hts_internal.h"
792
793
// In-memory stream type: it carries no state beyond the common hFILE fields.
typedef struct {
794
    hFILE base;
795
} hFILE_mem;
796
797
/* Backend seek method for in-memory streams: always fails with EINVAL.
   hseek() only reaches the backend when a request cannot be satisfied from
   the buffer itself.  */
static off_t mem_seek(hFILE *fpv, off_t offset, int whence)
{
    const off_t failure = -1;
    errno = EINVAL;
    return failure;
}
802
803
/* Backend close method for in-memory streams: there is nothing to release
   here, so it always reports success.  */
static int mem_close(hFILE *fpv)
{
    const int success = 0;
    return success;
}
807
808
static const struct hFILE_backend mem_backend =
809
{
810
    NULL, NULL, mem_seek, NULL, mem_close
811
};
812
813
/* Compares s, lower-cased with tolower_c(), against key, which is matched
   as given.  Returns 0 if s begins with key, or +1 on the first mismatch.  */
static int cmp_prefix(const char *key, const char *s)
{
    for (; *key != '\0'; key++, s++) {
        if (tolower_c(*s) != *key) return +1;
    }

    return 0;
}
821
822
static hFILE *create_hfile_mem(char* buffer, const char* mode, size_t buf_filled, size_t buf_size)
823
20.4k
{
824
20.4k
    hFILE_mem *fp = (hFILE_mem *) hfile_init_fixed(sizeof(hFILE_mem), mode, buffer, buf_filled, buf_size);
825
20.4k
    if (fp == NULL)
826
0
        return NULL;
827
828
20.4k
    fp->base.backend = &mem_backend;
829
20.4k
    return &fp->base;
830
20.4k
}
831
832
static hFILE *hopen_mem(const char *url, const char *mode)
833
380
{
834
380
    size_t length, size;
835
380
    char *buffer;
836
380
    const char *data, *comma = strchr(url, ',');
837
380
    if (comma == NULL) { errno = EINVAL; return NULL; }
838
341
    data = comma+1;
839
840
    // TODO Implement write modes
841
341
    if (strchr(mode, 'r') == NULL) { errno = EROFS; return NULL; }
842
843
291
    if (comma - url >= 7 && cmp_prefix(";base64", &comma[-7]) == 0) {
844
0
        size = hts_base64_decoded_length(strlen(data));
845
0
        buffer = malloc(size);
846
0
        if (buffer == NULL) return NULL;
847
0
        hts_decode_base64(buffer, &length, data);
848
0
    }
849
291
    else {
850
291
        size = strlen(data) + 1;
851
291
        buffer = malloc(size);
852
291
        if (buffer == NULL) return NULL;
853
291
        hts_decode_percent(buffer, &length, data);
854
291
    }
855
291
    hFILE* hf;
856
857
291
    if(!(hf = create_hfile_mem(buffer, mode, length, size))){
858
0
        free(buffer);
859
0
        return NULL;
860
0
    }
861
862
291
    return hf;
863
291
}
864
865
/* vopen handler for "mem:" streams.  Takes two variadic arguments, a
   char * buffer followed by its size_t length, and wraps the buffer in an
   in-memory hFILE.  The buffer is freed here if the stream cannot be
   created.  */
static hFILE *hopenv_mem(const char *filename, const char *mode, va_list args)
{
    char *data = va_arg(args, char*);
    size_t nbytes = va_arg(args, size_t);
    va_end(args);

    hFILE *hf = create_hfile_mem(data, mode, nbytes, nbytes);
    if (hf == NULL) free(data);

    return hf;
}
880
881
0
/* Returns the buffer underlying an in-memory hFILE; the stream keeps
   ownership of it.  If length is non-NULL, *length receives the size of
   the buffer in bytes, i.e. the distance from buffer to limit.

   Returns NULL with errno set to EINVAL if file does not use the in-memory
   backend.  */
char *hfile_mem_get_buffer(hFILE *file, size_t *length) {
    if (file->backend != &mem_backend) {
        errno = EINVAL;
        return NULL;
    }

    // limit points one past the end of the buffer, so it must be the left
    // operand: the reverse subtraction is negative and would wrap to a
    // huge size_t.
    if (length)
        *length = file->limit - file->buffer;

    return file->buffer;
}
892
893
0
/* As hfile_mem_get_buffer(), but also detaches the buffer from the stream
   by setting file->buffer to NULL, so destroying the hFILE will no longer
   free it.  Returns NULL, leaving file unchanged, if the lookup fails.  */
char *hfile_mem_steal_buffer(hFILE *file, size_t *length) {
    char *contents = hfile_mem_get_buffer(file, length);
    if (contents == NULL) return NULL;

    file->buffer = NULL;
    return contents;
}
899
900
// open() stub for mem: which only works with the vopen() interface
901
// Use 'data:,' for data encoded in the URL
902
0
static hFILE *hopen_not_supported(const char *fname, const char *mode) {
903
0
    errno = EINVAL;
904
0
    return NULL;
905
0
}
906
907
int hfile_plugin_init_mem(struct hFILE_plugin *self)
908
1
{
909
    // mem files are declared remote so they work with a tabix index
910
1
    static const struct hFILE_scheme_handler handler =
911
1
            {hopen_not_supported, hfile_always_remote, "mem", 2000 + 50, hopenv_mem};
912
1
    self->name = "mem";
913
1
    hfile_add_scheme_handler("mem", &handler);
914
1
    return 0;
915
1
}
916
917
/**********************************************************************
918
 * Dummy crypt4gh plug-in.  Does nothing apart from advise how to get *
919
 * the real one.  It will be overridden by the actual plug-in.        *
920
 **********************************************************************/
921
922
/* Placeholder open handler for the "crypt4gh:" scheme.  It logs advice on
   obtaining the real plug-in, then fails with EPROTONOSUPPORT.  */
static hFILE *crypt4gh_needed(const char *url, const char *mode)
{
    // Report the URL without its scheme prefix, if it has one
    const char *u = url;
    if (strncmp(url, "crypt4gh:", 9) == 0) u += 9;

#if defined(ENABLE_PLUGINS)
    const char *enable_plugins = "";
#else
    const char *enable_plugins = "You also need to rebuild HTSlib with plug-ins enabled.\n";
#endif

    hts_log_error("Accessing \"%s\" needs the crypt4gh plug-in.\n"
                  "It can be found at "
                  "https://github.com/samtools/htslib-crypt4gh\n"
                  "%s"
                  "If you have the plug-in, please ensure it can be "
                  "found on your HTS_PATH.",
                  u, enable_plugins);

    errno = EPROTONOSUPPORT;
    return NULL;
}
942
943
int hfile_plugin_init_crypt4gh_needed(struct hFILE_plugin *self)
944
1
{
945
1
    static const struct hFILE_scheme_handler handler =
946
1
        { crypt4gh_needed, hfile_always_local, "crypt4gh-needed", 0, NULL };
947
1
    self->name = "crypt4gh-needed";
948
1
    hfile_add_scheme_handler("crypt4gh", &handler);
949
1
    return 0;
950
1
}
951
952
953
/*****************************************
954
 * Plugin and hopen() backend dispatcher *
955
 *****************************************/
956
957
#include "htslib/khash.h"
958
959
KHASH_MAP_INIT_STR(scheme_string, const struct hFILE_scheme_handler *)
960
static khash_t(scheme_string) *schemes = NULL;
961
962
struct hFILE_plugin_list {
963
    struct hFILE_plugin plugin;
964
    struct hFILE_plugin_list *next;
965
};
966
967
static struct hFILE_plugin_list *plugins = NULL;
968
static pthread_mutex_t plugins_lock = PTHREAD_MUTEX_INITIALIZER;
969
970
void hfile_shutdown(int do_close_plugin)
971
1
{
972
1
    pthread_mutex_lock(&plugins_lock);
973
974
1
    if (schemes) {
975
1
        kh_destroy(scheme_string, schemes);
976
1
        schemes = NULL;
977
1
    }
978
979
6
    while (plugins != NULL) {
980
5
        struct hFILE_plugin_list *p = plugins;
981
5
        if (p->plugin.destroy) p->plugin.destroy();
982
#ifdef ENABLE_PLUGINS
983
        if (p->plugin.obj && do_close_plugin) close_plugin(p->plugin.obj);
984
#endif
985
5
        plugins = p->next;
986
5
        free(p);
987
5
    }
988
989
1
    pthread_mutex_unlock(&plugins_lock);
990
1
}
991
992
static void hfile_exit(void)
993
1
{
994
1
    hfile_shutdown(0);
995
1
    pthread_mutex_destroy(&plugins_lock);
996
1
}
997
998
static inline int priority(const struct hFILE_scheme_handler *handler)
999
2
{
1000
2
    return handler->priority % 1000;
1001
2
}
1002
1003
#ifdef USING_WINDOWS_PLUGIN_DLLS
/*
 * Work-around for Windows plug-in dlls where the plug-in could be
 * using a different HTSlib library to the executable (for example
 * because the latter was built against a static libhts.a).  When this
 * happens, the plug-in can call the wrong copy of hfile_add_scheme_handler().
 * If this is detected, it calls this function which attempts to fix the
 * problem by redirecting to the hfile_add_scheme_handler() in the main
 * executable.
 *
 * Returns 0 if the call was forwarded, -1 otherwise.
 */
static int try_exe_add_scheme_handler(const char *scheme,
                                      const struct hFILE_scheme_handler *handler)
{
    typedef void add_handler_fn(const char *scheme,
                                const struct hFILE_scheme_handler *handler);
    // Cached across calls once the symbol has been resolved
    static add_handler_fn *exe_add_handler;

    if (exe_add_handler == NULL) {
        // dlopen the main executable and resolve hfile_add_scheme_handler
        void *exe = dlopen(NULL, RTLD_LAZY);
        if (exe == NULL)
            return -1;
        *(void **) (&exe_add_handler) = dlsym(exe, "hfile_add_scheme_handler");
        dlclose(exe);
    }

    // Check that the symbol was obtained and isn't the one in this copy
    // of the library (to avoid infinite recursion)
    if (exe_add_handler == NULL)
        return -1;
    if (exe_add_handler == hfile_add_scheme_handler)
        return -1;

    exe_add_handler(scheme, handler);
    return 0;
}
#else
// No redirection is available on this platform: always reports failure (-1).
static int try_exe_add_scheme_handler(const char *scheme,
                                      const struct hFILE_scheme_handler *handler)
{
    const int not_forwarded = -1;
    return not_forwarded;
}
#endif
1039
1040
HTSLIB_EXPORT
1041
void hfile_add_scheme_handler(const char *scheme,
1042
                              const struct hFILE_scheme_handler *handler)
1043
34
{
1044
34
    int absent;
1045
34
    if (handler->open == NULL || handler->isremote == NULL) {
1046
0
        hts_log_warning("Couldn't register scheme handler for %s: missing method", scheme);
1047
0
        return;
1048
0
    }
1049
34
    if (!schemes) {
1050
0
        if (try_exe_add_scheme_handler(scheme, handler) != 0) {
1051
0
            hts_log_warning("Couldn't register scheme handler for %s", scheme);
1052
0
        }
1053
0
        return;
1054
0
    }
1055
34
    khint_t k = kh_put(scheme_string, schemes, scheme, &absent);
1056
34
    if (absent < 0) {
1057
0
        hts_log_warning("Couldn't register scheme handler for %s : %s",
1058
0
                        scheme, strerror(errno));
1059
0
        return;
1060
0
    }
1061
34
    if (absent || priority(handler) > priority(kh_value(schemes, k))) {
1062
33
        kh_value(schemes, k) = handler;
1063
33
    }
1064
34
}
1065
1066
static int init_add_plugin(void *obj, int (*init)(struct hFILE_plugin *),
1067
                           const char *pluginname)
1068
5
{
1069
5
    struct hFILE_plugin_list *p = malloc (sizeof (struct hFILE_plugin_list));
1070
5
    if (p == NULL) {
1071
0
        hts_log_debug("Failed to allocate memory for plugin \"%s\"", pluginname);
1072
0
        return -1;
1073
0
    }
1074
1075
5
    p->plugin.api_version = 1;
1076
5
    p->plugin.obj = obj;
1077
5
    p->plugin.name = NULL;
1078
5
    p->plugin.destroy = NULL;
1079
1080
5
    int ret = (*init)(&p->plugin);
1081
1082
5
    if (ret != 0) {
1083
0
        hts_log_debug("Initialisation failed for plugin \"%s\": %d", pluginname, ret);
1084
0
        free(p);
1085
0
        return ret;
1086
0
    }
1087
1088
5
    hts_log_debug("Loaded \"%s\"", pluginname);
1089
1090
5
    p->next = plugins, plugins = p;
1091
5
    return 0;
1092
5
}
1093
1094
/*
1095
 * Returns 0 on success,
1096
 *        <0 on failure
1097
 */
1098
static int load_hfile_plugins(void)
1099
1
{
1100
1
    static const struct hFILE_scheme_handler
1101
1
        data = { hopen_mem, hfile_always_local, "built-in", 80 },
1102
1
        file = { hopen_fd_fileuri, hfile_always_local, "built-in", 80 },
1103
1
        preload = { hopen_preload, is_preload_url_remote, "built-in", 80 };
1104
1105
1
    schemes = kh_init(scheme_string);
1106
1
    if (schemes == NULL)
1107
0
        return -1;
1108
1109
1
    hfile_add_scheme_handler("data", &data);
1110
1
    hfile_add_scheme_handler("file", &file);
1111
1
    hfile_add_scheme_handler("preload", &preload);
1112
1
    init_add_plugin(NULL, hfile_plugin_init_mem, "mem");
1113
1
    init_add_plugin(NULL, hfile_plugin_init_crypt4gh_needed, "crypt4gh-needed");
1114
1115
#ifdef ENABLE_PLUGINS
1116
    struct hts_path_itr path;
1117
    const char *pluginname;
1118
    hts_path_itr_setup(&path, NULL, NULL, "hfile_", 6, NULL, 0);
1119
    while ((pluginname = hts_path_itr_next(&path)) != NULL) {
1120
        void *obj;
1121
        int (*init)(struct hFILE_plugin *) = (int (*)(struct hFILE_plugin *))
1122
            load_plugin(&obj, pluginname, "hfile_plugin_init");
1123
1124
        if (init) {
1125
            if (init_add_plugin(obj, init, pluginname) != 0)
1126
                close_plugin(obj);
1127
        }
1128
    }
1129
#else
1130
1131
1
#ifdef HAVE_LIBCURL
1132
1
    init_add_plugin(NULL, hfile_plugin_init_libcurl, "libcurl");
1133
1
#endif
1134
1
#ifdef ENABLE_GCS
1135
1
    init_add_plugin(NULL, hfile_plugin_init_gcs, "gcs");
1136
1
#endif
1137
1
#ifdef ENABLE_S3
1138
1
    init_add_plugin(NULL, hfile_plugin_init_s3, "s3");
1139
1
#endif
1140
1141
1
#endif
1142
1143
    // In the unlikely event atexit() fails, it's better to succeed here and
1144
    // carry on; then eventually when the program exits, we'll merely close
1145
    // down the plugins uncleanly, as if we had aborted.
1146
1
    (void) atexit(hfile_exit);
1147
1148
1
    return 0;
1149
1
}
1150
1151
/* A filename like "foo:bar" in which we don't recognise the scheme is
1152
   either an ordinary file or an indication of a missing or broken plugin.
1153
   Try to open it as an ordinary file; but if there's no such file, set
1154
   errno distinctively to make the plugin issue apparent.  */
1155
static hFILE *hopen_unknown_scheme(const char *fname, const char *mode)
1156
41
{
1157
41
    hFILE *fp = hopen_fd(fname, mode);
1158
41
    if (fp == NULL && errno == ENOENT) errno = EPROTONOSUPPORT;
1159
41
    return fp;
1160
41
}
1161
1162
/* Returns the appropriate handler, or NULL if the string isn't an URL.  */
1163
static const struct hFILE_scheme_handler *find_scheme_handler(const char *s)
1164
41.6k
{
1165
41.6k
    static const struct hFILE_scheme_handler unknown_scheme =
1166
41.6k
        { hopen_unknown_scheme, hfile_always_local, "built-in", 0 };
1167
1168
41.6k
    char scheme[12];
1169
41.6k
    int i;
1170
1171
129k
    for (i = 0; i < sizeof scheme; i++)
1172
129k
        if (isalnum_c(s[i]) || s[i] == '+' || s[i] == '-' || s[i] == '.')
1173
88.0k
            scheme[i] = tolower_c(s[i]);
1174
41.6k
        else if (s[i] == ':') break;
1175
19.9k
        else return NULL;
1176
1177
    // 1 byte schemes are likely windows C:/foo pathnames
1178
21.6k
    if (i <= 1 || i >= sizeof scheme) return NULL;
1179
21.6k
    scheme[i] = '\0';
1180
1181
21.6k
    pthread_mutex_lock(&plugins_lock);
1182
21.6k
    if (!schemes && load_hfile_plugins() < 0) {
1183
0
        pthread_mutex_unlock(&plugins_lock);
1184
0
        return NULL;
1185
0
    }
1186
21.6k
    pthread_mutex_unlock(&plugins_lock);
1187
1188
21.6k
    khint_t k = kh_get(scheme_string, schemes, scheme);
1189
21.6k
    return (k != kh_end(schemes))? kh_value(schemes, k) : &unknown_scheme;
1190
21.6k
}
1191
1192
1193
/***************************
1194
 * Library introspection functions
1195
 ***************************/
1196
1197
/*
1198
 * Fills out sc_list[] with the list of known URL schemes.
1199
 * This can be restricted to just ones from a specific plugin,
1200
 * or all (plugin == NULL).
1201
 *
1202
 * Returns number of schemes found on success;
1203
 *        -1 on failure.
1204
 */
1205
HTSLIB_EXPORT
1206
int hfile_list_schemes(const char *plugin, const char *sc_list[], int *nschemes)
1207
0
{
1208
0
    pthread_mutex_lock(&plugins_lock);
1209
0
    if (!schemes && load_hfile_plugins() < 0) {
1210
0
        pthread_mutex_unlock(&plugins_lock);
1211
0
        return -1;
1212
0
    }
1213
0
    pthread_mutex_unlock(&plugins_lock);
1214
1215
0
    khiter_t k;
1216
0
    int ns = 0;
1217
1218
0
    for (k = kh_begin(schemes); k != kh_end(schemes); k++) {
1219
0
        if (!kh_exist(schemes, k))
1220
0
            continue;
1221
1222
0
        const struct hFILE_scheme_handler *s = kh_value(schemes, k);
1223
0
        if (plugin && strcmp(s->provider, plugin) != 0)
1224
0
            continue;
1225
1226
0
        if (ns < *nschemes)
1227
0
            sc_list[ns] = kh_key(schemes, k);
1228
0
        ns++;
1229
0
    }
1230
1231
0
    if (*nschemes > ns)
1232
0
        *nschemes = ns;
1233
1234
0
    return ns;
1235
0
}
1236
1237
1238
/*
1239
 * Fills out plist[] with the list of known hFILE plugins.
1240
 *
1241
 * Returns number of schemes found on success;
1242
 *        -1 on failure
1243
 */
1244
HTSLIB_EXPORT
1245
int hfile_list_plugins(const char *plist[], int *nplugins)
1246
0
{
1247
0
    pthread_mutex_lock(&plugins_lock);
1248
0
    if (!schemes && load_hfile_plugins() < 0) {
1249
0
        pthread_mutex_unlock(&plugins_lock);
1250
0
        return -1;
1251
0
    }
1252
0
    pthread_mutex_unlock(&plugins_lock);
1253
1254
0
    int np = 0;
1255
0
    if (*nplugins)
1256
0
        plist[np++] = "built-in";
1257
1258
0
    struct hFILE_plugin_list *p = plugins;
1259
0
    while (p) {
1260
0
        if (np < *nplugins)
1261
0
            plist[np] = p->plugin.name;
1262
1263
0
        p = p->next;
1264
0
        np++;
1265
0
    }
1266
1267
0
    if (*nplugins > np)
1268
0
        *nplugins = np;
1269
1270
0
    return np;
1271
0
}
1272
1273
1274
/*
1275
 * Tests for the presence of a specific hFILE plugin.
1276
 *
1277
 * Returns 1 if true
1278
 *         0 otherwise
1279
 */
1280
HTSLIB_EXPORT
1281
int hfile_has_plugin(const char *name)
1282
0
{
1283
0
    pthread_mutex_lock(&plugins_lock);
1284
0
    if (!schemes && load_hfile_plugins() < 0) {
1285
0
        pthread_mutex_unlock(&plugins_lock);
1286
0
        return -1;
1287
0
    }
1288
0
    pthread_mutex_unlock(&plugins_lock);
1289
1290
0
    struct hFILE_plugin_list *p = plugins;
1291
0
    while (p) {
1292
0
        if (strcmp(p->plugin.name, name) == 0)
1293
0
            return 1;
1294
0
        p = p->next;
1295
0
    }
1296
1297
0
    return 0;
1298
0
}
1299
1300
/***************************
1301
 * hFILE interface proper
1302
 ***************************/
1303
1304
hFILE *hopen(const char *fname, const char *mode, ...)
1305
35.4k
{
1306
35.4k
    const struct hFILE_scheme_handler *handler = find_scheme_handler(fname);
1307
35.4k
    if (handler) {
1308
20.9k
        if (strchr(mode, ':') == NULL
1309
20.1k
            || handler->priority < 2000
1310
20.1k
            || handler->vopen == NULL) {
1311
815
            return handler->open(fname, mode);
1312
815
        }
1313
20.1k
        else {
1314
20.1k
            hFILE *fp;
1315
20.1k
            va_list arg;
1316
20.1k
            va_start(arg, mode);
1317
20.1k
            fp = handler->vopen(fname, mode, arg);
1318
20.1k
            va_end(arg);
1319
20.1k
            return fp;
1320
20.1k
        }
1321
20.9k
    }
1322
14.4k
    else if (strcmp(fname, "-") == 0) return hopen_fd_stdinout(mode);
1323
14.4k
    else return hopen_fd(fname, mode);
1324
35.4k
}
1325
1326
HTSLIB_EXPORT
1327
372
int hfile_always_local(const char *fname)
{
    // isremote method for handlers whose files are never remote;
    // fname is not examined.
    (void) fname;
    return 0;
}
1328
1329
HTSLIB_EXPORT
1330
303
int hfile_always_remote(const char *fname)
{
    // isremote method for handlers whose files are always remote;
    // fname is not examined.
    (void) fname;
    return 1;
}
1331
1332
int hisremote(const char *fname)
1333
6.22k
{
1334
6.22k
    const struct hFILE_scheme_handler *handler = find_scheme_handler(fname);
1335
6.22k
    return handler? handler->isremote(fname) : 0;
1336
6.22k
}
1337
1338
// Locate the extension, if any, of the basename part of [start,limit).
// Scans backwards from limit: returns a pointer to the last '.' found before
// reaching a '/' or start, or limit itself if there is no such '.'.
// Note: Doesn't notice percent-encoded '.' and '/' characters. Don't do that.
static const char *strip_extension(const char *start, const char *limit)
{
    for (const char *p = limit; p > start; ) {
        char c = *--p;
        if (c == '.') return p;
        if (c == '/') break;
    }
    return limit;
}
1350
1351
char *haddextension(struct kstring_t *buffer, const char *filename,
1352
                    int replace, const char *new_extension)
1353
0
{
1354
0
    const char *trailing, *end;
1355
1356
0
    if (find_scheme_handler(filename)) {
1357
        // URL, so alter extensions before any trailing query or fragment parts
1358
        // Allow # symbols in s3 URLs
1359
0
        trailing = filename + ((strncmp(filename, "s3://", 5) && strncmp(filename, "s3+http://", 10) && strncmp(filename, "s3+https://", 11))  ? strcspn(filename, "?#") : strcspn(filename, "?"));
1360
0
    }
1361
0
    else {
1362
        // Local path, so alter extensions at the end of the filename
1363
0
        trailing = strchr(filename, '\0');
1364
0
    }
1365
1366
0
    end = replace? strip_extension(filename, trailing) : trailing;
1367
1368
0
    buffer->l = 0;
1369
0
    if (kputsn(filename, end - filename, buffer) >= 0 &&
1370
0
        kputs(new_extension, buffer) >= 0 &&
1371
0
        kputs(trailing, buffer) >= 0) return buffer->s;
1372
0
    else return NULL;
1373
0
}
1374
1375
1376
/*
1377
 * ----------------------------------------------------------------------
1378
 * Minimal stub functions for knet, added after the removal of
1379
 * hfile_net.c and knetfile.c.
1380
 *
1381
 * They exist purely for ABI compatibility, but are simply wrappers to
1382
 * hFILE.  API should be compatible except knet_fileno (unused?).
1383
 *
1384
 * CULL THESE and knetfile.h at the next .so version bump.
1385
 */
1386
typedef struct knetFile_s {
1387
    // As per htslib/knetfile.h.  Duplicated here as we don't wish to
1388
    // have any dependence on the deprecated knetfile.h interface, plus
1389
    // it's hopefully only temporary.
1390
    int type, fd;
1391
    int64_t offset;
1392
    char *host, *port;
1393
    int ctrl_fd, pasv_ip[4], pasv_port, max_response, no_reconnect, is_ready;
1394
    char *response, *retr, *size_cmd;
1395
    int64_t seek_offset;
1396
    int64_t file_size;
1397
    char *path, *http_host;
1398
1399
    // Our local addition
1400
    hFILE *hf;
1401
} knetFile;
1402
1403
HTSLIB_EXPORT
1404
0
knetFile *knet_open(const char *fn, const char *mode) {
1405
0
    knetFile *fp = calloc(1, sizeof(*fp));
1406
0
    if (!fp) return NULL;
1407
0
    if (!(fp->hf = hopen(fn, mode))) {
1408
0
        free(fp);
1409
0
        return NULL;
1410
0
    }
1411
1412
    // FD backend is the only one implementing knet_fileno
1413
0
    fp->fd = fp->hf->backend == &fd_backend
1414
0
        ? ((hFILE_fd *)fp->hf)->fd
1415
0
        : -1;
1416
1417
0
    return fp;
1418
0
}
1419
1420
HTSLIB_EXPORT
1421
0
knetFile *knet_dopen(int fd, const char *mode) {
1422
0
    knetFile *fp = calloc(1, sizeof(*fp));
1423
0
    if (!fp) return NULL;
1424
0
    if (!(fp->hf = hdopen(fd, mode))) {
1425
0
        free(fp);
1426
0
        return NULL;
1427
0
    }
1428
0
    fp->fd = fd;
1429
0
    return fp;
1430
0
}
1431
1432
HTSLIB_EXPORT
1433
0
ssize_t knet_read(knetFile *fp, void *buf, size_t len) {
1434
0
    ssize_t r = hread(fp->hf, buf, len);
1435
0
    fp->offset += r>0?r:0;
1436
0
    return r;
1437
0
}
1438
1439
HTSLIB_EXPORT
1440
0
off_t knet_seek(knetFile *fp, off_t off, int whence) {
1441
0
    off_t r = hseek(fp->hf, off, whence);
1442
0
    if (r >= 0)
1443
0
        fp->offset = r;
1444
0
    return r;
1445
0
}
1446
1447
HTSLIB_EXPORT
1448
0
int knet_close(knetFile *fp) {
1449
0
    int r = hclose(fp->hf);
1450
0
    free(fp);
1451
0
    return r;
1452
0
}