Coverage Report

Created: 2025-11-09 07:19

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/htslib/htslib/hts.h
Line
Count
Source
1
/// @file htslib/hts.h
2
/// Format-neutral I/O, indexing, and iterator API functions.
3
/*
4
    Copyright (C) 2012-2022 Genome Research Ltd.
5
    Copyright (C) 2010, 2012 Broad Institute.
6
    Portions copyright (C) 2003-2006, 2008-2010 by Heng Li <lh3@live.co.uk>
7
8
    Author: Heng Li <lh3@sanger.ac.uk>
9
10
Permission is hereby granted, free of charge, to any person obtaining a copy
11
of this software and associated documentation files (the "Software"), to deal
12
in the Software without restriction, including without limitation the rights
13
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
copies of the Software, and to permit persons to whom the Software is
15
furnished to do so, subject to the following conditions:
16
17
The above copyright notice and this permission notice shall be included in
18
all copies or substantial portions of the Software.
19
20
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26
DEALINGS IN THE SOFTWARE.  */
27
28
#ifndef HTSLIB_HTS_H
29
#define HTSLIB_HTS_H
30
31
#include <stddef.h>
32
#include <stdint.h>
33
#include <inttypes.h>
34
35
#include "hts_defs.h"
36
#include "hts_log.h"
37
#include "kstring.h"
38
#include "kroundup.h"
39
40
#ifdef __cplusplus
41
extern "C" {
42
#endif
43
44
// Separator used to split HTS_PATH (for plugins); REF_PATH (cram references)
45
#if defined(_WIN32) || defined(__MSYS__)
46
#define HTS_PATH_SEPARATOR_CHAR ';'
47
#define HTS_PATH_SEPARATOR_STR  ";"
48
#else
49
17
#define HTS_PATH_SEPARATOR_CHAR ':'
50
#define HTS_PATH_SEPARATOR_STR  ":"
51
#endif
52
53
#ifndef HTS_BGZF_TYPEDEF
54
typedef struct BGZF BGZF;
55
#define HTS_BGZF_TYPEDEF
56
#endif
57
struct cram_fd;
58
struct hFILE;
59
struct hts_tpool;
60
struct sam_hdr_t;
61
62
/**
63
 * @hideinitializer
64
 * Deprecated macro to expand a dynamic array of a given type
65
 *
66
 * @param         type_t The type of the array elements
67
 * @param[in]     n      Requested number of elements of type type_t
68
 * @param[in,out] m      Size of memory allocated
69
 * @param[in,out] ptr    Pointer to the array
70
 *
71
 * @discussion
72
 * Do not use this macro.  Use hts_resize() instead as it allows allocation
73
 * failures to be handled more gracefully.
74
 *
75
 * The array *ptr will be expanded if necessary so that it can hold @p n
76
 * or more elements.  If the array is expanded then the new size will be
77
 * written to @p m and the value in @p ptr may change.
78
 *
79
 * It must be possible to take the address of @p ptr and @p m must be usable
80
 * as an lvalue.
81
 *
82
 * @bug
83
 * If the memory allocation fails, this will call exit(1).  This is
84
 * not ideal behaviour in a library.
85
 */
86
73.9k
#define hts_expand(type_t, n, m, ptr) do {                              \
87
73.9k
        if ((n) > (m)) {                                                \
88
5.14k
            size_t hts_realloc_or_die(size_t, size_t, size_t, size_t,   \
89
5.14k
                                      int, void **, const char *);      \
90
5.14k
            (m) = hts_realloc_or_die((n) >= 1 ? (n) : 1, (m), sizeof(m), \
91
5.14k
                                     sizeof(type_t),  0,                \
92
5.14k
                                     (void **)&(ptr), __func__);        \
93
5.14k
        }                                                               \
94
73.9k
    } while (0)
95
96
/**
97
 * @hideinitializer
98
 * Macro to expand a dynamic array, zeroing any newly-allocated memory
99
 *
100
 * @param         type_t The type of the array elements
101
 * @param[in]     n      Requested number of elements of type type_t
102
 * @param[in,out] m      Size of memory allocated
103
 * @param[in,out] ptr    Pointer to the array
104
 *
105
 * @discussion
106
 * Do not use this macro.  Use hts_resize() instead as it allows allocation
107
 * failures to be handled more gracefully.
108
 *
109
 * As for hts_expand(), except the bytes that make up the array elements
110
 * between the old and new values of @p m are set to zero using memset().
111
 *
112
 * @bug
113
 * If the memory allocation fails, this will call exit(1).  This is
114
 * not ideal behaviour in a library.
115
 */
116
117
118
0
#define hts_expand0(type_t, n, m, ptr) do {                             \
119
0
        if ((n) > (m)) {                                                \
120
0
            size_t hts_realloc_or_die(size_t, size_t, size_t, size_t,   \
121
0
                                      int, void **, const char *);      \
122
0
            (m) = hts_realloc_or_die((n) >= 1 ? (n) : 1, (m), sizeof(m), \
123
0
                                     sizeof(type_t), 1,                 \
124
0
                                     (void **)&(ptr), __func__);        \
125
0
        }                                                               \
126
0
    } while (0)
127
128
// For internal use (by hts_resize()) only
129
HTSLIB_EXPORT
130
int hts_resize_array_(size_t, size_t, size_t, void *, void **, int,
131
                      const char *);
132
133
27.4k
#define HTS_RESIZE_CLEAR 1
134
135
/**
136
 * @hideinitializer
137
 * Macro to expand a dynamic array of a given type
138
 *
139
 * @param         type_t    The type of the array elements
140
 * @param[in]     num       Requested number of elements of type type_t
141
 * @param[in,out] size_ptr  Pointer to where the size (in elements) of the
142
                            array is stored.
143
 * @param[in,out] ptr       Location of the pointer to the array
144
 * @param[in]     flags     Option flags
145
 *
146
 * @return        0 for success, or negative if an error occurred.
147
 *
148
 * @discussion
149
 * The array *ptr will be expanded if necessary so that it can hold @p num
150
 * or more elements.  If the array is expanded then the new size will be
151
 * written to @p *size_ptr and the value in @p *ptr may change.
152
 *
153
 * If ( @p flags & HTS_RESIZE_CLEAR ) is set, any newly allocated memory will
154
 * be cleared.
155
 */
156
157
#define hts_resize(type_t, num, size_ptr, ptr, flags)       \
158
112k
    ((num) > (*(size_ptr))                                  \
159
112k
     ? hts_resize_array_(sizeof(type_t), (num),             \
160
27.4k
                         sizeof(*(size_ptr)), (size_ptr),   \
161
27.4k
                         (void **)(ptr), (flags), __func__) \
162
112k
     : 0)
163
164
/// Release resources when dlclosing a dynamically loaded HTSlib
165
/** @discussion
166
 *  Normally HTSlib cleans up automatically when your program exits,
167
 *  whether that is via exit(3) or returning from main(). However if you
168
 *  have dlopen(3)ed HTSlib and wish to close it before your main program
169
 *  exits, you must call hts_lib_shutdown() before dlclose(3).
170
*/
171
HTSLIB_EXPORT
172
void hts_lib_shutdown(void);
173
174
/**
175
 * Wrapper function for free(). Enables memory deallocation across DLL
176
 * boundary. Should be used by all applications, which are compiled
177
 * with a different standard library than htslib and call htslib
178
 * methods that return dynamically allocated data.
179
 */
180
HTSLIB_EXPORT
181
void hts_free(void *ptr);
182
183
/************
184
 * File I/O *
185
 ************/
186
187
// Add new entries only at the end (but before the *_maximum entry)
188
// of these enums, as their numbering is part of the htslib ABI.
189
190
enum htsFormatCategory {
191
    unknown_category,
192
    sequence_data,    // Sequence data -- SAM, BAM, CRAM, etc
193
    variant_data,     // Variant calling data -- VCF, BCF, etc
194
    index_file,       // Index file associated with some data file
195
    region_list,      // Coordinate intervals or regions -- BED, etc
196
    category_maximum = 32767
197
};
198
199
enum htsExactFormat {
200
    unknown_format,
201
    binary_format, text_format,
202
    sam, bam, bai, cram, crai, vcf, bcf, csi, gzi, tbi, bed,
203
    htsget,
204
    json HTS_DEPRECATED_ENUM("Use htsExactFormat 'htsget' instead") = htsget,
205
    empty_format,  // File is empty (or empty after decompression)
206
    fasta_format, fastq_format, fai_format, fqi_format,
207
    hts_crypt4gh_format,
208
    d4_format,
209
    format_maximum = 32767
210
};
211
212
enum htsCompression {
213
    no_compression, gzip, bgzf, custom, bzip2_compression, razf_compression,
214
    xz_compression, zstd_compression,
215
    compression_maximum = 32767
216
};
217
218
typedef struct htsFormat {
219
    enum htsFormatCategory category;
220
    enum htsExactFormat format;
221
    struct { short major, minor; } version;
222
    enum htsCompression compression;
223
    short compression_level;  // currently unused
224
    void *specific;  // format specific options; see struct hts_opt.
225
} htsFormat;
226
227
struct hts_idx_t;
228
typedef struct hts_idx_t hts_idx_t;
229
struct hts_filter_t;
230
231
/**
232
 * @brief File handle returned by hts_open() etc.
233
 * This structure should be considered opaque by end users. There should be
234
 * no need to access most fields directly in user code, and in cases where
235
 * it is desirable accessor functions such as hts_get_format() are provided.
236
 */
237
// Maintainers note htsFile cannot be an incomplete struct because some of its
238
// fields are part of libhts.so's ABI (hence these fields must not be moved):
239
//  - fp is used in the public sam_itr_next()/etc macros
240
//  - is_bin is used directly in samtools <= 1.1 and bcftools <= 1.1
241
//  - is_write and is_cram are used directly in samtools <= 1.1
242
//  - fp is used directly in samtools (up to and including current develop)
243
//  - line is used directly in bcftools (up to and including current develop)
244
//  - is_bgzf and is_cram flags indicate which fp union member to use.
245
//    Note is_bgzf being set does not indicate the file is BGZF compressed,
246
//    nor even whether it is compressed at all (eg on naked BAMs).
247
typedef struct htsFile {
248
    uint32_t is_bin:1, is_write:1, is_be:1, is_cram:1, is_bgzf:1, dummy:27;
249
    int64_t lineno;
250
    kstring_t line;
251
    char *fn, *fn_aux;
252
    union {
253
        BGZF *bgzf;
254
        struct cram_fd *cram;
255
        struct hFILE *hfile;
256
    } fp;
257
    void *state;  // format specific state information
258
    htsFormat format;
259
    hts_idx_t *idx;
260
    const char *fnidx;
261
    struct sam_hdr_t *bam_header;
262
    struct hts_filter_t *filter;
263
} htsFile;
264
265
// A combined thread pool and queue allocation size.
266
// The pool should already be defined, but qsize may be zero to
267
// indicate an appropriate queue size is taken from the pool.
268
//
269
// Reasons for explicitly setting it could be where many more file
270
// descriptors are in use than threads, so keeping memory low is
271
// important.
272
typedef struct htsThreadPool {
273
    struct hts_tpool *pool; // The shared thread pool itself
274
    int qsize;    // Size of I/O queue to use for this fp
275
} htsThreadPool;
276
277
// REQUIRED_FIELDS
278
enum sam_fields {
279
    SAM_QNAME = 0x00000001,
280
    SAM_FLAG  = 0x00000002,
281
    SAM_RNAME = 0x00000004,
282
    SAM_POS   = 0x00000008,
283
    SAM_MAPQ  = 0x00000010,
284
    SAM_CIGAR = 0x00000020,
285
    SAM_RNEXT = 0x00000040,
286
    SAM_PNEXT = 0x00000080,
287
    SAM_TLEN  = 0x00000100,
288
    SAM_SEQ   = 0x00000200,
289
    SAM_QUAL  = 0x00000400,
290
    SAM_AUX   = 0x00000800,
291
    SAM_RGAUX = 0x00001000,
292
};
293
294
// Mostly CRAM only, but this could also include other format options
295
enum hts_fmt_option {
296
    // CRAM specific
297
    CRAM_OPT_DECODE_MD,
298
    CRAM_OPT_PREFIX,
299
    CRAM_OPT_VERBOSITY,  // obsolete, use hts_set_log_level() instead
300
    CRAM_OPT_SEQS_PER_SLICE,
301
    CRAM_OPT_SLICES_PER_CONTAINER,
302
    CRAM_OPT_RANGE,
303
    CRAM_OPT_VERSION,    // rename to cram_version?
304
    CRAM_OPT_EMBED_REF,
305
    CRAM_OPT_IGNORE_MD5,
306
    CRAM_OPT_REFERENCE,  // make general
307
    CRAM_OPT_MULTI_SEQ_PER_SLICE,
308
    CRAM_OPT_NO_REF,
309
    CRAM_OPT_USE_BZIP2,
310
    CRAM_OPT_SHARED_REF,
311
    CRAM_OPT_NTHREADS,   // deprecated, use HTS_OPT_NTHREADS
312
    CRAM_OPT_THREAD_POOL,// make general
313
    CRAM_OPT_USE_LZMA,
314
    CRAM_OPT_USE_RANS,
315
    CRAM_OPT_REQUIRED_FIELDS,
316
    CRAM_OPT_LOSSY_NAMES,
317
    CRAM_OPT_BASES_PER_SLICE,
318
    CRAM_OPT_STORE_MD,
319
    CRAM_OPT_STORE_NM,
320
    CRAM_OPT_RANGE_NOSEEK, // CRAM_OPT_RANGE minus the seek
321
    CRAM_OPT_USE_TOK,
322
    CRAM_OPT_USE_FQZ,
323
    CRAM_OPT_USE_ARITH,
324
    CRAM_OPT_POS_DELTA,  // force delta for AP, even on non-pos sorted data
325
326
    // General purpose
327
    HTS_OPT_COMPRESSION_LEVEL = 100,
328
    HTS_OPT_NTHREADS,
329
    HTS_OPT_THREAD_POOL,
330
    HTS_OPT_CACHE_SIZE,
331
    HTS_OPT_BLOCK_SIZE,
332
    HTS_OPT_FILTER,
333
    HTS_OPT_PROFILE,
334
335
    // Fastq
336
337
    // Boolean.
338
    // Read / Write CASAVA 1.8 format.
339
    // See https://emea.support.illumina.com/content/dam/illumina-support/documents/documentation/software_documentation/bcl2fastq/bcl2fastq_letterbooklet_15038058brpmi.pdf
340
    //
341
    // The CASAVA tag matches \d:[YN]:\d+:[ACGTN]+
342
    // The first \d is read 1/2 (1 or 2), [YN] is QC-PASS/FAIL flag,
343
    // \d+ is a control number, and the sequence at the end is
344
    // for barcode sequence.  Barcodes are read into the aux tag defined
345
    // by FASTQ_OPT_BARCODE ("BC" by default).
346
    FASTQ_OPT_CASAVA = 1000,
347
348
    // String.
349
    // Whether to read / write extra SAM format aux tags from the fastq
350
    // identifier line.  For reading this can simply be "1" to request
351
    // decoding aux tags.  For writing it is a comma separated list of aux
352
    // tag types to be written out.
353
    FASTQ_OPT_AUX,
354
355
    // Boolean.
356
    // Whether to add /1 and /2 to read identifiers when writing FASTQ.
357
    // These come from the BAM_FREAD1 or BAM_FREAD2 flags.
358
    // (Detecting the /1 and /2 is automatic when reading fastq.)
359
    FASTQ_OPT_RNUM,
360
361
    // Two character string.
362
    // Barcode aux tag for CASAVA; defaults to "BC".
363
    FASTQ_OPT_BARCODE,
364
365
    // Process SRA and ENA read names which pointlessly move the original
366
    // name to the second field and insert a constructed <run>.<number>
367
    // name in its place.
368
    FASTQ_OPT_NAME2,
369
370
    // Process the UMI tag.  Tag or Tag,tag,tag...
371
    // On read, this converts the last read-name element (Illumina) to the tag.
372
    // On write, it queries the tags in turn and copies the first found
373
    // to the read name suffix, converting any non-alpha to "+".
374
    FASTQ_OPT_UMI,
375
376
    // Regex to use for matching read name.
377
    // Def: "^[^:]+:[^:]+:[^:]+:[^:]+:[^:]+:[^:]+:[^:]+:([^:#/]+)"
378
    FASTQ_OPT_UMI_REGEX,
379
};
380
381
// Profile options for encoding; primarily used at present in CRAM
382
// but also usable in BAM as a synonym for deflate compression levels.
383
enum hts_profile_option {
384
    HTS_PROFILE_FAST,
385
    HTS_PROFILE_NORMAL,
386
    HTS_PROFILE_SMALL,
387
    HTS_PROFILE_ARCHIVE,
388
};
389
390
// For backwards compatibility
391
#define cram_option hts_fmt_option
392
393
typedef struct hts_opt {
394
    char *arg;                // string form, strdup()ed
395
    enum hts_fmt_option opt;  // tokenised key
396
    union {                   // ... and value
397
        int i;
398
        char *s;
399
    } val;
400
    struct hts_opt *next;
401
} hts_opt;
402
403
#define HTS_FILE_OPTS_INIT {{0},0}
404
405
/*
406
 * Explicit index file name delimiter, see below
407
 */
408
71.4k
#define HTS_IDX_DELIM "##idx##"
409
410
411
/**********************
412
 * Exported functions *
413
 **********************/
414
415
/*
416
 * Parses arg and appends it to the option list.
417
 *
418
 * Returns 0 on success;
419
 *        -1 on failure.
420
 */
421
HTSLIB_EXPORT
422
int hts_opt_add(hts_opt **opts, const char *c_arg);
423
424
/*
425
 * Applies an hts_opt option list to a given htsFile.
426
 *
427
 * Returns 0 on success
428
 *        -1 on failure
429
 */
430
HTSLIB_EXPORT
431
int hts_opt_apply(htsFile *fp, hts_opt *opts);
432
433
/*
434
 * Frees an hts_opt list.
435
 */
436
HTSLIB_EXPORT
437
void hts_opt_free(hts_opt *opts);
438
439
/*
440
 * Accepts a string file format (sam, bam, cram, vcf, bcf) optionally
441
 * followed by a comma separated list of key=value options and splits
442
 * these up into the fields of htsFormat struct.
443
 *
444
 * Returns 0 on success
445
 *        -1 on failure.
446
 */
447
HTSLIB_EXPORT
448
int hts_parse_format(htsFormat *opt, const char *str);
449
450
/*
451
 * Tokenise options as (key(=value)?,)*(key(=value)?)?
452
 * NB: No provision for ',' appearing in the value!
453
 * Add backslashing rules?
454
 *
455
 * This could be used as part of a general command line option parser or
456
 * as a string concatenated onto the file open mode.
457
 *
458
 * Returns 0 on success
459
 *        -1 on failure.
460
 */
461
HTSLIB_EXPORT
462
int hts_parse_opt_list(htsFormat *opt, const char *str);
463
464
/*! @abstract Table for converting a nucleotide character to 4-bit encoding.
465
The input character may be either an IUPAC ambiguity code, '=' for 0, or
466
'0'/'1'/'2'/'3' for a result of 1/2/4/8.  The result is encoded as 1/2/4/8
467
for A/C/G/T or combinations of these bits for ambiguous bases.
468
Additionally RNA U is treated as a T (8).
469
*/
470
HTSLIB_EXPORT
471
extern const unsigned char seq_nt16_table[256];
472
473
/*! @abstract Table for converting a 4-bit encoded nucleotide to an IUPAC
474
ambiguity code letter (or '=' when given 0).
475
*/
476
HTSLIB_EXPORT
477
extern const char seq_nt16_str[];
478
479
/*! @abstract Table for converting a 4-bit encoded nucleotide to about 2 bits.
480
Returns 0/1/2/3 for 1/2/4/8 (i.e., A/C/G/T), or 4 otherwise (0 or ambiguous).
481
*/
482
HTSLIB_EXPORT
483
extern const int seq_nt16_int[];
484
485
/*!
486
  @abstract  Get the htslib version number
487
  @return    For released versions, a string like "N.N[.N]"; or git describe
488
  output if using a library built within a Git repository.
489
*/
490
HTSLIB_EXPORT
491
const char *hts_version(void);
492
493
/*!
494
  @abstract  Compile-time HTSlib version number, for use in #if checks
495
  @return    For released versions X.Y[.Z], an integer of the form XYYYZZ;
496
  useful for preprocessor conditionals such as
497
      #if HTS_VERSION >= 101000  // Check for v1.10 or later
498
*/
499
// Maintainers: Bump this in the final stage of preparing a new release.
500
// Immediately after release, bump ZZ to 90 to distinguish in-development
501
// Git repository builds from the release; you may wish to increment this
502
// further when significant features are merged.
503
#define HTS_VERSION 102290
504
505
/*! @abstract Introspection on the features enabled in htslib
506
 *
507
 * @return a bitfield of HTS_FEATURE_* macros.
508
 */
509
HTSLIB_EXPORT
510
unsigned int hts_features(void);
511
512
HTSLIB_EXPORT
513
const char *hts_test_feature(unsigned int id);
514
515
/*! @abstract Introspection on the features enabled in htslib, string form
516
 *
517
 * @return a string describing htslib build features
518
 */
519
HTSLIB_EXPORT
520
const char *hts_feature_string(void);
521
522
// Whether ./configure was used or vanilla Makefile
523
0
#define HTS_FEATURE_CONFIGURE    1
524
525
// Whether --enable-plugins was used
526
0
#define HTS_FEATURE_PLUGINS      2
527
528
// Transport specific
529
0
#define HTS_FEATURE_LIBCURL      (1u<<10)
530
0
#define HTS_FEATURE_S3           (1u<<11)
531
0
#define HTS_FEATURE_GCS          (1u<<12)
532
533
// Compression options
534
0
#define HTS_FEATURE_LIBDEFLATE   (1u<<20)
535
0
#define HTS_FEATURE_LZMA         (1u<<21)
536
0
#define HTS_FEATURE_BZIP2        (1u<<22)
537
0
#define HTS_FEATURE_HTSCODECS    (1u<<23) // htscodecs library version
538
539
// Build params
540
0
#define HTS_FEATURE_CC           (1u<<27)
541
0
#define HTS_FEATURE_CFLAGS       (1u<<28)
542
0
#define HTS_FEATURE_CPPFLAGS     (1u<<29)
543
0
#define HTS_FEATURE_LDFLAGS      (1u<<30)
544
545
546
/*!
547
  @abstract    Determine format by peeking at the start of a file
548
  @param fp    File opened for reading, positioned at the beginning
549
  @param fmt   Format structure that will be filled out on return
550
  @return      0 for success, or negative if an error occurred.
551
552
  Equivalent to hts_detect_format2(fp, NULL, fmt).
553
*/
554
HTSLIB_EXPORT
555
int hts_detect_format(struct hFILE *fp, htsFormat *fmt);
556
557
/*!
558
  @abstract    Determine format primarily by peeking at the start of a file
559
  @param fp    File opened for reading, positioned at the beginning
560
  @param fname Name of the file, or NULL if not available
561
  @param fmt   Format structure that will be filled out on return
562
  @return      0 for success, or negative if an error occurred.
563
  @since       1.15
564
565
Some formats are only recognised if the filename is available and has the
566
expected extension, as otherwise more generic files may be misrecognised.
567
In particular:
568
 - FASTA/Q indexes must have .fai/.fqi extensions; without this requirement,
569
   some similar BED files would be misrecognised as indexes.
570
*/
571
HTSLIB_EXPORT
572
int hts_detect_format2(struct hFILE *fp, const char *fname, htsFormat *fmt);
573
574
/*!
575
  @abstract    Get a human-readable description of the file format
576
  @param format Format structure holding type, version, compression, etc.
577
  @return      Description string, to be freed by the caller after use.
578
*/
579
HTSLIB_EXPORT
580
char *hts_format_description(const htsFormat *format);
581
582
/*!
583
  @abstract       Open a sequence data (SAM/BAM/CRAM) or variant data (VCF/BCF)
584
                  or possibly-compressed textual line-orientated file
585
  @param fn       The file name or "-" for stdin/stdout. For indexed files
586
                  with a non-standard naming, the file name can include the
587
                  name of the index file delimited with HTS_IDX_DELIM
588
  @param mode     Mode matching / [rwa][bcefFguxz0-9]* /
589
  @discussion
590
      With 'r' opens for reading; any further format mode letters are ignored
591
      as the format is detected by checking the first few bytes or BGZF blocks
592
      of the file.  With 'w' or 'a' opens for writing or appending, with format
593
      specifier letters:
594
        b  binary format (BAM, BCF, etc) rather than text (SAM, VCF, etc)
595
        c  CRAM format
596
        f  FASTQ format
597
        F  FASTA format
598
        g  gzip compressed
599
        u  uncompressed
600
        z  bgzf compressed
601
        [0-9]  zlib compression level
602
      and with non-format option letters (for any of 'r'/'w'/'a'):
603
        e  close the file on exec(2) (opens with O_CLOEXEC, where supported)
604
        x  create the file exclusively (opens with O_EXCL, where supported)
605
      Note that there is a distinction between 'u' and '0': the first yields
606
      plain uncompressed output whereas the latter outputs uncompressed data
607
      wrapped in the zlib format.
608
  @example
609
      [rw]b  .. compressed BCF, BAM, FAI
610
      [rw]bu .. uncompressed BCF
611
      [rw]z  .. compressed VCF
612
      [rw]   .. uncompressed VCF
613
*/
614
HTSLIB_EXPORT
615
htsFile *hts_open(const char *fn, const char *mode);
616
617
/*!
618
  @abstract       Open a SAM/BAM/CRAM/VCF/BCF/etc file
619
  @param fn       The file name or "-" for stdin/stdout
620
  @param mode     Open mode, as per hts_open()
621
  @param fmt      Optional format specific parameters
622
  @discussion
623
      See hts_open() for description of fn and mode.
624
      // TODO Update documentation for s/opts/fmt/
625
      Opts contains a format string (sam, bam, cram, vcf, bcf) which will,
626
      if defined, override mode.  Opts also contains a linked list of hts_opt
627
      structures to apply to the open file handle.  These can contain things
628
      like pointers to the reference or information on compression levels,
629
      block sizes, etc.
630
*/
631
HTSLIB_EXPORT
632
htsFile *hts_open_format(const char *fn, const char *mode, const htsFormat *fmt);
633
634
/*!
635
  @abstract       Open an existing stream as a SAM/BAM/CRAM/VCF/BCF/etc file
636
  @param fp       The already-open file handle
637
  @param mode     Open mode, as per hts_open()
638
*/
639
HTSLIB_EXPORT
640
htsFile *hts_hopen(struct hFILE *fp, const char *fn, const char *mode);
641
642
/*!
643
  @abstract  For output streams, flush any buffered data
644
  @param fp  The file handle to be flushed
645
  @return    0 for success, or negative if an error occurred.
646
  @since     1.14
647
*/
648
HTSLIB_EXPORT
649
int hts_flush(htsFile *fp);
650
651
/*!
652
  @abstract  Close a file handle, flushing buffered data for output streams
653
  @param fp  The file handle to be closed
654
  @return    0 for success, or negative if an error occurred.
655
*/
656
HTSLIB_EXPORT
657
int hts_close(htsFile *fp);
658
659
/*!
660
  @abstract  Returns the file's format information
661
  @param fp  The file handle
662
  @return    Read-only pointer to the file's htsFormat.
663
*/
664
HTSLIB_EXPORT
665
const htsFormat *hts_get_format(htsFile *fp);
666
667
/*!
668
  @abstract       Returns a string containing the file format extension.
669
  @param format   Format structure containing the file type.
670
  @return         A string ("sam", "bam", etc) or "?" for unknown formats.
671
 */
672
HTSLIB_EXPORT
673
const char *hts_format_file_extension(const htsFormat *format);
674
675
/*!
676
  @abstract  Sets a specified CRAM option on the open file handle.
677
  @param fp  The file handle of the open file.
678
  @param opt The CRAM_OPT_* option.
679
  @param ... Optional arguments, dependent on the option used.
680
  @return    0 for success, or negative if an error occurred.
681
*/
682
HTSLIB_EXPORT
683
int hts_set_opt(htsFile *fp, enum hts_fmt_option opt, ...);
684
685
/*!
686
  @abstract         Read a line (and its \n or \r\n terminator) from a file
687
  @param fp         The file handle
688
  @param delimiter  Unused, but must be '\n' (or KS_SEP_LINE)
689
  @param str        The line (not including the terminator) is written here
690
  @return           Length of the string read (capped at INT_MAX);
691
                    -1 on end-of-file; <= -2 on error
692
*/
693
HTSLIB_EXPORT
694
int hts_getline(htsFile *fp, int delimiter, kstring_t *str);
695
696
HTSLIB_EXPORT
697
char **hts_readlines(const char *fn, int *_n);
698
/*!
699
    @abstract       Parse comma-separated list or read list from a file
700
    @param fn       File name or comma-separated list
701
    @param is_file  Non-zero if fn is the name of a file to read the list from
702
    @param _n       Size of the output array (number of items read)
703
    @return         NULL on failure or pointer to newly allocated array of
704
                    strings
705
*/
706
HTSLIB_EXPORT
707
char **hts_readlist(const char *fn, int is_file, int *_n);
708
709
/*!
710
  @abstract  Create extra threads to aid compression/decompression for this file
711
  @param fp  The file handle
712
  @param n   The number of worker threads to create
713
  @return    0 for success, or negative if an error occurred.
714
  @notes     This function creates non-shared threads for use solely by fp.
715
             The hts_set_thread_pool function is the recommended alternative.
716
*/
717
HTSLIB_EXPORT
718
int hts_set_threads(htsFile *fp, int n);
719
720
/*!
721
  @abstract  Create extra threads to aid compression/decompression for this file
722
  @param fp  The file handle
723
  @param p   A pool of worker threads, previously allocated by hts_create_threads().
724
  @return    0 for success, or negative if an error occurred.
725
*/
726
HTSLIB_EXPORT
727
int hts_set_thread_pool(htsFile *fp, htsThreadPool *p);
728
729
/*!
730
  @abstract  Adds a cache of decompressed blocks, potentially speeding up seeks.
731
             This may not work for all file types (currently it is bgzf only).
732
  @param fp  The file handle
733
  @param n   The size of cache, in bytes
734
*/
735
HTSLIB_EXPORT
736
void hts_set_cache_size(htsFile *fp, int n);
737
738
/*!
739
  @abstract  Set .fai filename for a file opened for reading
740
  @return    0 for success, negative on failure
741
  @discussion
742
      Called before *_hdr_read(), this provides the name of a .fai file
743
      used to provide a reference list if the htsFile contains no @SQ headers.
744
*/
745
HTSLIB_EXPORT
746
int hts_set_fai_filename(htsFile *fp, const char *fn_aux);
747
748
749
/*!
750
  @abstract  Sets a filter expression
751
  @return    0 for success, negative on failure
752
  @discussion
753
      To clear an existing filter, specify expr as NULL.
754
*/
755
HTSLIB_EXPORT
756
int hts_set_filter_expression(htsFile *fp, const char *expr);
757
758
/*!
759
  @abstract  Determine whether a given htsFile contains a valid EOF block
760
  @return    3 for a non-EOF checkable filetype;
761
             2 for an unseekable file type where EOF cannot be checked;
762
             1 for a valid EOF block;
763
             0 if the EOF marker is absent when it should be present;
764
            -1 (with errno set) on failure
765
  @discussion
766
      Check if the BGZF end-of-file (EOF) marker is present
767
*/
768
HTSLIB_EXPORT
769
int hts_check_EOF(htsFile *fp);
770
771
/************
772
 * Indexing *
773
 ************/
774
775
/*!
776
These HTS_IDX_* macros are used as special tid values for hts_itr_query()/etc,
777
producing iterators operating as follows:
778
 - HTS_IDX_NOCOOR iterates over unmapped reads sorted at the end of the file
779
 - HTS_IDX_START  iterates over the entire file
780
 - HTS_IDX_REST   iterates from the current position to the end of the file
781
 - HTS_IDX_NONE   always returns "no more alignment records"
782
When one of these special tid values is used, beg and end are ignored.
783
When REST or NONE is used, idx is also ignored and may be NULL.
784
*/
785
0
#define HTS_IDX_NOCOOR (-2)
786
0
#define HTS_IDX_START  (-3)
787
0
#define HTS_IDX_REST   (-4)
788
0
#define HTS_IDX_NONE   (-5)
789
790
0
#define HTS_FMT_CSI 0
791
5.15k
#define HTS_FMT_BAI 1
792
10.3k
#define HTS_FMT_TBI 2
793
0
#define HTS_FMT_CRAI 3
794
0
#define HTS_FMT_FAI 4
795
796
// Almost INT64_MAX, but when cast into a 32-bit int it's
797
// also INT_MAX instead of -1.  This avoids bugs with old code
798
// using the new hts_pos_t data type.
799
18.9M
#define HTS_POS_MAX ((((int64_t)INT_MAX)<<32)|INT_MAX)
800
#define HTS_POS_MIN INT64_MIN
801
1.67k
#define PRIhts_pos PRId64
802
typedef int64_t hts_pos_t;
803
804
// For comparison with previous release:
805
//
806
// #define HTS_POS_MAX INT_MAX
807
// #define HTS_POS_MIN INT_MIN
808
// #define PRIhts_pos PRId32
809
// typedef int32_t hts_pos_t;
810
811
/// A pair of positions giving the beginning and end of an interval
typedef struct hts_pair_pos_t {
812
   hts_pos_t beg, end;
813
} hts_pair_pos_t;
814
815
typedef hts_pair_pos_t hts_pair32_t;  // For backwards compatibility
816
817
/// A pair of unsigned 64-bit values
// NOTE(review): presumably the start (u) and end (v) file offsets of a chunk
// -- not established by this header; confirm against the users in hts.c.
typedef struct hts_pair64_t {
818
    uint64_t u, v;
819
} hts_pair64_t;
820
821
/// A u/v pair of unsigned 64-bit values with an additional max field
// hts_itr_t keeps its list of file offsets (off) as an array of these.
// NOTE(review): the meaning of max is not visible in this header -- confirm
// against the iterator code before relying on it.
typedef struct hts_pair64_max_t {
822
    uint64_t u, v;
823
    uint64_t max;
824
} hts_pair64_max_t;
825
826
/// One entry of a region list, as built by hts_reglist_create() and
/// consumed by hts_itr_regions()
// NOTE(review): the per-field notes below are inferred from the field names
// and types only -- confirm against hts_reglist_create() before relying on them.
typedef struct hts_reglist_t {
827
    const char *reg;             // presumably the target (reference) name
828
    hts_pair_pos_t *intervals;   // array of (beg, end) intervals
829
    int tid;                     // presumably the target id for reg
830
    uint32_t count;              // presumably the number of entries in intervals
831
    hts_pos_t min_beg, max_end;  // presumably the extremes over all intervals
832
} hts_reglist_t;
833
834
typedef int hts_readrec_func(BGZF *fp, void *data, void *r, int *tid, hts_pos_t *beg, hts_pos_t *end);
835
typedef int hts_seek_func(void *fp, int64_t offset, int where);
836
typedef int64_t hts_tell_func(void *fp);
837
838
/**
839
 * @brief File iterator that can handle multiple target regions.
840
 * This structure should be considered opaque by end users.
841
 * It does both the stepping inside the file and the filtering of alignments.
842
 * It can operate in single or multi-region mode, and depending on this,
843
 * it uses different fields.
844
 *
845
 * read_rest (1) - read everything from the current offset, without filtering
846
 * finished  (1) - no more iterations
847
 * is_cram   (1) - current file has CRAM format
848
 * nocoor    (1) - read all unmapped reads
849
 *
850
 * multi     (1) - multi-region mode
851
 * reg_list  - List of target regions
852
 * n_reg     - Size of the above list
853
 * curr_reg  - List index of the current region of search
854
 * curr_intv - Interval index inside the current region; points to a (beg, end)
855
 * end       - Used for CRAM files, to preserve the max end coordinate
856
 *
857
 * multi     (0) - single-region mode
858
 * tid       - Reference id of the target region
859
 * beg       - Start position of the target region
860
 * end       - End position of the target region
861
 *
862
 * Common fields:
863
 * off        - List of file offsets computed from the index
864
 * n_off      - Size of the above list
865
 * i          - List index of the current file offset
866
 * curr_off   - File offset for the next file read
867
 * curr_tid   - Reference id of the current alignment
868
 * curr_beg   - Start position of the current alignment
869
 * curr_end   - End position of the current alignment
870
 * nocoor_off - File offset where the unmapped reads start
871
 *
872
 * readrec    - File specific function that reads an alignment
873
 * seek       - File specific function for changing the file offset
874
 * tell       - File specific function for indicating the file offset
875
 */
876
877
// Iterator state shared by single-region and multi-region iteration.
// Which of the region fields are meaningful depends on the multi flag.
typedef struct hts_itr_t {
878
    uint32_t read_rest:1, finished:1, is_cram:1, nocoor:1, multi:1, dummy:27; // one-bit mode flags; dummy pads the bitfield to 32 bits
879
    int tid, n_off, i, n_reg; // target id (single-region); size of off; current index into off; size of reg_list
880
    hts_pos_t beg, end; // target region start/end (single-region); end keeps the max end coordinate for CRAM in multi-region mode
881
    hts_reglist_t *reg_list; // list of target regions (multi-region mode)
882
    int curr_tid, curr_reg, curr_intv; // reference id of the current alignment; current region index; interval index within that region
883
    hts_pos_t curr_beg, curr_end; // start/end position of the current alignment
884
    uint64_t curr_off, nocoor_off; // file offset for the next read; file offset where the unmapped reads start
885
    hts_pair64_max_t *off; // list of file offsets computed from the index
886
    hts_readrec_func *readrec; // file specific function that reads an alignment
887
    hts_seek_func *seek; // file specific function for changing the file offset
888
    hts_tell_func *tell; // file specific function for indicating the file offset
889
    // NOTE(review): bins is not described in this header; presumably a
    // growable array of bin numbers (n in use, m allocated, a the storage)
    // -- confirm against the iterator implementation.
    struct {
890
        int n, m;
891
        int *a;
892
    } bins;
893
} hts_itr_t;
894
895
typedef hts_itr_t hts_itr_multi_t;
896
897
/// Compute the first bin on a given level
898
0
#define hts_bin_first(l) (((1<<(((l)<<1) + (l))) - 1) / 7)
899
/// Compute the parent bin of a given bin
900
0
#define hts_bin_parent(b) (((b) - 1) >> 3)
901
902
///////////////////////////////////////////////////////////
903
// Low-level API for building indexes.
904
905
/// Create a BAI/CSI/TBI type index structure
906
/** @param n          Initial number of targets
907
    @param fmt        Format, one of HTS_FMT_CSI, HTS_FMT_BAI or HTS_FMT_TBI
908
    @param offset0    Initial file offset
909
    @param min_shift  Number of bits for the minimal interval
910
    @param n_lvls     Number of levels in the binning index
911
    @return An initialised hts_idx_t struct on success; NULL on failure
912
913
The struct returned by a successful call should be freed via hts_idx_destroy()
914
when it is no longer needed.
915
*/
916
HTSLIB_EXPORT
917
hts_idx_t *hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_lvls);
918
919
/// Free a BAI/CSI/TBI type index
920
/** @param idx   Index structure to free
921
 */
922
HTSLIB_EXPORT
923
void hts_idx_destroy(hts_idx_t *idx);
924
925
/// Push an index entry
926
/** @param idx        Index
927
    @param tid        Target id
928
    @param beg        Range start (zero-based)
929
    @param end        Range end (zero-based, half-open)
930
    @param offset     File offset
931
    @param is_mapped  Range corresponds to a mapped read
932
    @return 0 on success; -1 on failure
933
934
The @p is_mapped parameter is used to update the n_mapped / n_unmapped counts
935
stored in the meta-data bin.
936
 */
937
HTSLIB_EXPORT
938
int hts_idx_push(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped);
939
940
/// Finish building an index
941
/** @param idx          Index
942
    @param final_offset Last file offset
943
    @return 0 on success; non-zero on failure.
944
*/
945
HTSLIB_EXPORT
946
int hts_idx_finish(hts_idx_t *idx, uint64_t final_offset);
947
948
/// Returns index format
949
/** @param idx   Index
950
    @return One of HTS_FMT_CSI, HTS_FMT_BAI or HTS_FMT_TBI
951
*/
952
HTSLIB_EXPORT
953
int hts_idx_fmt(hts_idx_t *idx);
954
955
/// Add name to TBI index meta-data
956
/** @param idx   Index
957
    @param tid   Target identifier
958
    @param name  Target name
959
    @return Index number of name in names list on success; -1 on failure.
960
*/
961
HTSLIB_EXPORT
962
int hts_idx_tbi_name(hts_idx_t *idx, int tid, const char *name);
963
964
// Index loading and saving
965
966
/// Save an index to a file
967
/** @param idx  Index to be written
968
    @param fn   Input BAM/BCF/etc filename, to which .bai/.csi/etc will be added
969
    @param fmt  One of the HTS_FMT_* index formats
970
    @return  0 if successful, or negative if an error occurred.
971
*/
972
HTSLIB_EXPORT
973
int hts_idx_save(const hts_idx_t *idx, const char *fn, int fmt) HTS_RESULT_USED;
974
975
/// Save an index to a specific file
976
/** @param idx    Index to be written
977
    @param fn     Input BAM/BCF/etc filename
978
    @param fnidx  Output filename, or NULL to add .bai/.csi/etc to @a fn
979
    @param fmt    One of the HTS_FMT_* index formats
980
    @return  0 if successful, or negative if an error occurred.
981
*/
982
HTSLIB_EXPORT
983
int hts_idx_save_as(const hts_idx_t *idx, const char *fn, const char *fnidx, int fmt) HTS_RESULT_USED;
984
985
/// Load an index file
986
/** @param fn   BAM/BCF/etc filename, to which .bai/.csi/etc will be added or
987
                the extension substituted, to search for an existing index file.
988
                In case of a non-standard naming, the file name can include the
989
                name of the index file delimited with HTS_IDX_DELIM.
990
    @param fmt  One of the HTS_FMT_* index formats
991
    @return  The index, or NULL if an error occurred.
992
993
If @p fn contains the string "##idx##" (HTS_IDX_DELIM), the part before
994
the delimiter will be used as the name of the data file and the part after
995
it will be used as the name of the index.
996
997
Otherwise, this function tries to work out the index name as follows:
998
999
  It will try appending ".csi" to @p fn
1000
  It will try substituting an existing suffix (e.g. .bam, .vcf) with ".csi"
1001
  Then, if @p fmt is HTS_FMT_BAI:
1002
    It will try appending ".bai" to @p fn
1003
    It will try substituting the existing suffix (e.g. .bam) with ".bai"
1004
  else if @p fmt is HTS_FMT_TBI:
1005
    It will try appending ".tbi" to @p fn
1006
    It will try substituting the existing suffix (e.g. .vcf) with ".tbi"
1007
1008
If the index file is remote (served over a protocol like https), first a check
1009
is made to see if a locally cached copy is available.  This is done for all
1010
of the possible names listed above.  If a cached copy is not available then
1011
the index will be downloaded and stored in the current working directory,
1012
with the same name as the remote index.
1013
1014
    Equivalent to hts_idx_load3(fn, NULL, fmt, HTS_IDX_SAVE_REMOTE);
1015
*/
1016
HTSLIB_EXPORT
1017
hts_idx_t *hts_idx_load(const char *fn, int fmt);
1018
1019
/// Load a specific index file
1020
/** @param fn     Input BAM/BCF/etc filename
1021
    @param fnidx  The input index filename
1022
    @return  The index, or NULL if an error occurred.
1023
1024
    Equivalent to hts_idx_load3(fn, fnidx, 0, 0);
1025
1026
    This function will not attempt to save index files locally.
1027
*/
1028
HTSLIB_EXPORT
1029
hts_idx_t *hts_idx_load2(const char *fn, const char *fnidx);
1030
1031
/// Load a specific index file
1032
/** @param fn     Input BAM/BCF/etc filename
1033
    @param fnidx  The input index filename
1034
    @param fmt    One of the HTS_FMT_* index formats
1035
    @param flags  Flags to alter behaviour (see description)
1036
    @return  The index, or NULL if an error occurred.
1037
1038
    If @p fnidx is NULL, the index name will be derived from @p fn in the
1039
    same way as hts_idx_load().
1040
1041
    If @p fnidx is not NULL, @p fmt is ignored.
1042
1043
    The @p flags parameter can be set to a combination of the following
1044
    values:
1045
1046
        HTS_IDX_SAVE_REMOTE   Save a local copy of any remote indexes
1047
        HTS_IDX_SILENT_FAIL   Fail silently if the index is not present
1048
1049
    The index struct returned by a successful call should be freed
1050
    via hts_idx_destroy() when it is no longer needed.
1051
*/
1052
HTSLIB_EXPORT
1053
hts_idx_t *hts_idx_load3(const char *fn, const char *fnidx, int fmt, int flags);
1054
1055
/// Flags for hts_idx_load3() ( and also sam_idx_load3(), tbx_idx_load3() )
1056
0
#define HTS_IDX_SAVE_REMOTE 1
1057
10.3k
#define HTS_IDX_SILENT_FAIL 2
1058
1059
///////////////////////////////////////////////////////////
1060
// Functions for accessing meta-data stored in indexes
1061
1062
typedef const char *(*hts_id2name_f)(void*, int);
1063
1064
/// Get extra index meta-data
1065
/** @param idx    The index
1066
    @param l_meta Pointer to where the length of the extra data is stored
1067
    @return Pointer to the extra data if present; NULL otherwise
1068
1069
    Indexes (both .tbi and .csi) made by tabix include extra data about
1070
    the indexed file.  This function returns a pointer to this data.  Note that the
1071
    data is stored exactly as it is in the index.  Callers need to interpret
1072
    the results themselves, including knowing what sort of data to expect;
1073
    byte swapping etc.
1074
*/
1075
HTSLIB_EXPORT
1076
uint8_t *hts_idx_get_meta(hts_idx_t *idx, uint32_t *l_meta);
1077
1078
/// Set extra index meta-data
1079
/** @param idx     The index
1080
    @param l_meta  Length of data
1081
    @param meta    Pointer to the extra data
1082
    @param is_copy If not zero, a copy of the data is taken
1083
    @return 0 on success; -1 on failure (out of memory).
1084
1085
    Sets the data that is returned by hts_idx_get_meta().
1086
1087
    If is_copy != 0, a copy of the input data is taken.  If not, ownership of
1088
    the data pointed to by *meta passes to the index.
1089
*/
1090
HTSLIB_EXPORT
1091
int hts_idx_set_meta(hts_idx_t *idx, uint32_t l_meta, uint8_t *meta, int is_copy);
1092
1093
/// Get number of mapped and unmapped reads from an index
1094
/** @param      idx      Index
1095
    @param      tid      Target ID
1096
    @param[out] mapped   Location to store number of mapped reads
1097
    @param[out] unmapped Location to store number of unmapped reads
1098
    @return 0 on success; -1 on failure (data not available)
1099
1100
    BAI and CSI indexes store information on the number of reads for each
1101
    target that were mapped or unmapped (unmapped reads will generally have
1102
    a paired read that is mapped to the target).  This function returns this
1103
    information if it is available.
1104
1105
    @note Cram CRAI indexes do not include this information.
1106
*/
1107
HTSLIB_EXPORT
1108
int hts_idx_get_stat(const hts_idx_t* idx, int tid, uint64_t* mapped, uint64_t* unmapped);
1109
1110
/// Return the number of unplaced reads from an index
1111
/** @param idx    Index
1112
    @return Unplaced reads count
1113
1114
    Unplaced reads are not linked to any reference (e.g. RNAME is '*' in SAM
1115
    files).
1116
*/
1117
HTSLIB_EXPORT
1118
uint64_t hts_idx_get_n_no_coor(const hts_idx_t* idx);
1119
1120
/// Return a list of target names from an index
1121
/** @param      idx    Index
1122
    @param[out] n      Location to store the number of targets
1123
    @param      getid  Callback function to get the name for a target ID
1124
    @param      hdr    Header from indexed file
1125
    @return An array of pointers to the names on success; NULL on failure
1126
1127
    @note The names are pointers into the header data structure.  When cleaning
1128
    up, only the array should be freed, not the names.
1129
 */
1130
HTSLIB_EXPORT
1131
const char **hts_idx_seqnames(const hts_idx_t *idx, int *n, hts_id2name_f getid, void *hdr); // free only the array, not the values
1132
1133
/// Return the number of targets from an index
1134
/** @param      idx    Index
1135
    @return The number of targets
1136
 */
1137
HTSLIB_EXPORT
1138
int hts_idx_nseq(const hts_idx_t *idx);
1139
1140
///////////////////////////////////////////////////////////
1141
// Region parsing
1142
1143
0
#define HTS_PARSE_THOUSANDS_SEP 1  ///< Ignore ',' separators within numbers
1144
0
#define HTS_PARSE_ONE_COORD     2  ///< chr:pos means chr:pos-pos and not chr:pos-end
1145
0
#define HTS_PARSE_LIST          4  ///< Expect a comma separated list of regions. (Disables HTS_PARSE_THOUSANDS_SEP)
1146
1147
/// Parse a numeric string
1148
/** The number may be expressed in scientific notation, and optionally may
1149
    contain commas in the integer part (before any decimal point or E notation).
1150
    @param str     String to be parsed
1151
    @param strend  If non-NULL, set on return to point to the first character
1152
                   in @a str after those forming the parsed number
1153
    @param flags   Or'ed-together combination of HTS_PARSE_* flags
1154
    @return  Integer value of the parsed number, or 0 if no valid number
1155
1156
    The input string is parsed as: optional whitespace; an optional '+' or
1157
    '-' sign; decimal digits possibly including ',' characters (if @a flags
1158
    includes HTS_PARSE_THOUSANDS_SEP) and a '.' decimal point; and an optional
1159
    case-insensitive suffix, which may be either 'k', 'M', 'G', or scientific
1160
    notation consisting of 'e'/'E' followed by an optional '+' or '-' sign and
1161
    decimal digits. To be considered a valid numeric value, the main part (not
1162
    including any suffix or scientific notation) must contain at least one
1163
    digit (either before or after the decimal point).
1164
1165
    When @a strend is NULL, @a str is expected to contain only (optional
1166
    whitespace followed by) the numeric value. A warning will be printed
1167
    (if hts_verbose is HTS_LOG_WARNING or more) if no valid parsable number
1168
    is found or if there are any unused characters after the number.
1169
1170
    When @a strend is non-NULL, @a str starts with (optional whitespace
1171
    followed by) the numeric value. On return, @a strend is set to point
1172
    to the first unused character after the numeric value, or to @a str
1173
    if no valid parsable number is found.
1174
*/
1175
HTSLIB_EXPORT
1176
long long hts_parse_decimal(const char *str, char **strend, int flags);
1177
1178
typedef int (*hts_name2id_f)(void*, const char*);
1179
1180
/// Parse a "CHR:START-END"-style region string
1181
/** @param str  String to be parsed
1182
    @param beg  Set on return to the 0-based start of the region
1183
    @param end  Set on return to the 1-based end of the region
1184
    @return  Pointer to the colon or '\0' after the reference sequence name,
1185
             or NULL if @a str could not be parsed.
1186
1187
    NOTE: For compatibility with hts_parse_reg only.
1188
    Please use hts_parse_region instead.
1189
*/
1190
HTSLIB_EXPORT
1191
const char *hts_parse_reg64(const char *str, hts_pos_t *beg, hts_pos_t *end);
1192
1193
/// Parse a "CHR:START-END"-style region string
1194
/** @param str  String to be parsed
1195
    @param beg  Set on return to the 0-based start of the region
1196
    @param end  Set on return to the 1-based end of the region
1197
    @return  Pointer to the colon or '\0' after the reference sequence name,
1198
             or NULL if @a str could not be parsed.
1199
*/
1200
HTSLIB_EXPORT
1201
const char *hts_parse_reg(const char *str, int *beg, int *end);
1202
1203
/// Parse a "CHR:START-END"-style region string
1204
/** @param str   String to be parsed
1205
    @param tid   Set on return (if not NULL) to be reference index (-1 if invalid)
1206
    @param beg   Set on return to the 0-based start of the region
1207
    @param end   Set on return to the 1-based end of the region
1208
    @param getid Function pointer.  Called if not NULL to set tid.
1209
    @param hdr   Caller data passed to getid.
1210
    @param flags Bitwise HTS_PARSE_* flags listed above.
1211
    @return      Pointer to the byte after the end of the entire region
1212
                 specifier (including any trailing comma) on success,
1213
                 or NULL if @a str could not be parsed.
1214
1215
    A variant of hts_parse_reg which is reference-id aware.  It uses
1216
    the iterator name2id callbacks to validate the region tokenisation works.
1217
1218
    This is necessary due to GRCh38 HLA additions which have reference names
1219
    like "HLA-DRB1*12:17".
1220
1221
    To work around ambiguous parsing issues, eg both "chr1" and "chr1:100-200"
1222
    are reference names, quote using curly braces.
1223
    Thus "{chr1}:100-200" and "{chr1:100-200}" disambiguate the above example.
1224
1225
    Flags are used to control how parsing works, and can be one of the below.
1226
1227
    HTS_PARSE_THOUSANDS_SEP:
1228
        Ignore commas in numbers.  For example with this flag 1,234,567
1229
        is interpreted as 1234567.
1230
1231
    HTS_PARSE_LIST:
1232
        If present, the region is assumed to be a comma separated list and
1233
        position parsing will not contain commas (this implicitly
1234
        clears HTS_PARSE_THOUSANDS_SEP in the call to hts_parse_decimal).
1235
        On success the return pointer will be the start of the next region, ie
1236
        the character after the comma.  (If *ret != '\0' then the caller can
1237
        assume another region is present in the list.)
1238
1239
        If not set then positions may contain commas.  In this case the return
1240
        value should point to the end of the string, or NULL on failure.
1241
1242
    HTS_PARSE_ONE_COORD:
1243
        If present, X:100 is treated as the single base pair region X:100-100.
1244
        In this case X:-100 is shorthand for X:1-100 and X:100- is X:100-<end>.
1245
        (This is the standard bcftools region convention.)
1246
1247
        When not set X:100 is considered to be X:100-<end> where <end> is
1248
        the end of chromosome X (set to INT_MAX here).  X:100- and X:-100 are
1249
        invalid.
1250
        (This is the standard samtools region convention.)
1251
1252
    Note the supplied string expects 1 based inclusive coordinates, but the
1253
    returned coordinates start from 0 and are half open, so pos0 is valid
1254
    for use in e.g. "for (pos0 = beg; pos0 < end; pos0++) {...}"
1255
1256
    If NULL is returned, the value in tid may give additional information
1257
    about the error:
1258
1259
        -2   Failed to parse @p hdr; or out of memory
1260
        -1   The reference in @p str has mismatched braces, or does not
1261
             exist in @p hdr
1262
        >= 0 The specified range in @p str could not be parsed
1263
*/
1264
HTSLIB_EXPORT
1265
const char *hts_parse_region(const char *s, int *tid, hts_pos_t *beg,
1266
                             hts_pos_t *end, hts_name2id_f getid, void *hdr,
1267
                             int flags);
1268
1269
1270
///////////////////////////////////////////////////////////
1271
// Generic iterators
1272
//
1273
// These functions provide the low-level infrastructure for iterators.
1274
// Wrappers around these are used to make iterators for specific file types.
1275
// See:
1276
//     htslib/sam.h  for SAM/BAM/CRAM iterators
1277
//     htslib/vcf.h  for VCF/BCF iterators
1278
//     htslib/tbx.h  for files indexed by tabix
1279
1280
/// Create a single-region iterator
1281
/** @param idx      Index
1282
    @param tid      Target ID
1283
    @param beg      Start of region
1284
    @param end      End of region
1285
    @param readrec  Callback to read a record from the input file
1286
    @return An iterator on success; NULL on failure
1287
1288
    The iterator struct returned by a successful call should be freed
1289
    via hts_itr_destroy() when it is no longer needed.
1290
 */
1291
HTSLIB_EXPORT
1292
hts_itr_t *hts_itr_query(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func *readrec);
1293
1294
/// Free an iterator
1295
/** @param iter   Iterator to free
1296
 */
1297
HTSLIB_EXPORT
1298
void hts_itr_destroy(hts_itr_t *iter);
1299
1300
typedef hts_itr_t *hts_itr_query_func(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func *readrec);
1301
1302
/// Create a single-region iterator from a text region specification
1303
/** @param idx       Index
1304
    @param reg       Region specifier
1305
    @param getid     Callback function to return the target ID for a name
1306
    @param hdr       Input file header
1307
    @param itr_query Callback function returning an iterator for a numeric tid,
1308
                     start and end position
1309
    @param readrec   Callback to read a record from the input file
1310
    @return An iterator on success; NULL on error
1311
1312
    The iterator struct returned by a successful call should be freed
1313
    via hts_itr_destroy() when it is no longer needed.
1314
 */
1315
HTSLIB_EXPORT
1316
hts_itr_t *hts_itr_querys(const hts_idx_t *idx, const char *reg, hts_name2id_f getid, void *hdr, hts_itr_query_func *itr_query, hts_readrec_func *readrec);
1317
1318
/// Return the next record from an iterator
1319
/** @param fp      Input file handle
1320
    @param iter    Iterator
1321
    @param r       Pointer to record placeholder
1322
    @param data    Data passed to the readrec callback
1323
    @return >= 0 on success, -1 when there is no more data, < -1 on error
1324
 */
1325
HTSLIB_EXPORT
1326
int hts_itr_next(BGZF *fp, hts_itr_t *iter, void *r, void *data) HTS_RESULT_USED;
1327
1328
/**********************************
1329
 * Iterator with multiple regions *
1330
 **********************************/
1331
1332
typedef int hts_itr_multi_query_func(const hts_idx_t *idx, hts_itr_t *itr);
1333
HTSLIB_EXPORT
1334
int hts_itr_multi_bam(const hts_idx_t *idx, hts_itr_t *iter);
1335
HTSLIB_EXPORT
1336
int hts_itr_multi_cram(const hts_idx_t *idx, hts_itr_t *iter);
1337
1338
/// Create a multi-region iterator from a region list
1339
/** @param idx          Index
1340
    @param reglist      Region list
1341
    @param count        Number of items in region list
1342
    @param getid        Callback to convert names to target IDs
1343
    @param hdr          Indexed file header (passed to getid)
1344
    @param itr_specific Filetype-specific callback function
1345
    @param readrec      Callback to read an input file record
1346
    @param seek         Callback to seek in the input file
1347
    @param tell         Callback to return current input file location
1348
    @return An iterator on success; NULL on failure
1349
1350
    The iterator struct returned by a successful call should be freed
1351
    via hts_itr_destroy() when it is no longer needed.
1352
 */
1353
HTSLIB_EXPORT
1354
hts_itr_t *hts_itr_regions(const hts_idx_t *idx, hts_reglist_t *reglist, int count, hts_name2id_f getid, void *hdr, hts_itr_multi_query_func *itr_specific, hts_readrec_func *readrec, hts_seek_func *seek, hts_tell_func *tell);
1355
1356
/// Return the next record from an iterator
1357
/** @param fd      Input file handle
1358
    @param iter    Iterator
1359
    @param r       Pointer to record placeholder
1360
    @return >= 0 on success, -1 when there is no more data, < -1 on error
1361
 */
1362
HTSLIB_EXPORT
1363
int hts_itr_multi_next(htsFile *fd, hts_itr_t *iter, void *r);
1364
1365
/// Create a region list from a char array
1366
/** @param argv      Char array of target:interval elements, e.g. chr1:2500-3600, chr1:5100, chr2
1367
    @param argc      Number of items in the array
1368
    @param r_count   Pointer to the number of items in the resulting region list
1369
    @param hdr       Header for the sam/bam/cram file
1370
    @param getid     Callback to convert target names to target ids.
1371
    @return  A region list on success, NULL on failure
1372
1373
    The hts_reglist_t struct returned by a successful call should be freed
1374
    via hts_reglist_free() when it is no longer needed.
1375
 */
1376
HTSLIB_EXPORT
1377
hts_reglist_t *hts_reglist_create(char **argv, int argc, int *r_count, void *hdr,  hts_name2id_f getid);
1378
1379
/// Free a region list
1380
/** @param reglist    Region list
1381
    @param count      Number of items in the list
1382
 */
1383
HTSLIB_EXPORT
1384
void hts_reglist_free(hts_reglist_t *reglist, int count);
1385
1386
/// Free a multi-region iterator
1387
/** @param iter   Iterator to free
1388
 */
1389
#define hts_itr_multi_destroy(iter) hts_itr_destroy(iter)
1390
1391
1392
    /**
1393
     * hts_file_type() - Convenience function to determine file type
1394
     * DEPRECATED:  This function has been replaced by hts_detect_format().
1395
     * It and these FT_* macros will be removed in a future HTSlib release.
1396
     */
1397
    #define FT_UNKN   0
1398
0
    #define FT_GZ     1
1399
0
    #define FT_VCF    2
1400
0
    #define FT_VCF_GZ (FT_GZ|FT_VCF)
1401
0
    #define FT_BCF    (1<<2)
1402
0
    #define FT_BCF_GZ (FT_GZ|FT_BCF)
1403
0
    #define FT_STDIN  (1<<3)
1404
    HTSLIB_EXPORT
1405
    int hts_file_type(const char *fname);
1406
1407
1408
/***************************
1409
 * Revised MAQ error model *
1410
 ***************************/
1411
1412
struct errmod_t;
1413
typedef struct errmod_t errmod_t;
1414
1415
HTSLIB_EXPORT
1416
errmod_t *errmod_init(double depcorr);
1417
HTSLIB_EXPORT
1418
void errmod_destroy(errmod_t *em);
1419
1420
/*
1421
    n: number of bases
1422
    m: maximum base
1423
    bases[i]: qual:6, strand:1, base:4
1424
    q[i*m+j]: phred-scaled likelihood of (i,j)
1425
 */
1426
HTSLIB_EXPORT
1427
int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q);
1428
1429
1430
/*****************************************************
1431
 * Probabilistic banded glocal alignment             *
1432
 * See https://doi.org/10.1093/bioinformatics/btr076 *
1433
 *****************************************************/
1434
1435
/// Alignment parameters passed to probaln_glocal()
// NOTE(review): field meanings are not stated in this header; presumably
// d and e are the gap open and gap extension probabilities and bw is the
// band width -- confirm against the probaln_glocal() implementation.
typedef struct probaln_par_t {
1436
    float d, e;
1437
    int bw;
1438
} probaln_par_t;
1439
1440
/// Perform probabilistic banded glocal alignment
1441
/** @param      ref     Reference sequence
1442
    @param      l_ref   Length of reference
1443
    @param      query   Query sequence
1444
    @param      l_query Length of query sequence
1445
    @param      iqual   Query base qualities
1446
    @param      c       Alignment parameters
1447
    @param[out] state   Output alignment
1448
    @param[out] q    Phred scaled posterior probability of state[i] being wrong
1449
    @return     Phred-scaled likelihood score, or INT_MIN on failure.
1450
1451
The reference and query sequences are coded using integers 0,1,2,3,4 for
1452
bases A,C,G,T,N respectively (N here is for any ambiguity code).
1453
1454
On output, state and q are arrays of length l_query. In state[i], the higher 30
1455
bits give the reference position the query base is matched to and the
1456
lower two bits can be 0 (an alignment match) or 1 (an
1457
insertion). q[i] gives the phred scaled posterior probability of
1458
state[i] being wrong.
1459
1460
On failure, errno will be set to EINVAL if the values of l_ref or l_query
1461
were invalid; or ENOMEM if a memory allocation failed.
1462
*/
1463
1464
HTSLIB_EXPORT
1465
int probaln_glocal(const uint8_t *ref, int l_ref, const uint8_t *query, int l_query, const uint8_t *iqual, const probaln_par_t *c, int *state, uint8_t *q);
1466
1467
1468
    /**********************
1469
     * MD5 implementation *
1470
     **********************/
1471
1472
    struct hts_md5_context;
1473
    typedef struct hts_md5_context hts_md5_context;
1474
1475
    /*! @abstract   Initialises an MD5 context.
1476
     *  @discussion
1477
     *    The expected use is to allocate an hts_md5_context using
1478
     *    hts_md5_init().  This pointer is then passed into one or more calls
1479
     *    of hts_md5_update() to compute successive internal portions of the
1480
     *    MD5 sum, which can then be externalised as a full 16-byte MD5sum
1481
     *    calculation by calling hts_md5_final().  This can then be turned
1482
     *    into ASCII via hts_md5_hex().
1483
     *
1484
     *    To deallocate any resources created by hts_md5_init() call the
1485
     *    hts_md5_destroy() function.
1486
     *
1487
     *  @return     hts_md5_context pointer on success, NULL otherwise.
1488
     */
1489
    HTSLIB_EXPORT
1490
    hts_md5_context *hts_md5_init(void);
1491
1492
    /*! @abstract Updates the context with the MD5 of the data. */
1493
    HTSLIB_EXPORT
1494
    void hts_md5_update(hts_md5_context *ctx, const void *data, unsigned long size);
1495
1496
    /*! @abstract Computes the final 128-bit MD5 hash from the given context */
1497
    HTSLIB_EXPORT
1498
    void hts_md5_final(unsigned char *digest, hts_md5_context *ctx);
1499
1500
    /*! @abstract Resets an md5_context to the initial state, as returned
1501
     *            by hts_md5_init().
1502
     */
1503
    HTSLIB_EXPORT
1504
    void hts_md5_reset(hts_md5_context *ctx);
1505
1506
    /*! @abstract Converts a 128-bit MD5 hash into a 33-byte nul-terminated
1507
     *            hex string.
1508
     */
1509
    HTSLIB_EXPORT
1510
    void hts_md5_hex(char *hex, const unsigned char *digest);
1511
1512
    /*! @abstract Deallocates any memory allocated by hts_md5_init. */
1513
    HTSLIB_EXPORT
1514
    void hts_md5_destroy(hts_md5_context *ctx);
1515
1516
/// Compute the deepest bin that fully contains a region
/** @param beg        Region start (zero-based)
    @param end        Region end (zero-based, half-open); it is decremented
                      internally so that the last covered position is used
    @param min_shift  Number of bits for the minimal interval, i.e. the shift
                      applied to positions at the deepest level
    @param n_lvls     Number of levels in the binning index
    @return The number of the deepest bin in which @p beg and @p end - 1 fall
            together; 0 if no level from @p n_lvls down to 1 has such a bin.

The search starts at level @p n_lvls, whose first bin number is
((1 << 3 * n_lvls) - 1) / 7, and moves up one level per iteration: the shift
grows by 3 bits (bins become eight times wider) and t is moved back to the
first bin of the next level up.

NOTE(review): the result is narrowed to int, so this presumably relies on
callers keeping positions within the range the index geometry can address
-- confirm.
*/
static inline int hts_reg2bin(hts_pos_t beg, hts_pos_t end, int min_shift, int n_lvls)
1517
19.4M
{
1518
19.4M
    int l, s = min_shift, t = ((1<<((n_lvls<<1) + n_lvls)) - 1) / 7;
1519
19.5M
    for (--end, l = n_lvls; l > 0; --l, s += 3, t -= 1<<((l<<1)+l))
1520
19.5M
        if (beg>>s == end>>s) return t + (beg>>s);
1521
6.42k
    return 0;
1522
19.4M
}
Unexecuted instantiation: hts_open_fuzzer.c:hts_reg2bin
Unexecuted instantiation: header.c:hts_reg2bin
Unexecuted instantiation: hfile.c:hts_reg2bin
Unexecuted instantiation: hts.c:hts_reg2bin
Unexecuted instantiation: multipart.c:hts_reg2bin
Unexecuted instantiation: region.c:hts_reg2bin
sam.c:hts_reg2bin
Line
Count
Source
1517
19.4M
{
1518
19.4M
    int l, s = min_shift, t = ((1<<((n_lvls<<1) + n_lvls)) - 1) / 7;
1519
19.5M
    for (--end, l = n_lvls; l > 0; --l, s += 3, t -= 1<<((l<<1)+l))
1520
19.5M
        if (beg>>s == end>>s) return t + (beg>>s);
1521
6.42k
    return 0;
1522
19.4M
}
Unexecuted instantiation: sam_mods.c:hts_reg2bin
Unexecuted instantiation: simd.c:hts_reg2bin
Unexecuted instantiation: textutils.c:hts_reg2bin
Unexecuted instantiation: vcf.c:hts_reg2bin
Unexecuted instantiation: cram_decode.c:hts_reg2bin
Unexecuted instantiation: cram_encode.c:hts_reg2bin
Unexecuted instantiation: cram_index.c:hts_reg2bin
Unexecuted instantiation: cram_io.c:hts_reg2bin
Unexecuted instantiation: cram_stats.c:hts_reg2bin
Unexecuted instantiation: open_trace_file.c:hts_reg2bin
Unexecuted instantiation: hfile_libcurl.c:hts_reg2bin
Unexecuted instantiation: hfile_gcs.c:hts_reg2bin
Unexecuted instantiation: hfile_s3.c:hts_reg2bin
Unexecuted instantiation: hfile_s3_write.c:hts_reg2bin
Unexecuted instantiation: bgzf.c:hts_reg2bin
Unexecuted instantiation: faidx.c:hts_reg2bin
Unexecuted instantiation: md5.c:hts_reg2bin
Unexecuted instantiation: tbx.c:hts_reg2bin
Unexecuted instantiation: cram_codecs.c:hts_reg2bin
1523
1524
/// Compute the level of a bin in a binning index
/** @param bin  The bin number
    @return  The number of hts_bin_parent() steps needed to reach bin 0
 */
static inline int hts_bin_level(int bin) {
    int depth = 0;

    while (bin != 0) {
        bin = hts_bin_parent(bin);
        ++depth;
    }

    return depth;
}
Unexecuted instantiation: hts_open_fuzzer.c:hts_bin_level
Unexecuted instantiation: header.c:hts_bin_level
Unexecuted instantiation: hfile.c:hts_bin_level
Unexecuted instantiation: hts.c:hts_bin_level
Unexecuted instantiation: multipart.c:hts_bin_level
Unexecuted instantiation: region.c:hts_bin_level
Unexecuted instantiation: sam.c:hts_bin_level
Unexecuted instantiation: sam_mods.c:hts_bin_level
Unexecuted instantiation: simd.c:hts_bin_level
Unexecuted instantiation: textutils.c:hts_bin_level
Unexecuted instantiation: vcf.c:hts_bin_level
Unexecuted instantiation: cram_decode.c:hts_bin_level
Unexecuted instantiation: cram_encode.c:hts_bin_level
Unexecuted instantiation: cram_index.c:hts_bin_level
Unexecuted instantiation: cram_io.c:hts_bin_level
Unexecuted instantiation: cram_stats.c:hts_bin_level
Unexecuted instantiation: open_trace_file.c:hts_bin_level
Unexecuted instantiation: hfile_libcurl.c:hts_bin_level
Unexecuted instantiation: hfile_gcs.c:hts_bin_level
Unexecuted instantiation: hfile_s3.c:hts_bin_level
Unexecuted instantiation: hfile_s3_write.c:hts_bin_level
Unexecuted instantiation: bgzf.c:hts_bin_level
Unexecuted instantiation: faidx.c:hts_bin_level
Unexecuted instantiation: md5.c:hts_bin_level
Unexecuted instantiation: tbx.c:hts_bin_level
Unexecuted instantiation: cram_codecs.c:hts_bin_level
1530
1531
/**************************************
1532
 * Exposing the CRC32 implementation  *
1533
 * Either from zlib or libdeflate.    *
1534
 *************************************/
1535
HTSLIB_EXPORT
1536
uint32_t hts_crc32(uint32_t crc, const void *buf, size_t len);
1537
1538
1539
//! Compute the corresponding entry into the linear index of a given bin from
//! a binning index
/*!
 *  @param bin    The bin number
 *  @param n_lvls The index depth (number of levels - 0 based)
 *  @return       The integer offset into the linear index
 *
 *  Explanation of the return value formula:
 *  Each bin on level l covers exp(2, (n_lvls - l)*3 + min_shift) base pairs.
 *  A linear index entry covers exp(2, min_shift) base pairs.
 *  The bin's position within its own level is therefore scaled up by
 *  exp(2, (n_lvls - l)*3) to give the linear index entry.
 */
static inline int hts_bin_bot(int bin, int n_lvls)
{
    const int level = hts_bin_level(bin);
    const int pos_in_level = bin - hts_bin_first(level);

    return pos_in_level << ((n_lvls - level) * 3);
}
Unexecuted instantiation: hts_open_fuzzer.c:hts_bin_bot
Unexecuted instantiation: header.c:hts_bin_bot
Unexecuted instantiation: hfile.c:hts_bin_bot
Unexecuted instantiation: hts.c:hts_bin_bot
Unexecuted instantiation: multipart.c:hts_bin_bot
Unexecuted instantiation: region.c:hts_bin_bot
Unexecuted instantiation: sam.c:hts_bin_bot
Unexecuted instantiation: sam_mods.c:hts_bin_bot
Unexecuted instantiation: simd.c:hts_bin_bot
Unexecuted instantiation: textutils.c:hts_bin_bot
Unexecuted instantiation: vcf.c:hts_bin_bot
Unexecuted instantiation: cram_decode.c:hts_bin_bot
Unexecuted instantiation: cram_encode.c:hts_bin_bot
Unexecuted instantiation: cram_index.c:hts_bin_bot
Unexecuted instantiation: cram_io.c:hts_bin_bot
Unexecuted instantiation: cram_stats.c:hts_bin_bot
Unexecuted instantiation: open_trace_file.c:hts_bin_bot
Unexecuted instantiation: hfile_libcurl.c:hts_bin_bot
Unexecuted instantiation: hfile_gcs.c:hts_bin_bot
Unexecuted instantiation: hfile_s3.c:hts_bin_bot
Unexecuted instantiation: hfile_s3_write.c:hts_bin_bot
Unexecuted instantiation: bgzf.c:hts_bin_bot
Unexecuted instantiation: faidx.c:hts_bin_bot
Unexecuted instantiation: md5.c:hts_bin_bot
Unexecuted instantiation: tbx.c:hts_bin_bot
Unexecuted instantiation: cram_codecs.c:hts_bin_bot
1555
1556
/// Compute the (0-based exclusive) maximum position covered by a binning index
1557
static inline hts_pos_t hts_bin_maxpos(int min_shift, int n_lvls)
1558
0
{
1559
0
    hts_pos_t one = 1;
1560
0
    return one << (min_shift + n_lvls * 3);
1561
0
}
Unexecuted instantiation: hts_open_fuzzer.c:hts_bin_maxpos
Unexecuted instantiation: header.c:hts_bin_maxpos
Unexecuted instantiation: hfile.c:hts_bin_maxpos
Unexecuted instantiation: hts.c:hts_bin_maxpos
Unexecuted instantiation: multipart.c:hts_bin_maxpos
Unexecuted instantiation: region.c:hts_bin_maxpos
Unexecuted instantiation: sam.c:hts_bin_maxpos
Unexecuted instantiation: sam_mods.c:hts_bin_maxpos
Unexecuted instantiation: simd.c:hts_bin_maxpos
Unexecuted instantiation: textutils.c:hts_bin_maxpos
Unexecuted instantiation: vcf.c:hts_bin_maxpos
Unexecuted instantiation: cram_decode.c:hts_bin_maxpos
Unexecuted instantiation: cram_encode.c:hts_bin_maxpos
Unexecuted instantiation: cram_index.c:hts_bin_maxpos
Unexecuted instantiation: cram_io.c:hts_bin_maxpos
Unexecuted instantiation: cram_stats.c:hts_bin_maxpos
Unexecuted instantiation: open_trace_file.c:hts_bin_maxpos
Unexecuted instantiation: hfile_libcurl.c:hts_bin_maxpos
Unexecuted instantiation: hfile_gcs.c:hts_bin_maxpos
Unexecuted instantiation: hfile_s3.c:hts_bin_maxpos
Unexecuted instantiation: hfile_s3_write.c:hts_bin_maxpos
Unexecuted instantiation: bgzf.c:hts_bin_maxpos
Unexecuted instantiation: faidx.c:hts_bin_maxpos
Unexecuted instantiation: md5.c:hts_bin_maxpos
Unexecuted instantiation: tbx.c:hts_bin_maxpos
Unexecuted instantiation: cram_codecs.c:hts_bin_maxpos
1562
1563
/**************
1564
 * Endianness *
1565
 **************/
1566
1567
/// Detect the host byte order at run time
/** @return  1 if the lowest-addressed byte of a long holding the value 1 is
             zero (big-endian host), 0 otherwise
 */
static inline int ed_is_big(void)
{
    const long probe = 1;
    const char *lowest_byte = (const char *)&probe;

    return *lowest_byte == 0;
}
Unexecuted instantiation: hts_open_fuzzer.c:ed_is_big
Unexecuted instantiation: header.c:ed_is_big
Unexecuted instantiation: hfile.c:ed_is_big
hts.c:ed_is_big
Line
Count
Source
1568
64.6k
{
1569
64.6k
    long one= 1;
1570
64.6k
    return !(*((char *)(&one)));
1571
64.6k
}
Unexecuted instantiation: multipart.c:ed_is_big
Unexecuted instantiation: region.c:ed_is_big
Unexecuted instantiation: sam.c:ed_is_big
Unexecuted instantiation: sam_mods.c:ed_is_big
Unexecuted instantiation: simd.c:ed_is_big
Unexecuted instantiation: textutils.c:ed_is_big
Unexecuted instantiation: vcf.c:ed_is_big
Unexecuted instantiation: cram_decode.c:ed_is_big
Unexecuted instantiation: cram_encode.c:ed_is_big
Unexecuted instantiation: cram_index.c:ed_is_big
Unexecuted instantiation: cram_io.c:ed_is_big
Unexecuted instantiation: cram_stats.c:ed_is_big
Unexecuted instantiation: open_trace_file.c:ed_is_big
Unexecuted instantiation: hfile_libcurl.c:ed_is_big
Unexecuted instantiation: hfile_gcs.c:ed_is_big
Unexecuted instantiation: hfile_s3.c:ed_is_big
Unexecuted instantiation: hfile_s3_write.c:ed_is_big
bgzf.c:ed_is_big
Line
Count
Source
1568
15.8k
{
1569
15.8k
    long one= 1;
1570
15.8k
    return !(*((char *)(&one)));
1571
15.8k
}
Unexecuted instantiation: faidx.c:ed_is_big
Unexecuted instantiation: md5.c:ed_is_big
Unexecuted instantiation: tbx.c:ed_is_big
Unexecuted instantiation: cram_codecs.c:ed_is_big
1572
/// Reverse the byte order of a 16-bit value
/** @param v  Value to swap
    @return   v with its two bytes exchanged
 */
static inline uint16_t ed_swap_2(uint16_t v)
{
    const unsigned int low_byte  = v & 0xFFU;
    const unsigned int high_byte = (v >> 8) & 0xFFU;

    return (uint16_t)((low_byte << 8) | high_byte);
}
Unexecuted instantiation: hts_open_fuzzer.c:ed_swap_2
Unexecuted instantiation: header.c:ed_swap_2
Unexecuted instantiation: hfile.c:ed_swap_2
Unexecuted instantiation: hts.c:ed_swap_2
Unexecuted instantiation: multipart.c:ed_swap_2
Unexecuted instantiation: region.c:ed_swap_2
Unexecuted instantiation: sam.c:ed_swap_2
Unexecuted instantiation: sam_mods.c:ed_swap_2
Unexecuted instantiation: simd.c:ed_swap_2
Unexecuted instantiation: textutils.c:ed_swap_2
Unexecuted instantiation: vcf.c:ed_swap_2
Unexecuted instantiation: cram_decode.c:ed_swap_2
Unexecuted instantiation: cram_encode.c:ed_swap_2
Unexecuted instantiation: cram_index.c:ed_swap_2
Unexecuted instantiation: cram_io.c:ed_swap_2
Unexecuted instantiation: cram_stats.c:ed_swap_2
Unexecuted instantiation: open_trace_file.c:ed_swap_2
Unexecuted instantiation: hfile_libcurl.c:ed_swap_2
Unexecuted instantiation: hfile_gcs.c:ed_swap_2
Unexecuted instantiation: hfile_s3.c:ed_swap_2
Unexecuted instantiation: hfile_s3_write.c:ed_swap_2
Unexecuted instantiation: bgzf.c:ed_swap_2
Unexecuted instantiation: faidx.c:ed_swap_2
Unexecuted instantiation: md5.c:ed_swap_2
Unexecuted instantiation: tbx.c:ed_swap_2
Unexecuted instantiation: cram_codecs.c:ed_swap_2
1576
/// Reverse, in place, the byte order of the 2-byte value stored at x
/** @param x  Pointer to at least 2 writable bytes
    @return   x

    The bytes are exchanged through an unsigned char pointer instead of
    dereferencing x as uint16_t, so x does not have to be 2-byte aligned and
    the access does not depend on the effective type of the object.
 */
static inline void *ed_swap_2p(void *x)
{
    unsigned char *bytes = (unsigned char *)x;
    unsigned char tmp = bytes[0];

    bytes[0] = bytes[1];
    bytes[1] = tmp;
    return x;
}
Unexecuted instantiation: hts_open_fuzzer.c:ed_swap_2p
Unexecuted instantiation: header.c:ed_swap_2p
Unexecuted instantiation: hfile.c:ed_swap_2p
Unexecuted instantiation: hts.c:ed_swap_2p
Unexecuted instantiation: multipart.c:ed_swap_2p
Unexecuted instantiation: region.c:ed_swap_2p
Unexecuted instantiation: sam.c:ed_swap_2p
Unexecuted instantiation: sam_mods.c:ed_swap_2p
Unexecuted instantiation: simd.c:ed_swap_2p
Unexecuted instantiation: textutils.c:ed_swap_2p
Unexecuted instantiation: vcf.c:ed_swap_2p
Unexecuted instantiation: cram_decode.c:ed_swap_2p
Unexecuted instantiation: cram_encode.c:ed_swap_2p
Unexecuted instantiation: cram_index.c:ed_swap_2p
Unexecuted instantiation: cram_io.c:ed_swap_2p
Unexecuted instantiation: cram_stats.c:ed_swap_2p
Unexecuted instantiation: open_trace_file.c:ed_swap_2p
Unexecuted instantiation: hfile_libcurl.c:ed_swap_2p
Unexecuted instantiation: hfile_gcs.c:ed_swap_2p
Unexecuted instantiation: hfile_s3.c:ed_swap_2p
Unexecuted instantiation: hfile_s3_write.c:ed_swap_2p
Unexecuted instantiation: bgzf.c:ed_swap_2p
Unexecuted instantiation: faidx.c:ed_swap_2p
Unexecuted instantiation: md5.c:ed_swap_2p
Unexecuted instantiation: tbx.c:ed_swap_2p
Unexecuted instantiation: cram_codecs.c:ed_swap_2p
1581
/// Reverse the byte order of a 32-bit value
/** @param v  Value to swap
    @return   v with its four bytes in reverse order
 */
static inline uint32_t ed_swap_4(uint32_t v)
{
    const uint32_t byte0 = v & 0xFFU;          // least significant byte
    const uint32_t byte1 = (v >> 8) & 0xFFU;
    const uint32_t byte2 = (v >> 16) & 0xFFU;
    const uint32_t byte3 = v >> 24;            // most significant byte

    return (byte0 << 24) | (byte1 << 16) | (byte2 << 8) | byte3;
}
Unexecuted instantiation: hts_open_fuzzer.c:ed_swap_4
Unexecuted instantiation: header.c:ed_swap_4
Unexecuted instantiation: hfile.c:ed_swap_4
Unexecuted instantiation: hts.c:ed_swap_4
Unexecuted instantiation: multipart.c:ed_swap_4
Unexecuted instantiation: region.c:ed_swap_4
Unexecuted instantiation: sam.c:ed_swap_4
Unexecuted instantiation: sam_mods.c:ed_swap_4
Unexecuted instantiation: simd.c:ed_swap_4
Unexecuted instantiation: textutils.c:ed_swap_4
Unexecuted instantiation: vcf.c:ed_swap_4
Unexecuted instantiation: cram_decode.c:ed_swap_4
Unexecuted instantiation: cram_encode.c:ed_swap_4
Unexecuted instantiation: cram_index.c:ed_swap_4
Unexecuted instantiation: cram_io.c:ed_swap_4
Unexecuted instantiation: cram_stats.c:ed_swap_4
Unexecuted instantiation: open_trace_file.c:ed_swap_4
Unexecuted instantiation: hfile_libcurl.c:ed_swap_4
Unexecuted instantiation: hfile_gcs.c:ed_swap_4
Unexecuted instantiation: hfile_s3.c:ed_swap_4
Unexecuted instantiation: hfile_s3_write.c:ed_swap_4
Unexecuted instantiation: bgzf.c:ed_swap_4
Unexecuted instantiation: faidx.c:ed_swap_4
Unexecuted instantiation: md5.c:ed_swap_4
Unexecuted instantiation: tbx.c:ed_swap_4
Unexecuted instantiation: cram_codecs.c:ed_swap_4
1586
/// Reverse, in place, the byte order of the 4-byte value stored at x
/** @param x  Pointer to at least 4 writable bytes
    @return   x

    The bytes are exchanged through an unsigned char pointer instead of
    dereferencing x as uint32_t, so x does not have to be 4-byte aligned and
    the access does not depend on the effective type of the object.
 */
static inline void *ed_swap_4p(void *x)
{
    unsigned char *bytes = (unsigned char *)x;
    unsigned char tmp;

    tmp = bytes[0]; bytes[0] = bytes[3]; bytes[3] = tmp;
    tmp = bytes[1]; bytes[1] = bytes[2]; bytes[2] = tmp;
    return x;
}
Unexecuted instantiation: hts_open_fuzzer.c:ed_swap_4p
Unexecuted instantiation: header.c:ed_swap_4p
Unexecuted instantiation: hfile.c:ed_swap_4p
Unexecuted instantiation: hts.c:ed_swap_4p
Unexecuted instantiation: multipart.c:ed_swap_4p
Unexecuted instantiation: region.c:ed_swap_4p
Unexecuted instantiation: sam.c:ed_swap_4p
Unexecuted instantiation: sam_mods.c:ed_swap_4p
Unexecuted instantiation: simd.c:ed_swap_4p
Unexecuted instantiation: textutils.c:ed_swap_4p
Unexecuted instantiation: vcf.c:ed_swap_4p
Unexecuted instantiation: cram_decode.c:ed_swap_4p
Unexecuted instantiation: cram_encode.c:ed_swap_4p
Unexecuted instantiation: cram_index.c:ed_swap_4p
Unexecuted instantiation: cram_io.c:ed_swap_4p
Unexecuted instantiation: cram_stats.c:ed_swap_4p
Unexecuted instantiation: open_trace_file.c:ed_swap_4p
Unexecuted instantiation: hfile_libcurl.c:ed_swap_4p
Unexecuted instantiation: hfile_gcs.c:ed_swap_4p
Unexecuted instantiation: hfile_s3.c:ed_swap_4p
Unexecuted instantiation: hfile_s3_write.c:ed_swap_4p
Unexecuted instantiation: bgzf.c:ed_swap_4p
Unexecuted instantiation: faidx.c:ed_swap_4p
Unexecuted instantiation: md5.c:ed_swap_4p
Unexecuted instantiation: tbx.c:ed_swap_4p
Unexecuted instantiation: cram_codecs.c:ed_swap_4p
1591
/// Reverse the byte order of a 64-bit value
/** @param v  Value to swap
    @return   v with its eight bytes in reverse order
 */
static inline uint64_t ed_swap_8(uint64_t v)
{
    uint64_t reversed = 0;
    int i;

    // Peel bytes off the low end of v and push them onto the low end of
    // the result, so the first byte taken ends up most significant.
    for (i = 0; i < 8; i++) {
        reversed = (reversed << 8) | (v & 0xFFU);
        v >>= 8;
    }

    return reversed;
}
Unexecuted instantiation: hts_open_fuzzer.c:ed_swap_8
Unexecuted instantiation: header.c:ed_swap_8
Unexecuted instantiation: hfile.c:ed_swap_8
Unexecuted instantiation: hts.c:ed_swap_8
Unexecuted instantiation: multipart.c:ed_swap_8
Unexecuted instantiation: region.c:ed_swap_8
Unexecuted instantiation: sam.c:ed_swap_8
Unexecuted instantiation: sam_mods.c:ed_swap_8
Unexecuted instantiation: simd.c:ed_swap_8
Unexecuted instantiation: textutils.c:ed_swap_8
Unexecuted instantiation: vcf.c:ed_swap_8
Unexecuted instantiation: cram_decode.c:ed_swap_8
Unexecuted instantiation: cram_encode.c:ed_swap_8
Unexecuted instantiation: cram_index.c:ed_swap_8
Unexecuted instantiation: cram_io.c:ed_swap_8
Unexecuted instantiation: cram_stats.c:ed_swap_8
Unexecuted instantiation: open_trace_file.c:ed_swap_8
Unexecuted instantiation: hfile_libcurl.c:ed_swap_8
Unexecuted instantiation: hfile_gcs.c:ed_swap_8
Unexecuted instantiation: hfile_s3.c:ed_swap_8
Unexecuted instantiation: hfile_s3_write.c:ed_swap_8
bgzf.c:ed_swap_8
Line
Count
Source
1592
20
{
1593
20
    v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32);
1594
20
    v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16);
1595
20
    return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8);
1596
20
}
Unexecuted instantiation: faidx.c:ed_swap_8
Unexecuted instantiation: md5.c:ed_swap_8
Unexecuted instantiation: tbx.c:ed_swap_8
Unexecuted instantiation: cram_codecs.c:ed_swap_8
1597
/// Reverse, in place, the byte order of the 8-byte value stored at x
/** @param x  Pointer to at least 8 writable bytes
    @return   x

    The bytes are exchanged through an unsigned char pointer instead of
    dereferencing x as uint64_t, so x does not have to be 8-byte aligned and
    the access does not depend on the effective type of the object.
 */
static inline void *ed_swap_8p(void *x)
{
    unsigned char *bytes = (unsigned char *)x;
    int i;

    // Exchange byte i with its mirror image, working in from both ends.
    for (i = 0; i < 4; i++) {
        unsigned char tmp = bytes[i];
        bytes[i] = bytes[7 - i];
        bytes[7 - i] = tmp;
    }
    return x;
}
Unexecuted instantiation: hts_open_fuzzer.c:ed_swap_8p
Unexecuted instantiation: header.c:ed_swap_8p
Unexecuted instantiation: hfile.c:ed_swap_8p
Unexecuted instantiation: hts.c:ed_swap_8p
Unexecuted instantiation: multipart.c:ed_swap_8p
Unexecuted instantiation: region.c:ed_swap_8p
Unexecuted instantiation: sam.c:ed_swap_8p
Unexecuted instantiation: sam_mods.c:ed_swap_8p
Unexecuted instantiation: simd.c:ed_swap_8p
Unexecuted instantiation: textutils.c:ed_swap_8p
Unexecuted instantiation: vcf.c:ed_swap_8p
Unexecuted instantiation: cram_decode.c:ed_swap_8p
Unexecuted instantiation: cram_encode.c:ed_swap_8p
Unexecuted instantiation: cram_index.c:ed_swap_8p
Unexecuted instantiation: cram_io.c:ed_swap_8p
Unexecuted instantiation: cram_stats.c:ed_swap_8p
Unexecuted instantiation: open_trace_file.c:ed_swap_8p
Unexecuted instantiation: hfile_libcurl.c:ed_swap_8p
Unexecuted instantiation: hfile_gcs.c:ed_swap_8p
Unexecuted instantiation: hfile_s3.c:ed_swap_8p
Unexecuted instantiation: hfile_s3_write.c:ed_swap_8p
bgzf.c:ed_swap_8p
Line
Count
Source
1598
20
{
1599
20
    *(uint64_t*)x = ed_swap_8(*(uint64_t*)x);
1600
20
    return x;
1601
20
}
Unexecuted instantiation: faidx.c:ed_swap_8p
Unexecuted instantiation: md5.c:ed_swap_8p
Unexecuted instantiation: tbx.c:ed_swap_8p
Unexecuted instantiation: cram_codecs.c:ed_swap_8p
1602
1603
#ifdef __cplusplus
1604
}
1605
#endif
1606
1607
#endif