Line | Count | Source (jump to first uncovered line) |
1 | | /// @file htslib/hts.h |
2 | | /// Format-neutral I/O, indexing, and iterator API functions. |
3 | | /* |
4 | | Copyright (C) 2012-2022 Genome Research Ltd. |
5 | | Copyright (C) 2010, 2012 Broad Institute. |
6 | | Portions copyright (C) 2003-2006, 2008-2010 by Heng Li <lh3@live.co.uk> |
7 | | |
8 | | Author: Heng Li <lh3@sanger.ac.uk> |
9 | | |
10 | | Permission is hereby granted, free of charge, to any person obtaining a copy |
11 | | of this software and associated documentation files (the "Software"), to deal |
12 | | in the Software without restriction, including without limitation the rights |
13 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
14 | | copies of the Software, and to permit persons to whom the Software is |
15 | | furnished to do so, subject to the following conditions: |
16 | | |
17 | | The above copyright notice and this permission notice shall be included in |
18 | | all copies or substantial portions of the Software. |
19 | | |
20 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
21 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
22 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
23 | | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
24 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
25 | | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
26 | | DEALINGS IN THE SOFTWARE. */ |
27 | | |
28 | | #ifndef HTSLIB_HTS_H |
29 | | #define HTSLIB_HTS_H |
30 | | |
31 | | #include <stddef.h> |
32 | | #include <stdint.h> |
33 | | #include <inttypes.h> |
34 | | |
35 | | #include "hts_defs.h" |
36 | | #include "hts_log.h" |
37 | | #include "kstring.h" |
38 | | #include "kroundup.h" |
39 | | |
40 | | #ifdef __cplusplus |
41 | | extern "C" { |
42 | | #endif |
43 | | |
44 | | // Separator used to split HTS_PATH (for plugins); REF_PATH (cram references) |
45 | | #if defined(_WIN32) || defined(__MSYS__) |
46 | | #define HTS_PATH_SEPARATOR_CHAR ';' |
47 | | #define HTS_PATH_SEPARATOR_STR ";" |
48 | | #else |
49 | 0 | #define HTS_PATH_SEPARATOR_CHAR ':' |
50 | | #define HTS_PATH_SEPARATOR_STR ":" |
51 | | #endif |
52 | | |
53 | | #ifndef HTS_BGZF_TYPEDEF |
54 | | typedef struct BGZF BGZF; |
55 | | #define HTS_BGZF_TYPEDEF |
56 | | #endif |
57 | | struct cram_fd; |
58 | | struct hFILE; |
59 | | struct hts_tpool; |
60 | | struct sam_hdr_t; |
61 | | |
62 | | /** |
63 | | * @hideinitializer |
64 | | * Deprecated macro to expand a dynamic array of a given type |
65 | | * |
66 | | * @param type_t The type of the array elements |
67 | | * @param[in] n Requested number of elements of type type_t |
68 | | * @param[in,out] m Size of memory allocated |
69 | | * @param[in,out] ptr Pointer to the array |
70 | | * |
71 | | * @discussion |
72 | | * Do not use this macro. Use hts_resize() instead as it allows allocation |
73 | | * failures to be handled more gracefully. |
74 | | * |
75 | | * The array *ptr will be expanded if necessary so that it can hold @p n |
76 | | * or more elements. If the array is expanded then the new size will be |
77 | | * written to @p m and the value in @p ptr may change. |
78 | | * |
79 | | * It must be possible to take the address of @p ptr and @p m must be usable |
80 | | * as an lvalue. |
81 | | * |
82 | | * @bug |
83 | | * If the memory allocation fails, this will call exit(1). This is |
84 | | * not ideal behaviour in a library. |
85 | | */ |
86 | 43.1k | #define hts_expand(type_t, n, m, ptr) do { \ |
87 | 43.1k | if ((n) > (m)) { \ |
88 | 2.03k | size_t hts_realloc_or_die(size_t, size_t, size_t, size_t, \ |
89 | 2.03k | int, void **, const char *); \ |
90 | 2.03k | (m) = hts_realloc_or_die((n) >= 1 ? (n) : 1, (m), sizeof(m), \ |
91 | 2.03k | sizeof(type_t), 0, \ |
92 | 2.03k | (void **)&(ptr), __func__); \ |
93 | 2.03k | } \ |
94 | 43.1k | } while (0) |
95 | | |
96 | | /** |
97 | | * @hideinitializer |
98 | | * Macro to expand a dynamic array, zeroing any newly-allocated memory |
99 | | * |
100 | | * @param type_t The type of the array elements |
101 | | * @param[in] n Requested number of elements of type type_t |
102 | | * @param[in,out] m Size of memory allocated |
103 | | * @param[in,out] ptr Pointer to the array |
104 | | * |
105 | | * @discussion |
106 | | * Do not use this macro. Use hts_resize() instead as it allows allocation |
107 | | * failures to be handled more gracefully. |
108 | | * |
109 | | * As for hts_expand(), except the bytes that make up the array elements |
110 | | * between the old and new values of @p m are set to zero using memset(). |
111 | | * |
112 | | * @bug |
113 | | * If the memory allocation fails, this will call exit(1). This is |
114 | | * not ideal behaviour in a library. |
115 | | */ |
116 | | |
117 | | |
118 | 0 | #define hts_expand0(type_t, n, m, ptr) do { \ |
119 | 0 | if ((n) > (m)) { \ |
120 | 0 | size_t hts_realloc_or_die(size_t, size_t, size_t, size_t, \ |
121 | 0 | int, void **, const char *); \ |
122 | 0 | (m) = hts_realloc_or_die((n) >= 1 ? (n) : 1, (m), sizeof(m), \ |
123 | 0 | sizeof(type_t), 1, \ |
124 | 0 | (void **)&(ptr), __func__); \ |
125 | 0 | } \ |
126 | 0 | } while (0) |
127 | | |
128 | | // For internal use (by hts_resize()) only |
129 | | HTSLIB_EXPORT |
130 | | int hts_resize_array_(size_t, size_t, size_t, void *, void **, int, |
131 | | const char *); |
132 | | |
133 | 4.30k | #define HTS_RESIZE_CLEAR 1 |
134 | | |
135 | | /** |
136 | | * @hideinitializer |
137 | | * Macro to expand a dynamic array of a given type |
138 | | * |
139 | | * @param type_t The type of the array elements |
140 | | * @param[in] num Requested number of elements of type type_t |
141 | | * @param[in,out] size_ptr Pointer to where the size (in elements) of the |
142 | | array is stored. |
143 | | * @param[in,out] ptr Location of the pointer to the array |
144 | | * @param[in] flags Option flags |
145 | | * |
146 | | * @return 0 for success, or negative if an error occurred. |
147 | | * |
148 | | * @discussion |
149 | | * The array *ptr will be expanded if necessary so that it can hold @p num |
150 | | * or more elements. If the array is expanded then the new size will be |
151 | | * written to @p *size_ptr and the value in @p *ptr may change. |
152 | | * |
153 | | * If ( @p flags & HTS_RESIZE_CLEAR ) is set, any newly allocated memory will |
154 | | * be cleared. |
155 | | */ |
156 | | |
157 | | #define hts_resize(type_t, num, size_ptr, ptr, flags) \ |
158 | 11.8k | ((num) > (*(size_ptr)) \ |
159 | 11.8k | ? hts_resize_array_(sizeof(type_t), (num), \ |
160 | 4.30k | sizeof(*(size_ptr)), (size_ptr), \ |
161 | 4.30k | (void **)(ptr), (flags), __func__) \ |
162 | 11.8k | : 0) |
163 | | |
164 | | /// Release resources when dlclosing a dynamically loaded HTSlib |
165 | | /** @discussion |
166 | | * Normally HTSlib cleans up automatically when your program exits, |
167 | | * whether that is via exit(3) or returning from main(). However if you |
168 | | * have dlopen(3)ed HTSlib and wish to close it before your main program |
169 | | * exits, you must call hts_lib_shutdown() before dlclose(3). |
170 | | */ |
171 | | HTSLIB_EXPORT |
172 | | void hts_lib_shutdown(void); |
173 | | |
174 | | /** |
175 | | * Wrapper function for free(). Enables memory deallocation across DLL |
176 | | * boundary. Should be used by all applications, which are compiled |
177 | | * with a different standard library than htslib and call htslib |
178 | | * methods that return dynamically allocated data. |
179 | | */ |
180 | | HTSLIB_EXPORT |
181 | | void hts_free(void *ptr); |
182 | | |
183 | | /************ |
184 | | * File I/O * |
185 | | ************/ |
186 | | |
187 | | // Add new entries only at the end (but before the *_maximum entry) |
188 | | // of these enums, as their numbering is part of the htslib ABI. |
189 | | |
190 | | enum htsFormatCategory { |
191 | | unknown_category, |
192 | | sequence_data, // Sequence data -- SAM, BAM, CRAM, etc |
193 | | variant_data, // Variant calling data -- VCF, BCF, etc |
194 | | index_file, // Index file associated with some data file |
195 | | region_list, // Coordinate intervals or regions -- BED, etc |
196 | | category_maximum = 32767 |
197 | | }; |
198 | | |
199 | | enum htsExactFormat { |
200 | | unknown_format, |
201 | | binary_format, text_format, |
202 | | sam, bam, bai, cram, crai, vcf, bcf, csi, gzi, tbi, bed, |
203 | | htsget, |
204 | | json HTS_DEPRECATED_ENUM("Use htsExactFormat 'htsget' instead") = htsget, |
205 | | empty_format, // File is empty (or empty after decompression) |
206 | | fasta_format, fastq_format, fai_format, fqi_format, |
207 | | hts_crypt4gh_format, |
208 | | d4_format, |
209 | | format_maximum = 32767 |
210 | | }; |
211 | | |
212 | | enum htsCompression { |
213 | | no_compression, gzip, bgzf, custom, bzip2_compression, razf_compression, |
214 | | xz_compression, zstd_compression, |
215 | | compression_maximum = 32767 |
216 | | }; |
217 | | |
218 | | typedef struct htsFormat { |
219 | | enum htsFormatCategory category; |
220 | | enum htsExactFormat format; |
221 | | struct { short major, minor; } version; |
222 | | enum htsCompression compression; |
223 | | short compression_level; // currently unused |
224 | | void *specific; // format specific options; see struct hts_opt. |
225 | | } htsFormat; |
226 | | |
227 | | struct hts_idx_t; |
228 | | typedef struct hts_idx_t hts_idx_t; |
229 | | struct hts_filter_t; |
230 | | |
231 | | /** |
232 | | * @brief File handle returned by hts_open() etc. |
233 | | * This structure should be considered opaque by end users. There should be |
234 | | * no need to access most fields directly in user code, and in cases where |
235 | | * it is desirable accessor functions such as hts_get_format() are provided. |
236 | | */ |
237 | | // Maintainers note htsFile cannot be an incomplete struct because some of its |
238 | | // fields are part of libhts.so's ABI (hence these fields must not be moved): |
239 | | // - fp is used in the public sam_itr_next()/etc macros |
240 | | // - is_bin is used directly in samtools <= 1.1 and bcftools <= 1.1 |
241 | | // - is_write and is_cram are used directly in samtools <= 1.1 |
242 | | // - fp is used directly in samtools (up to and including current develop) |
243 | | // - line is used directly in bcftools (up to and including current develop) |
244 | | // - is_bgzf and is_cram flags indicate which fp union member to use. |
245 | | // Note is_bgzf being set does not indicate the file is BGZF compressed, |
246 | | // nor even whether it is compressed at all (eg on naked BAMs). |
247 | | typedef struct htsFile { |
248 | | uint32_t is_bin:1, is_write:1, is_be:1, is_cram:1, is_bgzf:1, dummy:27; |
249 | | int64_t lineno; |
250 | | kstring_t line; |
251 | | char *fn, *fn_aux; |
252 | | union { |
253 | | BGZF *bgzf; |
254 | | struct cram_fd *cram; |
255 | | struct hFILE *hfile; |
256 | | } fp; |
257 | | void *state; // format specific state information |
258 | | htsFormat format; |
259 | | hts_idx_t *idx; |
260 | | const char *fnidx; |
261 | | struct sam_hdr_t *bam_header; |
262 | | struct hts_filter_t *filter; |
263 | | } htsFile; |
264 | | |
265 | | // A combined thread pool and queue allocation size. |
266 | | // The pool should already be defined, but qsize may be zero to |
267 | | // indicate an appropriate queue size is taken from the pool. |
268 | | // |
269 | | // Reasons for explicitly setting it could be where many more file |
270 | | // descriptors are in use than threads, so keeping memory low is |
271 | | // important. |
272 | | typedef struct htsThreadPool { |
273 | | struct hts_tpool *pool; // The shared thread pool itself |
274 | | int qsize; // Size of I/O queue to use for this fp |
275 | | } htsThreadPool; |
276 | | |
277 | | // REQUIRED_FIELDS |
278 | | enum sam_fields { |
279 | | SAM_QNAME = 0x00000001, |
280 | | SAM_FLAG = 0x00000002, |
281 | | SAM_RNAME = 0x00000004, |
282 | | SAM_POS = 0x00000008, |
283 | | SAM_MAPQ = 0x00000010, |
284 | | SAM_CIGAR = 0x00000020, |
285 | | SAM_RNEXT = 0x00000040, |
286 | | SAM_PNEXT = 0x00000080, |
287 | | SAM_TLEN = 0x00000100, |
288 | | SAM_SEQ = 0x00000200, |
289 | | SAM_QUAL = 0x00000400, |
290 | | SAM_AUX = 0x00000800, |
291 | | SAM_RGAUX = 0x00001000, |
292 | | }; |
293 | | |
294 | | // Mostly CRAM only, but this could also include other format options |
295 | | enum hts_fmt_option { |
296 | | // CRAM specific |
297 | | CRAM_OPT_DECODE_MD, |
298 | | CRAM_OPT_PREFIX, |
299 | | CRAM_OPT_VERBOSITY, // obsolete, use hts_set_log_level() instead |
300 | | CRAM_OPT_SEQS_PER_SLICE, |
301 | | CRAM_OPT_SLICES_PER_CONTAINER, |
302 | | CRAM_OPT_RANGE, |
303 | | CRAM_OPT_VERSION, // rename to cram_version? |
304 | | CRAM_OPT_EMBED_REF, |
305 | | CRAM_OPT_IGNORE_MD5, |
306 | | CRAM_OPT_REFERENCE, // make general |
307 | | CRAM_OPT_MULTI_SEQ_PER_SLICE, |
308 | | CRAM_OPT_NO_REF, |
309 | | CRAM_OPT_USE_BZIP2, |
310 | | CRAM_OPT_SHARED_REF, |
311 | | CRAM_OPT_NTHREADS, // deprecated, use HTS_OPT_NTHREADS |
312 | | CRAM_OPT_THREAD_POOL,// make general |
313 | | CRAM_OPT_USE_LZMA, |
314 | | CRAM_OPT_USE_RANS, |
315 | | CRAM_OPT_REQUIRED_FIELDS, |
316 | | CRAM_OPT_LOSSY_NAMES, |
317 | | CRAM_OPT_BASES_PER_SLICE, |
318 | | CRAM_OPT_STORE_MD, |
319 | | CRAM_OPT_STORE_NM, |
320 | | CRAM_OPT_RANGE_NOSEEK, // CRAM_OPT_RANGE minus the seek |
321 | | CRAM_OPT_USE_TOK, |
322 | | CRAM_OPT_USE_FQZ, |
323 | | CRAM_OPT_USE_ARITH, |
324 | | CRAM_OPT_POS_DELTA, // force delta for AP, even on non-pos sorted data |
325 | | |
326 | | // General purpose |
327 | | HTS_OPT_COMPRESSION_LEVEL = 100, |
328 | | HTS_OPT_NTHREADS, |
329 | | HTS_OPT_THREAD_POOL, |
330 | | HTS_OPT_CACHE_SIZE, |
331 | | HTS_OPT_BLOCK_SIZE, |
332 | | HTS_OPT_FILTER, |
333 | | HTS_OPT_PROFILE, |
334 | | |
335 | | // Fastq |
336 | | |
337 | | // Boolean. |
338 | | // Read / Write CASAVA 1.8 format. |
339 | | // See https://emea.support.illumina.com/content/dam/illumina-support/documents/documentation/software_documentation/bcl2fastq/bcl2fastq_letterbooklet_15038058brpmi.pdf |
340 | | // |
341 | | // The CASAVA tag matches \d:[YN]:\d+:[ACGTN]+ |
342 | | // The first \d is read 1/2 (1 or 2), [YN] is QC-PASS/FAIL flag, |
343 | | // \d+ is a control number, and the sequence at the end is |
344 | | // for barcode sequence. Barcodes are read into the aux tag defined |
345 | | // by FASTQ_OPT_BARCODE ("BC" by default). |
346 | | FASTQ_OPT_CASAVA = 1000, |
347 | | |
348 | | // String. |
349 | | // Whether to read / write extra SAM format aux tags from the fastq |
350 | | // identifier line. For reading this can simply be "1" to request |
351 | | // decoding aux tags. For writing it is a comma separated list of aux |
352 | | // tag types to be written out. |
353 | | FASTQ_OPT_AUX, |
354 | | |
355 | | // Boolean. |
356 | | // Whether to add /1 and /2 to read identifiers when writing FASTQ. |
357 | | // These come from the BAM_FREAD1 or BAM_FREAD2 flags. |
358 | | // (Detecting the /1 and /2 is automatic when reading fastq.) |
359 | | FASTQ_OPT_RNUM, |
360 | | |
361 | | // Two character string. |
362 | | // Barcode aux tag for CASAVA; defaults to "BC". |
363 | | FASTQ_OPT_BARCODE, |
364 | | |
365 | | // Process SRA and ENA read names which pointlessly move the original |
366 | | // name to the second field and insert a constructed <run>.<number> |
367 | | // name in its place. |
368 | | FASTQ_OPT_NAME2, |
369 | | }; |
370 | | |
371 | | // Profile options for encoding; primarily used at present in CRAM |
372 | | // but also usable in BAM as a synonym for deflate compression levels. |
373 | | enum hts_profile_option { |
374 | | HTS_PROFILE_FAST, |
375 | | HTS_PROFILE_NORMAL, |
376 | | HTS_PROFILE_SMALL, |
377 | | HTS_PROFILE_ARCHIVE, |
378 | | }; |
379 | | |
380 | | // For backwards compatibility |
381 | | #define cram_option hts_fmt_option |
382 | | |
383 | | typedef struct hts_opt { |
384 | | char *arg; // string form, strdup()ed |
385 | | enum hts_fmt_option opt; // tokenised key |
386 | | union { // ... and value |
387 | | int i; |
388 | | char *s; |
389 | | } val; |
390 | | struct hts_opt *next; |
391 | | } hts_opt; |
392 | | |
393 | | #define HTS_FILE_OPTS_INIT {{0},0} |
394 | | |
395 | | /* |
396 | | * Explicit index file name delimiter, see below |
397 | | */ |
398 | 4.23k | #define HTS_IDX_DELIM "##idx##" |
399 | | |
400 | | |
401 | | /********************** |
402 | | * Exported functions * |
403 | | **********************/ |
404 | | |
405 | | /* |
406 | | * Parses arg and appends it to the option list. |
407 | | * |
408 | | * Returns 0 on success; |
409 | | * -1 on failure. |
410 | | */ |
411 | | HTSLIB_EXPORT |
412 | | int hts_opt_add(hts_opt **opts, const char *c_arg); |
413 | | |
414 | | /* |
415 | | * Applies an hts_opt option list to a given htsFile. |
416 | | * |
417 | | * Returns 0 on success |
418 | | * -1 on failure |
419 | | */ |
420 | | HTSLIB_EXPORT |
421 | | int hts_opt_apply(htsFile *fp, hts_opt *opts); |
422 | | |
423 | | /* |
424 | | * Frees an hts_opt list. |
425 | | */ |
426 | | HTSLIB_EXPORT |
427 | | void hts_opt_free(hts_opt *opts); |
428 | | |
429 | | /* |
430 | | * Accepts a string file format (sam, bam, cram, vcf, bcf) optionally |
431 | | * followed by a comma separated list of key=value options and splits |
432 | | * these up into the fields of htsFormat struct. |
433 | | * |
434 | | * Returns 0 on success |
435 | | * -1 on failure. |
436 | | */ |
437 | | HTSLIB_EXPORT |
438 | | int hts_parse_format(htsFormat *opt, const char *str); |
439 | | |
440 | | /* |
441 | | * Tokenise options as (key(=value)?,)*(key(=value)?)? |
442 | | * NB: No provision for ',' appearing in the value! |
443 | | * Add backslashing rules? |
444 | | * |
445 | | * This could be used as part of a general command line option parser or |
446 | | * as a string concatenated onto the file open mode. |
447 | | * |
448 | | * Returns 0 on success |
449 | | * -1 on failure. |
450 | | */ |
451 | | HTSLIB_EXPORT |
452 | | int hts_parse_opt_list(htsFormat *opt, const char *str); |
453 | | |
454 | | /*! @abstract Table for converting a nucleotide character to 4-bit encoding. |
455 | | The input character may be either an IUPAC ambiguity code, '=' for 0, or |
456 | | '0'/'1'/'2'/'3' for a result of 1/2/4/8. The result is encoded as 1/2/4/8 |
457 | | for A/C/G/T or combinations of these bits for ambiguous bases. |
458 | | */ |
459 | | HTSLIB_EXPORT |
460 | | extern const unsigned char seq_nt16_table[256]; |
461 | | |
462 | | /*! @abstract Table for converting a 4-bit encoded nucleotide to an IUPAC |
463 | | ambiguity code letter (or '=' when given 0). |
464 | | */ |
465 | | HTSLIB_EXPORT |
466 | | extern const char seq_nt16_str[]; |
467 | | |
468 | | /*! @abstract Table for converting a 4-bit encoded nucleotide to about 2 bits. |
469 | | Returns 0/1/2/3 for 1/2/4/8 (i.e., A/C/G/T), or 4 otherwise (0 or ambiguous). |
470 | | */ |
471 | | HTSLIB_EXPORT |
472 | | extern const int seq_nt16_int[]; |
473 | | |
474 | | /*! |
475 | | @abstract Get the htslib version number |
476 | | @return For released versions, a string like "N.N[.N]"; or git describe |
477 | | output if using a library built within a Git repository. |
478 | | */ |
479 | | HTSLIB_EXPORT |
480 | | const char *hts_version(void); |
481 | | |
482 | | /*! |
483 | | @abstract Compile-time HTSlib version number, for use in #if checks |
484 | | @return For released versions X.Y[.Z], an integer of the form XYYYZZ; |
485 | | useful for preprocessor conditionals such as |
486 | | #if HTS_VERSION >= 101000 // Check for v1.10 or later |
487 | | */ |
488 | | // Maintainers: Bump this in the final stage of preparing a new release. |
489 | | // Immediately after release, bump ZZ to 90 to distinguish in-development |
490 | | // Git repository builds from the release; you may wish to increment this |
491 | | // further when significant features are merged. |
492 | | #define HTS_VERSION 101690 |
493 | | |
494 | | /*! @abstract Introspection on the features enabled in htslib |
495 | | * |
496 | | * @return a bitfield of HTS_FEATURE_* macros. |
497 | | */ |
498 | | HTSLIB_EXPORT |
499 | | unsigned int hts_features(void); |
500 | | |
501 | | HTSLIB_EXPORT |
502 | | const char *hts_test_feature(unsigned int id); |
503 | | |
504 | | /*! @abstract Introspection on the features enabled in htslib, string form |
505 | | * |
506 | | * @return a string describing htslib build features |
507 | | */ |
508 | | HTSLIB_EXPORT |
509 | | const char *hts_feature_string(void); |
510 | | |
511 | | // Whether ./configure was used or vanilla Makefile |
512 | 0 | #define HTS_FEATURE_CONFIGURE 1 |
513 | | |
514 | | // Whether --enable-plugins was used |
515 | 0 | #define HTS_FEATURE_PLUGINS 2 |
516 | | |
517 | | // Transport specific |
518 | 0 | #define HTS_FEATURE_LIBCURL (1u<<10) |
519 | 0 | #define HTS_FEATURE_S3 (1u<<11) |
520 | 0 | #define HTS_FEATURE_GCS (1u<<12) |
521 | | |
522 | | // Compression options |
523 | 0 | #define HTS_FEATURE_LIBDEFLATE (1u<<20) |
524 | 0 | #define HTS_FEATURE_LZMA (1u<<21) |
525 | 0 | #define HTS_FEATURE_BZIP2 (1u<<22) |
526 | 0 | #define HTS_FEATURE_HTSCODECS (1u<<23) // htscodecs library version |
527 | | |
528 | | // Build params |
529 | 0 | #define HTS_FEATURE_CC (1u<<27) |
530 | 0 | #define HTS_FEATURE_CFLAGS (1u<<28) |
531 | 0 | #define HTS_FEATURE_CPPFLAGS (1u<<29) |
532 | 0 | #define HTS_FEATURE_LDFLAGS (1u<<30) |
533 | | |
534 | | |
535 | | /*! |
536 | | @abstract Determine format by peeking at the start of a file |
537 | | @param fp File opened for reading, positioned at the beginning |
538 | | @param fmt Format structure that will be filled out on return |
539 | | @return 0 for success, or negative if an error occurred. |
540 | | |
541 | | Equivalent to hts_detect_format2(fp, NULL, fmt). |
542 | | */ |
543 | | HTSLIB_EXPORT |
544 | | int hts_detect_format(struct hFILE *fp, htsFormat *fmt); |
545 | | |
546 | | /*! |
547 | | @abstract Determine format primarily by peeking at the start of a file |
548 | | @param fp File opened for reading, positioned at the beginning |
549 | | @param fname Name of the file, or NULL if not available |
550 | | @param fmt Format structure that will be filled out on return |
551 | | @return 0 for success, or negative if an error occurred. |
552 | | @since 1.15 |
553 | | |
554 | | Some formats are only recognised if the filename is available and has the |
555 | | expected extension, as otherwise more generic files may be misrecognised. |
556 | | In particular: |
557 | | - FASTA/Q indexes must have .fai/.fqi extensions; without this requirement, |
558 | | some similar BED files would be misrecognised as indexes. |
559 | | */ |
560 | | HTSLIB_EXPORT |
561 | | int hts_detect_format2(struct hFILE *fp, const char *fname, htsFormat *fmt); |
562 | | |
563 | | /*! |
564 | | @abstract Get a human-readable description of the file format |
565 | | @param fmt Format structure holding type, version, compression, etc. |
566 | | @return Description string, to be freed by the caller after use. |
567 | | */ |
568 | | HTSLIB_EXPORT |
569 | | char *hts_format_description(const htsFormat *format); |
570 | | |
571 | | /*! |
572 | | @abstract Open a sequence data (SAM/BAM/CRAM) or variant data (VCF/BCF) |
573 | | or possibly-compressed textual line-orientated file |
574 | | @param fn The file name or "-" for stdin/stdout. For indexed files |
575 | | with a non-standard naming, the file name can include the |
576 | | name of the index file delimited with HTS_IDX_DELIM |
577 | | @param mode Mode matching / [rwa][bcefFguxz0-9]* / |
578 | | @discussion |
579 | | With 'r' opens for reading; any further format mode letters are ignored |
580 | | as the format is detected by checking the first few bytes or BGZF blocks |
581 | | of the file. With 'w' or 'a' opens for writing or appending, with format |
582 | | specifier letters: |
583 | | b binary format (BAM, BCF, etc) rather than text (SAM, VCF, etc) |
584 | | c CRAM format |
585 | | f FASTQ format |
586 | | F FASTA format |
587 | | g gzip compressed |
588 | | u uncompressed |
589 | | z bgzf compressed |
590 | | [0-9] zlib compression level |
591 | | and with non-format option letters (for any of 'r'/'w'/'a'): |
592 | | e close the file on exec(2) (opens with O_CLOEXEC, where supported) |
593 | | x create the file exclusively (opens with O_EXCL, where supported) |
594 | | Note that there is a distinction between 'u' and '0': the first yields |
595 | | plain uncompressed output whereas the latter outputs uncompressed data |
596 | | wrapped in the zlib format. |
597 | | @example |
598 | | [rw]b .. compressed BCF, BAM, FAI |
599 | | [rw]bu .. uncompressed BCF |
600 | | [rw]z .. compressed VCF |
601 | | [rw] .. uncompressed VCF |
602 | | */ |
603 | | HTSLIB_EXPORT |
604 | | htsFile *hts_open(const char *fn, const char *mode); |
605 | | |
606 | | /*! |
607 | | @abstract Open a SAM/BAM/CRAM/VCF/BCF/etc file |
608 | | @param fn The file name or "-" for stdin/stdout |
609 | | @param mode Open mode, as per hts_open() |
610 | | @param fmt Optional format specific parameters |
611 | | @discussion |
612 | | See hts_open() for description of fn and mode. |
613 | | The fmt structure holds a format (sam, bam, cram, vcf, bcf) which will, |
614 | | if defined, override mode. Via its "specific" field (see struct htsFormat) |
615 | | fmt also contains a linked list of hts_opt |
616 | | structures to apply to the open file handle. These can contain things |
617 | | like pointers to the reference or information on compression levels, |
618 | | block sizes, etc. |
619 | | */ |
620 | | HTSLIB_EXPORT |
621 | | htsFile *hts_open_format(const char *fn, const char *mode, const htsFormat *fmt); |
622 | | |
623 | | /*! |
624 | | @abstract Open an existing stream as a SAM/BAM/CRAM/VCF/BCF/etc file |
625 | | @param fp The already-open file handle |
626 | | @param mode Open mode, as per hts_open() |
627 | | */ |
628 | | HTSLIB_EXPORT |
629 | | htsFile *hts_hopen(struct hFILE *fp, const char *fn, const char *mode); |
630 | | |
631 | | /*! |
632 | | @abstract For output streams, flush any buffered data |
633 | | @param fp The file handle to be flushed |
634 | | @return 0 for success, or negative if an error occurred. |
635 | | @since 1.14 |
636 | | */ |
637 | | HTSLIB_EXPORT |
638 | | int hts_flush(htsFile *fp); |
639 | | |
640 | | /*! |
641 | | @abstract Close a file handle, flushing buffered data for output streams |
642 | | @param fp The file handle to be closed |
643 | | @return 0 for success, or negative if an error occurred. |
644 | | */ |
645 | | HTSLIB_EXPORT |
646 | | int hts_close(htsFile *fp); |
647 | | |
648 | | /*! |
649 | | @abstract Returns the file's format information |
650 | | @param fp The file handle |
651 | | @return Read-only pointer to the file's htsFormat. |
652 | | */ |
653 | | HTSLIB_EXPORT |
654 | | const htsFormat *hts_get_format(htsFile *fp); |
655 | | |
656 | | /*! |
657 | | @abstract Returns a string containing the file format extension. |
658 | | @param format Format structure containing the file type. |
659 | | @return A string ("sam", "bam", etc) or "?" for unknown formats. |
660 | | */ |
661 | | HTSLIB_EXPORT |
662 | | const char *hts_format_file_extension(const htsFormat *format); |
663 | | |
664 | | /*! |
665 | | @abstract Sets a specified CRAM option on the open file handle. |
666 | | @param fp The file handle of the open file. |
667 | | @param opt The CRAM_OPT_* option. |
668 | | @param ... Optional arguments, dependent on the option used. |
669 | | @return 0 for success, or negative if an error occurred. |
670 | | */ |
671 | | HTSLIB_EXPORT |
672 | | int hts_set_opt(htsFile *fp, enum hts_fmt_option opt, ...); |
673 | | |
674 | | /*! |
675 | | @abstract Read a line (and its \n or \r\n terminator) from a file |
676 | | @param fp The file handle |
677 | | @param delimiter Unused, but must be '\n' (or KS_SEP_LINE) |
678 | | @param str The line (not including the terminator) is written here |
679 | | @return Length of the string read (capped at INT_MAX); |
680 | | -1 on end-of-file; <= -2 on error |
681 | | */ |
682 | | HTSLIB_EXPORT |
683 | | int hts_getline(htsFile *fp, int delimiter, kstring_t *str); |
684 | | |
685 | | HTSLIB_EXPORT |
686 | | char **hts_readlines(const char *fn, int *_n); |
687 | | /*! |
688 | | @abstract Parse comma-separated list or read list from a file |
689 | | @param fn File name or comma-separated list |
690 | | @param is_file Non-zero if fn names a file to read the list from; zero if fn is itself the comma-separated list |
691 | | @param _n Size of the output array (number of items read) |
692 | | @return NULL on failure or pointer to newly allocated array of |
693 | | strings |
694 | | */ |
695 | | HTSLIB_EXPORT |
696 | | char **hts_readlist(const char *fn, int is_file, int *_n); |
697 | | |
698 | | /*! |
699 | | @abstract Create extra threads to aid compression/decompression for this file |
700 | | @param fp The file handle |
701 | | @param n The number of worker threads to create |
702 | | @return 0 for success, or negative if an error occurred. |
703 | | @notes This function creates non-shared threads for use solely by fp. |
704 | | The hts_set_thread_pool function is the recommended alternative. |
705 | | */ |
706 | | HTSLIB_EXPORT |
707 | | int hts_set_threads(htsFile *fp, int n); |
708 | | |
709 | | /*! |
710 | | @abstract Create extra threads to aid compression/decompression for this file |
711 | | @param fp The file handle |
712 | | @param p A pool of worker threads, previously allocated by hts_create_threads(). |
713 | | @return 0 for success, or negative if an error occurred. |
714 | | */ |
715 | | HTSLIB_EXPORT |
716 | | int hts_set_thread_pool(htsFile *fp, htsThreadPool *p); |
717 | | |
718 | | /*! |
719 | | @abstract Adds a cache of decompressed blocks, potentially speeding up seeks. |
720 | | This may not work for all file types (currently it is bgzf only). |
721 | | @param fp The file handle |
722 | | @param n The size of cache, in bytes |
723 | | */ |
724 | | HTSLIB_EXPORT |
725 | | void hts_set_cache_size(htsFile *fp, int n); |
726 | | |
727 | | /*! |
728 | | @abstract Set .fai filename for a file opened for reading |
729 | | @return 0 for success, negative on failure |
730 | | @discussion |
731 | | Called before *_hdr_read(), this provides the name of a .fai file |
732 | | used to provide a reference list if the htsFile contains no @SQ headers. |
733 | | */ |
734 | | HTSLIB_EXPORT |
735 | | int hts_set_fai_filename(htsFile *fp, const char *fn_aux); |
736 | | |
737 | | |
738 | | /*! |
739 | | @abstract Sets a filter expression |
740 | | @return 0 for success, negative on failure |
741 | | @discussion |
742 | | To clear an existing filter, specify expr as NULL. |
743 | | */ |
744 | | HTSLIB_EXPORT |
745 | | int hts_set_filter_expression(htsFile *fp, const char *expr); |
746 | | |
747 | | /*! |
748 | | @abstract Determine whether a given htsFile contains a valid EOF block |
749 | | @return 3 for a non-EOF checkable filetype; |
750 | | 2 for an unseekable file type where EOF cannot be checked; |
751 | | 1 for a valid EOF block; |
752 | | 0 if the EOF marker is absent when it should be present; |
753 | | -1 (with errno set) on failure |
754 | | @discussion |
755 | | Check if the BGZF end-of-file (EOF) marker is present |
756 | | */ |
757 | | HTSLIB_EXPORT |
758 | | int hts_check_EOF(htsFile *fp); |
759 | | |
760 | | /************ |
761 | | * Indexing * |
762 | | ************/ |
763 | | |
764 | | /*! |
765 | | These HTS_IDX_* macros are used as special tid values for hts_itr_query()/etc, |
766 | | producing iterators operating as follows: |
767 | | - HTS_IDX_NOCOOR iterates over unmapped reads sorted at the end of the file |
768 | | - HTS_IDX_START iterates over the entire file |
769 | | - HTS_IDX_REST iterates from the current position to the end of the file |
770 | | - HTS_IDX_NONE always returns "no more alignment records" |
771 | | When one of these special tid values is used, beg and end are ignored. |
772 | | When REST or NONE is used, idx is also ignored and may be NULL. |
773 | | */ |
774 | 0 | #define HTS_IDX_NOCOOR (-2) |
775 | 0 | #define HTS_IDX_START (-3) |
776 | 0 | #define HTS_IDX_REST (-4) |
777 | 0 | #define HTS_IDX_NONE (-5) |
778 | | |
779 | 0 | #define HTS_FMT_CSI 0 |
780 | 674 | #define HTS_FMT_BAI 1 |
781 | 1.34k | #define HTS_FMT_TBI 2 |
782 | 0 | #define HTS_FMT_CRAI 3 |
783 | 0 | #define HTS_FMT_FAI 4 |
784 | | |
785 | | // Almost INT64_MAX, but when cast into a 32-bit int it's |
786 | | // also INT_MAX instead of -1. This avoids bugs with old code |
787 | | // using the new hts_pos_t data type. |
788 | 11.7k | #define HTS_POS_MAX ((((int64_t)INT_MAX)<<32)|INT_MAX) |
789 | | #define HTS_POS_MIN INT64_MIN |
790 | 1.02k | #define PRIhts_pos PRId64 |
791 | | typedef int64_t hts_pos_t; |
792 | | |
793 | | // For comparison with previous release: |
794 | | // |
795 | | // #define HTS_POS_MAX INT_MAX |
796 | | // #define HTS_POS_MIN INT_MIN |
797 | | // #define PRIhts_pos PRId32 |
798 | | // typedef int32_t hts_pos_t; |
799 | | |
800 | | typedef struct hts_pair_pos_t { |
801 | | hts_pos_t beg, end; |
802 | | } hts_pair_pos_t; |
803 | | |
804 | | typedef hts_pair_pos_t hts_pair32_t; // For backwards compatibility |
805 | | |
806 | | typedef struct hts_pair64_t { |
807 | | uint64_t u, v; |
808 | | } hts_pair64_t; |
809 | | |
810 | | typedef struct hts_pair64_max_t { |
811 | | uint64_t u, v; |
812 | | uint64_t max; |
813 | | } hts_pair64_max_t; |
814 | | |
815 | | typedef struct hts_reglist_t { |
816 | | const char *reg; |
817 | | hts_pair_pos_t *intervals; |
818 | | int tid; |
819 | | uint32_t count; |
820 | | hts_pos_t min_beg, max_end; |
821 | | } hts_reglist_t; |
822 | | |
823 | | typedef int hts_readrec_func(BGZF *fp, void *data, void *r, int *tid, hts_pos_t *beg, hts_pos_t *end); |
824 | | typedef int hts_seek_func(void *fp, int64_t offset, int where); |
825 | | typedef int64_t hts_tell_func(void *fp); |
826 | | |
827 | | /** |
828 | | * @brief File iterator that can handle multiple target regions. |
829 | | * This structure should be considered opaque by end users. |
830 | | * It does both the stepping inside the file and the filtering of alignments. |
831 | | * It can operate in single or multi-region mode, and depending on this, |
832 | | * it uses different fields. |
833 | | * |
834 | | * read_rest (1) - read everything from the current offset, without filtering |
835 | | * finished (1) - no more iterations |
836 | | * is_cram (1) - current file has CRAM format |
837 | | * nocoor (1) - read all unmapped reads |
838 | | * |
839 | | * multi (1) - multi-region mode |
840 | | * reg_list - List of target regions |
841 | | * n_reg - Size of the above list |
842 | | * curr_reg - List index of the current region of search |
843 | | * curr_intv - Interval index inside the current region; points to a (beg, end) |
844 | | * end - Used for CRAM files, to preserve the max end coordinate |
845 | | * |
846 | | * multi (0) - single-region mode |
847 | | * tid - Reference id of the target region |
848 | | * beg - Start position of the target region |
849 | | * end - End position of the target region |
850 | | * |
851 | | * Common fields: |
852 | | * off - List of file offsets computed from the index |
853 | | * n_off - Size of the above list |
854 | | * i - List index of the current file offset |
855 | | * curr_off - File offset for the next file read |
856 | | * curr_tid - Reference id of the current alignment |
857 | | * curr_beg - Start position of the current alignment |
858 | | * curr_end - End position of the current alignment |
859 | | * nocoor_off - File offset where the unmapped reads start |
860 | | * |
861 | | * readrec - File specific function that reads an alignment |
862 | | * seek - File specific function for changing the file offset |
863 | | * tell - File specific function for indicating the file offset |
864 | | */ |
865 | | |
866 | | typedef struct hts_itr_t { |
867 | | uint32_t read_rest:1, finished:1, is_cram:1, nocoor:1, multi:1, dummy:27; |
868 | | int tid, n_off, i, n_reg; |
869 | | hts_pos_t beg, end; |
870 | | hts_reglist_t *reg_list; |
871 | | int curr_tid, curr_reg, curr_intv; |
872 | | hts_pos_t curr_beg, curr_end; |
873 | | uint64_t curr_off, nocoor_off; |
874 | | hts_pair64_max_t *off; |
875 | | hts_readrec_func *readrec; |
876 | | hts_seek_func *seek; |
877 | | hts_tell_func *tell; |
878 | | struct { |
879 | | int n, m; |
880 | | int *a; |
881 | | } bins; |
882 | | } hts_itr_t; |
883 | | |
884 | | typedef hts_itr_t hts_itr_multi_t; |
885 | | |
886 | | /// Compute the first bin on a given level |
887 | 0 | #define hts_bin_first(l) (((1<<(((l)<<1) + (l))) - 1) / 7) |
888 | | /// Compute the parent bin of a given bin |
889 | 0 | #define hts_bin_parent(b) (((b) - 1) >> 3) |
890 | | |
891 | | /////////////////////////////////////////////////////////// |
892 | | // Low-level API for building indexes. |
893 | | |
894 | | /// Create a BAI/CSI/TBI type index structure |
895 | | /** @param n Initial number of targets |
896 | | @param fmt Format, one of HTS_FMT_CSI, HTS_FMT_BAI or HTS_FMT_TBI |
897 | | @param offset0 Initial file offset |
898 | | @param min_shift Number of bits for the minimal interval |
899 | | @param n_lvls Number of levels in the binning index |
900 | | @return An initialised hts_idx_t struct on success; NULL on failure |
901 | | |
902 | | The struct returned by a successful call should be freed via hts_idx_destroy() |
903 | | when it is no longer needed. |
904 | | */ |
905 | | HTSLIB_EXPORT |
906 | | hts_idx_t *hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_lvls); |
907 | | |
908 | | /// Free a BAI/CSI/TBI type index |
909 | | /** @param idx Index structure to free |
910 | | */ |
911 | | HTSLIB_EXPORT |
912 | | void hts_idx_destroy(hts_idx_t *idx); |
913 | | |
914 | | /// Push an index entry |
915 | | /** @param idx Index |
916 | | @param tid Target id |
917 | | @param beg Range start (zero-based) |
918 | | @param end Range end (zero-based, half-open) |
919 | | @param offset File offset |
920 | | @param is_mapped Range corresponds to a mapped read |
921 | | @return 0 on success; -1 on failure |
922 | | |
923 | | The @p is_mapped parameter is used to update the n_mapped / n_unmapped counts |
924 | | stored in the meta-data bin. |
925 | | */ |
926 | | HTSLIB_EXPORT |
927 | | int hts_idx_push(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped); |
928 | | |
929 | | /// Finish building an index |
930 | | /** @param idx Index |
931 | | @param final_offset Last file offset |
932 | | @return 0 on success; non-zero on failure. |
933 | | */ |
934 | | HTSLIB_EXPORT |
935 | | int hts_idx_finish(hts_idx_t *idx, uint64_t final_offset); |
936 | | |
937 | | /// Returns index format |
938 | | /** @param idx Index |
939 | | @return One of HTS_FMT_CSI, HTS_FMT_BAI or HTS_FMT_TBI |
940 | | */ |
941 | | HTSLIB_EXPORT |
942 | | int hts_idx_fmt(hts_idx_t *idx); |
943 | | |
944 | | /// Add name to TBI index meta-data |
945 | | /** @param idx Index |
946 | | @param tid Target identifier |
947 | | @param name Target name |
948 | | @return Index number of name in names list on success; -1 on failure. |
949 | | */ |
950 | | HTSLIB_EXPORT |
951 | | int hts_idx_tbi_name(hts_idx_t *idx, int tid, const char *name); |
952 | | |
953 | | // Index loading and saving |
954 | | |
955 | | /// Save an index to a file |
956 | | /** @param idx Index to be written |
957 | | @param fn Input BAM/BCF/etc filename, to which .bai/.csi/etc will be added |
958 | | @param fmt One of the HTS_FMT_* index formats |
959 | | @return 0 if successful, or negative if an error occurred. |
960 | | */ |
961 | | HTSLIB_EXPORT |
962 | | int hts_idx_save(const hts_idx_t *idx, const char *fn, int fmt) HTS_RESULT_USED; |
963 | | |
964 | | /// Save an index to a specific file |
965 | | /** @param idx Index to be written |
966 | | @param fn Input BAM/BCF/etc filename |
967 | | @param fnidx Output filename, or NULL to add .bai/.csi/etc to @a fn |
968 | | @param fmt One of the HTS_FMT_* index formats |
969 | | @return 0 if successful, or negative if an error occurred. |
970 | | */ |
971 | | HTSLIB_EXPORT |
972 | | int hts_idx_save_as(const hts_idx_t *idx, const char *fn, const char *fnidx, int fmt) HTS_RESULT_USED; |
973 | | |
974 | | /// Load an index file |
975 | | /** @param fn BAM/BCF/etc filename, to which .bai/.csi/etc will be added or |
976 | | the extension substituted, to search for an existing index file. |
977 | | In case of a non-standard naming, the file name can include the |
978 | | name of the index file delimited with HTS_IDX_DELIM. |
979 | | @param fmt One of the HTS_FMT_* index formats |
980 | | @return The index, or NULL if an error occurred. |
981 | | |
982 | | If @p fn contains the string "##idx##" (HTS_IDX_DELIM), the part before |
983 | | the delimiter will be used as the name of the data file and the part after |
984 | | it will be used as the name of the index. |
985 | | |
986 | | Otherwise, this function tries to work out the index name as follows: |
987 | | |
988 | | It will try appending ".csi" to @p fn |
989 | | It will try substituting an existing suffix (e.g. .bam, .vcf) with ".csi" |
990 | | Then, if @p fmt is HTS_FMT_BAI: |
991 | | It will try appending ".bai" to @p fn |
992 | | It will try substituting the existing suffix (e.g. .bam) with ".bai" |
993 | | else if @p fmt is HTS_FMT_TBI: |
994 | | It will try appending ".tbi" to @p fn |
995 | | It will try substituting the existing suffix (e.g. .vcf) with ".tbi" |
996 | | |
997 | | If the index file is remote (served over a protocol like https), first a check |
998 | | is made to see if a locally cached copy is available. This is done for all |
999 | | of the possible names listed above. If a cached copy is not available then |
1000 | | the index will be downloaded and stored in the current working directory, |
1001 | | with the same name as the remote index. |
1002 | | |
1003 | | Equivalent to hts_idx_load3(fn, NULL, fmt, HTS_IDX_SAVE_REMOTE); |
1004 | | */ |
1005 | | HTSLIB_EXPORT |
1006 | | hts_idx_t *hts_idx_load(const char *fn, int fmt); |
1007 | | |
1008 | | /// Load a specific index file |
1009 | | /** @param fn Input BAM/BCF/etc filename |
1010 | | @param fnidx The input index filename |
1011 | | @return The index, or NULL if an error occurred. |
1012 | | |
1013 | | Equivalent to hts_idx_load3(fn, fnidx, 0, 0); |
1014 | | |
1015 | | This function will not attempt to save index files locally. |
1016 | | */ |
1017 | | HTSLIB_EXPORT |
1018 | | hts_idx_t *hts_idx_load2(const char *fn, const char *fnidx); |
1019 | | |
1020 | | /// Load a specific index file |
1021 | | /** @param fn Input BAM/BCF/etc filename |
1022 | | @param fnidx The input index filename |
1023 | | @param fmt One of the HTS_FMT_* index formats |
1024 | | @param flags Flags to alter behaviour (see description) |
1025 | | @return The index, or NULL if an error occurred. |
1026 | | |
1027 | | If @p fnidx is NULL, the index name will be derived from @p fn in the |
1028 | | same way as hts_idx_load(). |
1029 | | |
1030 | | If @p fnidx is not NULL, @p fmt is ignored. |
1031 | | |
1032 | | The @p flags parameter can be set to a combination of the following |
1033 | | values: |
1034 | | |
1035 | | HTS_IDX_SAVE_REMOTE Save a local copy of any remote indexes |
1036 | | HTS_IDX_SILENT_FAIL Fail silently if the index is not present |
1037 | | |
1038 | | The index struct returned by a successful call should be freed |
1039 | | via hts_idx_destroy() when it is no longer needed. |
1040 | | */ |
1041 | | HTSLIB_EXPORT |
1042 | | hts_idx_t *hts_idx_load3(const char *fn, const char *fnidx, int fmt, int flags); |
1043 | | |
1044 | | /// Flags for hts_idx_load3() ( and also sam_idx_load3(), tbx_idx_load3() ) |
1045 | 0 | #define HTS_IDX_SAVE_REMOTE 1 |
1046 | 1.34k | #define HTS_IDX_SILENT_FAIL 2 |
1047 | | |
1048 | | /////////////////////////////////////////////////////////// |
1049 | | // Functions for accessing meta-data stored in indexes |
1050 | | |
1051 | | typedef const char *(*hts_id2name_f)(void*, int); |
1052 | | |
1053 | | /// Get extra index meta-data |
1054 | | /** @param idx The index |
1055 | | @param l_meta Pointer to where the length of the extra data is stored |
1056 | | @return Pointer to the extra data if present; NULL otherwise |
1057 | | |
1058 | | Indexes (both .tbi and .csi) made by tabix include extra data about |
1059 | | the indexed file. This function returns a pointer to this data. Note that the |
1060 | | data is stored exactly as it is in the index. Callers need to interpret |
1061 | | the results themselves, including knowing what sort of data to expect; |
1062 | | byte swapping etc. |
1063 | | */ |
1064 | | HTSLIB_EXPORT |
1065 | | uint8_t *hts_idx_get_meta(hts_idx_t *idx, uint32_t *l_meta); |
1066 | | |
1067 | | /// Set extra index meta-data |
1068 | | /** @param idx The index |
1069 | | @param l_meta Length of data |
1070 | | @param meta Pointer to the extra data |
1071 | | @param is_copy If not zero, a copy of the data is taken |
1072 | | @return 0 on success; -1 on failure (out of memory). |
1073 | | |
1074 | | Sets the data that is returned by hts_idx_get_meta(). |
1075 | | |
1076 | | If is_copy != 0, a copy of the input data is taken. If not, ownership of |
1077 | | the data pointed to by *meta passes to the index. |
1078 | | */ |
1079 | | HTSLIB_EXPORT |
1080 | | int hts_idx_set_meta(hts_idx_t *idx, uint32_t l_meta, uint8_t *meta, int is_copy); |
1081 | | |
1082 | | /// Get number of mapped and unmapped reads from an index |
1083 | | /** @param idx Index |
1084 | | @param tid Target ID |
1085 | | @param[out] mapped Location to store number of mapped reads |
1086 | | @param[out] unmapped Location to store number of unmapped reads |
1087 | | @return 0 on success; -1 on failure (data not available) |
1088 | | |
1089 | | BAI and CSI indexes store information on the number of reads for each |
1090 | | target that were mapped or unmapped (unmapped reads will generally have |
1091 | | a paired read that is mapped to the target). This function returns this |
1092 | | information if it is available. |
1093 | | |
1094 | | @note Cram CRAI indexes do not include this information. |
1095 | | */ |
1096 | | HTSLIB_EXPORT |
1097 | | int hts_idx_get_stat(const hts_idx_t* idx, int tid, uint64_t* mapped, uint64_t* unmapped); |
1098 | | |
1099 | | /// Return the number of unplaced reads from an index |
1100 | | /** @param idx Index |
1101 | | @return Unplaced reads count |
1102 | | |
1103 | | Unplaced reads are not linked to any reference (e.g. RNAME is '*' in SAM |
1104 | | files). |
1105 | | */ |
1106 | | HTSLIB_EXPORT |
1107 | | uint64_t hts_idx_get_n_no_coor(const hts_idx_t* idx); |
1108 | | |
1109 | | /// Return a list of target names from an index |
1110 | | /** @param idx Index |
1111 | | @param[out] n Location to store the number of targets |
1112 | | @param getid Callback function to get the name for a target ID |
1113 | | @param hdr Header from indexed file |
1114 | | @return An array of pointers to the names on success; NULL on failure |
1115 | | |
1116 | | @note The names are pointers into the header data structure. When cleaning |
1117 | | up, only the array should be freed, not the names. |
1118 | | */ |
1119 | | HTSLIB_EXPORT |
1120 | | const char **hts_idx_seqnames(const hts_idx_t *idx, int *n, hts_id2name_f getid, void *hdr); // free only the array, not the values |
1121 | | |
1122 | | /// Return the number of targets from an index |
1123 | | /** @param idx Index |
1124 | | @return The number of targets |
1125 | | */ |
1126 | | HTSLIB_EXPORT |
1127 | | int hts_idx_nseq(const hts_idx_t *idx); |
1128 | | |
1129 | | /////////////////////////////////////////////////////////// |
1130 | | // Region parsing |
1131 | | |
1132 | 0 | #define HTS_PARSE_THOUSANDS_SEP 1 ///< Ignore ',' separators within numbers |
1133 | 0 | #define HTS_PARSE_ONE_COORD 2 ///< chr:pos means chr:pos-pos and not chr:pos-end |
1134 | 0 | #define HTS_PARSE_LIST 4 ///< Expect a comma separated list of regions. (Disables HTS_PARSE_THOUSANDS_SEP) |
1135 | | |
1136 | | /// Parse a numeric string |
1137 | | /** The number may be expressed in scientific notation, and optionally may |
1138 | | contain commas in the integer part (before any decimal point or E notation). |
1139 | | @param str String to be parsed |
1140 | | @param strend If non-NULL, set on return to point to the first character |
1141 | | in @a str after those forming the parsed number |
1142 | | @param flags Or'ed-together combination of HTS_PARSE_* flags |
1143 | | @return Integer value of the parsed number, or 0 if no valid number |
1144 | | |
1145 | | The input string is parsed as: optional whitespace; an optional '+' or |
1146 | | '-' sign; decimal digits possibly including ',' characters (if @a flags |
1147 | | includes HTS_PARSE_THOUSANDS_SEP) and a '.' decimal point; and an optional |
1148 | | case-insensitive suffix, which may be either 'k', 'M', 'G', or scientific |
1149 | | notation consisting of 'e'/'E' followed by an optional '+' or '-' sign and |
1150 | | decimal digits. To be considered a valid numeric value, the main part (not |
1151 | | including any suffix or scientific notation) must contain at least one |
1152 | | digit (either before or after the decimal point). |
1153 | | |
1154 | | When @a strend is NULL, @a str is expected to contain only (optional |
1155 | | whitespace followed by) the numeric value. A warning will be printed |
1156 | | (if hts_verbose is HTS_LOG_WARNING or more) if no valid parsable number |
1157 | | is found or if there are any unused characters after the number. |
1158 | | |
1159 | | When @a strend is non-NULL, @a str starts with (optional whitespace |
1160 | | followed by) the numeric value. On return, @a strend is set to point |
1161 | | to the first unused character after the numeric value, or to @a str |
1162 | | if no valid parsable number is found. |
1163 | | */ |
1164 | | HTSLIB_EXPORT |
1165 | | long long hts_parse_decimal(const char *str, char **strend, int flags); |
1166 | | |
1167 | | typedef int (*hts_name2id_f)(void*, const char*); |
1168 | | |
1169 | | /// Parse a "CHR:START-END"-style region string |
1170 | | /** @param str String to be parsed |
1171 | | @param beg Set on return to the 0-based start of the region |
1172 | | @param end Set on return to the 1-based end of the region |
1173 | | @return Pointer to the colon or '\0' after the reference sequence name, |
1174 | | or NULL if @a str could not be parsed. |
1175 | | |
1176 | | NOTE: For compatibility with hts_parse_reg only. |
1177 | | Please use hts_parse_region instead. |
1178 | | */ |
1179 | | HTSLIB_EXPORT |
1180 | | const char *hts_parse_reg64(const char *str, hts_pos_t *beg, hts_pos_t *end); |
1181 | | |
1182 | | /// Parse a "CHR:START-END"-style region string |
1183 | | /** @param str String to be parsed |
1184 | | @param beg Set on return to the 0-based start of the region |
1185 | | @param end Set on return to the 1-based end of the region |
1186 | | @return Pointer to the colon or '\0' after the reference sequence name, |
1187 | | or NULL if @a str could not be parsed. |
1188 | | */ |
1189 | | HTSLIB_EXPORT |
1190 | | const char *hts_parse_reg(const char *str, int *beg, int *end); |
1191 | | |
1192 | | /// Parse a "CHR:START-END"-style region string |
1193 | | /** @param str String to be parsed |
1194 | | @param tid Set on return (if not NULL) to the reference index (-1 if invalid) |
1195 | | @param beg Set on return to the 0-based start of the region |
1196 | | @param end Set on return to the 1-based end of the region |
1197 | | @param getid Function pointer. Called if not NULL to set tid. |
1198 | | @param hdr Caller data passed to getid. |
1199 | | @param flags Bitwise HTS_PARSE_* flags listed above. |
1200 | | @return Pointer to the byte after the end of the entire region |
1201 | | specifier (including any trailing comma) on success, |
1202 | | or NULL if @a str could not be parsed. |
1203 | | |
1204 | | A variant of hts_parse_reg which is reference-id aware. It uses |
1205 | | the iterator name2id callbacks to validate the region tokenisation works. |
1206 | | |
1207 | | This is necessary due to GRCh38 HLA additions which have reference names |
1208 | | like "HLA-DRB1*12:17". |
1209 | | |
1210 | | To work around ambiguous parsing issues, eg both "chr1" and "chr1:100-200" |
1211 | | are reference names, quote using curly braces. |
1212 | | Thus "{chr1}:100-200" and "{chr1:100-200}" disambiguate the above example. |
1213 | | |
1214 | | Flags are used to control how parsing works, and can be one of the below. |
1215 | | |
1216 | | HTS_PARSE_THOUSANDS_SEP: |
1217 | | Ignore commas in numbers. For example with this flag 1,234,567 |
1218 | | is interpreted as 1234567. |
1219 | | |
1220 | | HTS_PARSE_LIST: |
1221 | | If present, the region is assumed to be a comma separated list and |
1222 | | position parsing will not contain commas (this implicitly |
1223 | | clears HTS_PARSE_THOUSANDS_SEP in the call to hts_parse_decimal). |
1224 | | On success the return pointer will be the start of the next region, ie |
1225 | | the character after the comma. (If *ret != '\0' then the caller can |
1226 | | assume another region is present in the list.) |
1227 | | |
1228 | | If not set then positions may contain commas. In this case the return |
1229 | | value should point to the end of the string, or NULL on failure. |
1230 | | |
1231 | | HTS_PARSE_ONE_COORD: |
1232 | | If present, X:100 is treated as the single base pair region X:100-100. |
1233 | | In this case X:-100 is shorthand for X:1-100 and X:100- is X:100-<end>. |
1234 | | (This is the standard bcftools region convention.) |
1235 | | |
1236 | | When not set X:100 is considered to be X:100-<end> where <end> is |
1237 | | the end of chromosome X (set to INT_MAX here). X:100- and X:-100 are |
1238 | | invalid. |
1239 | | (This is the standard samtools region convention.) |
1240 | | |
1241 | | Note the supplied string expects 1 based inclusive coordinates, but the |
1242 | | returned coordinates start from 0 and are half open, so pos0 is valid |
1243 | | for use in e.g. "for (pos0 = beg; pos0 < end; pos0++) {...}" |
1244 | | |
1245 | | If NULL is returned, the value in tid may give additional information |
1246 | | about the error: |
1247 | | |
1248 | | -2 Failed to parse @p hdr; or out of memory |
1249 | | -1 The reference in @p str has mismatched braces, or does not |
1250 | | exist in @p hdr |
1251 | | >= 0 The specified range in @p str could not be parsed |
1252 | | */ |
1253 | | HTSLIB_EXPORT |
1254 | | const char *hts_parse_region(const char *s, int *tid, hts_pos_t *beg, |
1255 | | hts_pos_t *end, hts_name2id_f getid, void *hdr, |
1256 | | int flags); |
1257 | | |
1258 | | |
1259 | | /////////////////////////////////////////////////////////// |
1260 | | // Generic iterators |
1261 | | // |
1262 | | // These functions provide the low-level infrastructure for iterators. |
1263 | | // Wrappers around these are used to make iterators for specific file types. |
1264 | | // See: |
1265 | | // htslib/sam.h for SAM/BAM/CRAM iterators |
1266 | | // htslib/vcf.h for VCF/BCF iterators |
1267 | | // htslib/tbx.h for files indexed by tabix |
1268 | | |
1269 | | /// Create a single-region iterator |
1270 | | /** @param idx Index |
1271 | | @param tid Target ID |
1272 | | @param beg Start of region |
1273 | | @param end End of region |
1274 | | @param readrec Callback to read a record from the input file |
1275 | | @return An iterator on success; NULL on failure |
1276 | | |
1277 | | The iterator struct returned by a successful call should be freed |
1278 | | via hts_itr_destroy() when it is no longer needed. |
1279 | | */ |
1280 | | HTSLIB_EXPORT |
1281 | | hts_itr_t *hts_itr_query(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func *readrec); |
1282 | | |
1283 | | /// Free an iterator |
1284 | | /** @param iter Iterator to free |
1285 | | */ |
1286 | | HTSLIB_EXPORT |
1287 | | void hts_itr_destroy(hts_itr_t *iter); |
1288 | | |
1289 | | typedef hts_itr_t *hts_itr_query_func(const hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func *readrec); |
1290 | | |
1291 | | /// Create a single-region iterator from a text region specification |
1292 | | /** @param idx Index |
1293 | | @param reg Region specifier |
1294 | | @param getid Callback function to return the target ID for a name |
1295 | | @param hdr Input file header |
1296 | | @param itr_query Callback function returning an iterator for a numeric tid, |
1297 | | start and end position |
1298 | | @param readrec Callback to read a record from the input file |
1299 | | @return An iterator on success; NULL on error |
1300 | | |
1301 | | The iterator struct returned by a successful call should be freed |
1302 | | via hts_itr_destroy() when it is no longer needed. |
1303 | | */ |
1304 | | HTSLIB_EXPORT |
1305 | | hts_itr_t *hts_itr_querys(const hts_idx_t *idx, const char *reg, hts_name2id_f getid, void *hdr, hts_itr_query_func *itr_query, hts_readrec_func *readrec); |
1306 | | |
1307 | | /// Return the next record from an iterator |
1308 | | /** @param fp Input file handle |
1309 | | @param iter Iterator |
1310 | | @param r Pointer to record placeholder |
1311 | | @param data Data passed to the readrec callback |
1312 | | @return >= 0 on success, -1 when there is no more data, < -1 on error |
1313 | | */ |
1314 | | HTSLIB_EXPORT |
1315 | | int hts_itr_next(BGZF *fp, hts_itr_t *iter, void *r, void *data) HTS_RESULT_USED; |
1316 | | |
1317 | | /********************************** |
1318 | | * Iterator with multiple regions * |
1319 | | **********************************/ |
1320 | | |
1321 | | typedef int hts_itr_multi_query_func(const hts_idx_t *idx, hts_itr_t *itr); |
1322 | | HTSLIB_EXPORT |
1323 | | int hts_itr_multi_bam(const hts_idx_t *idx, hts_itr_t *iter); |
1324 | | HTSLIB_EXPORT |
1325 | | int hts_itr_multi_cram(const hts_idx_t *idx, hts_itr_t *iter); |
1326 | | |
1327 | | /// Create a multi-region iterator from a region list |
1328 | | /** @param idx Index |
1329 | | @param reglist Region list |
1330 | | @param count Number of items in region list |
1331 | | @param getid Callback to convert names to target IDs |
1332 | | @param hdr Indexed file header (passed to getid) |
1333 | | @param itr_specific Filetype-specific callback function |
1334 | | @param readrec Callback to read an input file record |
1335 | | @param seek Callback to seek in the input file |
1336 | | @param tell Callback to return current input file location |
1337 | | @return An iterator on success; NULL on failure |
1338 | | |
1339 | | The iterator struct returned by a successful call should be freed |
1340 | | via hts_itr_destroy() when it is no longer needed. |
1341 | | */ |
1342 | | HTSLIB_EXPORT |
1343 | | hts_itr_t *hts_itr_regions(const hts_idx_t *idx, hts_reglist_t *reglist, int count, hts_name2id_f getid, void *hdr, hts_itr_multi_query_func *itr_specific, hts_readrec_func *readrec, hts_seek_func *seek, hts_tell_func *tell); |
1344 | | |
1345 | | /// Return the next record from an iterator |
1346 | | /** @param fd Input file handle |
1347 | | @param iter Iterator |
1348 | | @param r Pointer to record placeholder |
1349 | | @return >= 0 on success, -1 when there is no more data, < -1 on error |
1350 | | */ |
1351 | | HTSLIB_EXPORT |
1352 | | int hts_itr_multi_next(htsFile *fd, hts_itr_t *iter, void *r); |
1353 | | |
1354 | | /// Create a region list from a char array |
1355 | | /** @param argv Char array of target:interval elements, e.g. chr1:2500-3600, chr1:5100, chr2 |
1356 | | @param argc Number of items in the array |
1357 | | @param r_count Pointer to the number of items in the resulting region list |
1358 | | @param hdr Header for the sam/bam/cram file |
1359 | | @param getid Callback to convert target names to target ids. |
1360 | | @return A region list on success, NULL on failure |
1361 | | |
1362 | | The hts_reglist_t struct returned by a successful call should be freed |
1363 | | via hts_reglist_free() when it is no longer needed. |
1364 | | */ |
1365 | | HTSLIB_EXPORT |
1366 | | hts_reglist_t *hts_reglist_create(char **argv, int argc, int *r_count, void *hdr, hts_name2id_f getid); |
1367 | | |
1368 | | /// Free a region list |
1369 | | /** @param reglist Region list |
1370 | | @param count Number of items in the list |
1371 | | */ |
1372 | | HTSLIB_EXPORT |
1373 | | void hts_reglist_free(hts_reglist_t *reglist, int count); |
1374 | | |
1375 | | /// Free a multi-region iterator |
1376 | | /** @param iter Iterator to free |
1377 | | */ |
1378 | | #define hts_itr_multi_destroy(iter) hts_itr_destroy(iter) |
1379 | | |
1380 | | |
1381 | | /** |
1382 | | * hts_file_type() - Convenience function to determine file type |
1383 | | * DEPRECATED: This function has been replaced by hts_detect_format(). |
1384 | | * It and these FT_* macros will be removed in a future HTSlib release. |
1385 | | */ |
1386 | | #define FT_UNKN 0 |
1387 | 0 | #define FT_GZ 1 |
1388 | 0 | #define FT_VCF 2 |
1389 | 0 | #define FT_VCF_GZ (FT_GZ|FT_VCF) |
1390 | 0 | #define FT_BCF (1<<2) |
1391 | 0 | #define FT_BCF_GZ (FT_GZ|FT_BCF) |
1392 | 0 | #define FT_STDIN (1<<3) |
1393 | | HTSLIB_EXPORT |
1394 | | int hts_file_type(const char *fname); |
1395 | | |
1396 | | |
1397 | | /*************************** |
1398 | | * Revised MAQ error model * |
1399 | | ***************************/ |
1400 | | |
1401 | | struct errmod_t; |
1402 | | typedef struct errmod_t errmod_t; |
1403 | | |
1404 | | HTSLIB_EXPORT |
1405 | | errmod_t *errmod_init(double depcorr); |
1406 | | HTSLIB_EXPORT |
1407 | | void errmod_destroy(errmod_t *em); |
1408 | | |
1409 | | /* |
1410 | | n: number of bases |
1411 | | m: maximum base |
1412 | | bases[i]: qual:6, strand:1, base:4 |
1413 | | q[i*m+j]: phred-scaled likelihood of (i,j) |
1414 | | */ |
1415 | | HTSLIB_EXPORT |
1416 | | int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q); |
1417 | | |
1418 | | |
1419 | | /***************************************************** |
1420 | | * Probabilistic banded glocal alignment * |
1421 | | * See https://doi.org/10.1093/bioinformatics/btr076 * |
1422 | | *****************************************************/ |
1423 | | |
1424 | | typedef struct probaln_par_t { |
1425 | | float d, e; |
1426 | | int bw; |
1427 | | } probaln_par_t; |
1428 | | |
1429 | | /// Perform probabilistic banded glocal alignment |
1430 | | /** @param ref Reference sequence |
1431 | | @param l_ref Length of reference |
1432 | | @param query Query sequence |
1433 | | @param l_query Length of query sequence |
1434 | | @param iqual Query base qualities |
1435 | | @param c Alignment parameters |
1436 | | @param[out] state Output alignment |
1437 | | @param[out] q Phred scaled posterior probability of state[i] being wrong |
1438 | | @return Phred-scaled likelihood score, or INT_MIN on failure. |
1439 | | |
1440 | | The reference and query sequences are coded using integers 0,1,2,3,4 for |
1441 | | bases A,C,G,T,N respectively (N here is for any ambiguity code). |
1442 | | |
1443 | | On output, state and q are arrays of length l_query. The higher 30 |
1444 | | bits give the reference position the query base is matched to and the |
1445 | | lower two bits can be 0 (an alignment match) or 1 (an |
1446 | | insertion). q[i] gives the phred scaled posterior probability of |
1447 | | state[i] being wrong. |
1448 | | |
1449 | | On failure, errno will be set to EINVAL if the values of l_ref or l_query |
1450 | | were invalid; or ENOMEM if a memory allocation failed. |
1451 | | */ |
1452 | | |
1453 | | HTSLIB_EXPORT |
1454 | | int probaln_glocal(const uint8_t *ref, int l_ref, const uint8_t *query, int l_query, const uint8_t *iqual, const probaln_par_t *c, int *state, uint8_t *q); |
1455 | | |
1456 | | |
1457 | | /********************** |
1458 | | * MD5 implementation * |
1459 | | **********************/ |
1460 | | |
1461 | | struct hts_md5_context; |
1462 | | typedef struct hts_md5_context hts_md5_context; |
1463 | | |
1464 | | /*! @abstract Initialises an MD5 context. |
1465 | | * @discussion |
1466 | | * The expected use is to allocate an hts_md5_context using |
1467 | | * hts_md5_init(). This pointer is then passed into one or more calls |
1468 | | * of hts_md5_update() to compute successive internal portions of the |
1469 | | * MD5 sum, which can then be externalised as a full 16-byte MD5sum |
1470 | | * calculation by calling hts_md5_final(). This can then be turned |
1471 | | * into ASCII via hts_md5_hex(). |
1472 | | * |
1473 | | * To deallocate any resources created by hts_md5_init(), call the |
1474 | | * hts_md5_destroy() function. |
1475 | | * |
1476 | | * @return hts_md5_context pointer on success, NULL otherwise. |
1477 | | */ |
1478 | | HTSLIB_EXPORT |
1479 | | hts_md5_context *hts_md5_init(void); |
1480 | | |
1481 | | /*! @abstract Updates the context with the MD5 of the data. */ |
1482 | | HTSLIB_EXPORT |
1483 | | void hts_md5_update(hts_md5_context *ctx, const void *data, unsigned long size); |
1484 | | |
1485 | | /*! @abstract Computes the final 128-bit MD5 hash from the given context */ |
1486 | | HTSLIB_EXPORT |
1487 | | void hts_md5_final(unsigned char *digest, hts_md5_context *ctx); |
1488 | | |
1489 | | /*! @abstract Resets an md5_context to the initial state, as returned |
1490 | | * by hts_md5_init(). |
1491 | | */ |
1492 | | HTSLIB_EXPORT |
1493 | | void hts_md5_reset(hts_md5_context *ctx); |
1494 | | |
1495 | | /*! @abstract Converts a 128-bit MD5 hash into a 33-byte nul-terminated |
1496 | | * hex string. |
1497 | | */ |
1498 | | HTSLIB_EXPORT |
1499 | | void hts_md5_hex(char *hex, const unsigned char *digest); |
1500 | | |
1501 | | /*! @abstract Deallocates any memory allocated by hts_md5_init. */ |
1502 | | HTSLIB_EXPORT |
1503 | | void hts_md5_destroy(hts_md5_context *ctx); |
1504 | | |
1505 | | static inline int hts_reg2bin(hts_pos_t beg, hts_pos_t end, int min_shift, int n_lvls) |
1506 | 18.1k | { |
1507 | 18.1k | int l, s = min_shift, t = ((1<<((n_lvls<<1) + n_lvls)) - 1) / 7; |
1508 | 18.9k | for (--end, l = n_lvls; l > 0; --l, s += 3, t -= 1<<((l<<1)+l)) |
1509 | 18.8k | if (beg>>s == end>>s) return t + (beg>>s); |
1510 | 125 | return 0; |
1511 | 18.1k | } Unexecuted instantiation: hts_open_fuzzer.c:hts_reg2bin Unexecuted instantiation: header.c:hts_reg2bin Unexecuted instantiation: hfile.c:hts_reg2bin Unexecuted instantiation: hts.c:hts_reg2bin Unexecuted instantiation: multipart.c:hts_reg2bin Unexecuted instantiation: region.c:hts_reg2bin Line | Count | Source | 1506 | 18.1k | { | 1507 | 18.1k | int l, s = min_shift, t = ((1<<((n_lvls<<1) + n_lvls)) - 1) / 7; | 1508 | 18.9k | for (--end, l = n_lvls; l > 0; --l, s += 3, t -= 1<<((l<<1)+l)) | 1509 | 18.8k | if (beg>>s == end>>s) return t + (beg>>s); | 1510 | 125 | return 0; | 1511 | 18.1k | } |
Unexecuted instantiation: textutils.c:hts_reg2bin Unexecuted instantiation: vcf.c:hts_reg2bin Unexecuted instantiation: cram_decode.c:hts_reg2bin Unexecuted instantiation: cram_encode.c:hts_reg2bin Unexecuted instantiation: cram_index.c:hts_reg2bin Unexecuted instantiation: cram_io.c:hts_reg2bin Unexecuted instantiation: cram_stats.c:hts_reg2bin Unexecuted instantiation: open_trace_file.c:hts_reg2bin Unexecuted instantiation: hfile_libcurl.c:hts_reg2bin Unexecuted instantiation: hfile_gcs.c:hts_reg2bin Unexecuted instantiation: hfile_s3.c:hts_reg2bin Unexecuted instantiation: hfile_s3_write.c:hts_reg2bin Unexecuted instantiation: bgzf.c:hts_reg2bin Unexecuted instantiation: faidx.c:hts_reg2bin Unexecuted instantiation: md5.c:hts_reg2bin Unexecuted instantiation: tbx.c:hts_reg2bin Unexecuted instantiation: cram_codecs.c:hts_reg2bin |
1512 | | |
1513 | | /// Compute the level of a bin in a binning index |
1514 | 0 | static inline int hts_bin_level(int bin) { |
1515 | 0 | int l, b; |
1516 | 0 | for (l = 0, b = bin; b; ++l, b = hts_bin_parent(b)); |
1517 | 0 | return l; |
1518 | 0 | } Unexecuted instantiation: hts_open_fuzzer.c:hts_bin_level Unexecuted instantiation: header.c:hts_bin_level Unexecuted instantiation: hfile.c:hts_bin_level Unexecuted instantiation: hts.c:hts_bin_level Unexecuted instantiation: multipart.c:hts_bin_level Unexecuted instantiation: region.c:hts_bin_level Unexecuted instantiation: sam.c:hts_bin_level Unexecuted instantiation: textutils.c:hts_bin_level Unexecuted instantiation: vcf.c:hts_bin_level Unexecuted instantiation: cram_decode.c:hts_bin_level Unexecuted instantiation: cram_encode.c:hts_bin_level Unexecuted instantiation: cram_index.c:hts_bin_level Unexecuted instantiation: cram_io.c:hts_bin_level Unexecuted instantiation: cram_stats.c:hts_bin_level Unexecuted instantiation: open_trace_file.c:hts_bin_level Unexecuted instantiation: hfile_libcurl.c:hts_bin_level Unexecuted instantiation: hfile_gcs.c:hts_bin_level Unexecuted instantiation: hfile_s3.c:hts_bin_level Unexecuted instantiation: hfile_s3_write.c:hts_bin_level Unexecuted instantiation: bgzf.c:hts_bin_level Unexecuted instantiation: faidx.c:hts_bin_level Unexecuted instantiation: md5.c:hts_bin_level Unexecuted instantiation: tbx.c:hts_bin_level Unexecuted instantiation: cram_codecs.c:hts_bin_level |
1519 | | |
1520 | | //! Compute the corresponding entry into the linear index of a given bin from |
1521 | | //! a binning index |
1522 | | /*! |
1523 | | * @param bin The bin number |
1524 | | * @param n_lvls The index depth (number of levels - 0 based) |
1525 | | * @return The integer offset into the linear index |
1526 | | * |
1527 | | * Explanation of the return value formula: |
1528 | | * Each bin on level l covers exp(2, (n_lvls - l)*3 + min_shift) base pairs. |
1529 | | * A linear index entry covers exp(2, min_shift) base pairs. |
1530 | | */ |
1531 | | static inline int hts_bin_bot(int bin, int n_lvls) |
1532 | 0 | { |
1533 | 0 | int l = hts_bin_level(bin); |
1534 | 0 | return (bin - hts_bin_first(l)) << (n_lvls - l) * 3; |
1535 | 0 | } Unexecuted instantiation: hts_open_fuzzer.c:hts_bin_bot Unexecuted instantiation: header.c:hts_bin_bot Unexecuted instantiation: hfile.c:hts_bin_bot Unexecuted instantiation: hts.c:hts_bin_bot Unexecuted instantiation: multipart.c:hts_bin_bot Unexecuted instantiation: region.c:hts_bin_bot Unexecuted instantiation: sam.c:hts_bin_bot Unexecuted instantiation: textutils.c:hts_bin_bot Unexecuted instantiation: vcf.c:hts_bin_bot Unexecuted instantiation: cram_decode.c:hts_bin_bot Unexecuted instantiation: cram_encode.c:hts_bin_bot Unexecuted instantiation: cram_index.c:hts_bin_bot Unexecuted instantiation: cram_io.c:hts_bin_bot Unexecuted instantiation: cram_stats.c:hts_bin_bot Unexecuted instantiation: open_trace_file.c:hts_bin_bot Unexecuted instantiation: hfile_libcurl.c:hts_bin_bot Unexecuted instantiation: hfile_gcs.c:hts_bin_bot Unexecuted instantiation: hfile_s3.c:hts_bin_bot Unexecuted instantiation: hfile_s3_write.c:hts_bin_bot Unexecuted instantiation: bgzf.c:hts_bin_bot Unexecuted instantiation: faidx.c:hts_bin_bot Unexecuted instantiation: md5.c:hts_bin_bot Unexecuted instantiation: tbx.c:hts_bin_bot Unexecuted instantiation: cram_codecs.c:hts_bin_bot |
1536 | | |
1537 | | /************** |
1538 | | * Endianness * |
1539 | | **************/ |
1540 | | |
1541 | | static inline int ed_is_big(void) |
1542 | 3.94k | { |
1543 | 3.94k | long one= 1; |
1544 | 3.94k | return !(*((char *)(&one))); |
1545 | 3.94k | } Unexecuted instantiation: hts_open_fuzzer.c:ed_is_big Unexecuted instantiation: header.c:ed_is_big Unexecuted instantiation: hfile.c:ed_is_big Line | Count | Source | 1542 | 3.56k | { | 1543 | 3.56k | long one= 1; | 1544 | 3.56k | return !(*((char *)(&one))); | 1545 | 3.56k | } |
Unexecuted instantiation: multipart.c:ed_is_big Unexecuted instantiation: region.c:ed_is_big Unexecuted instantiation: sam.c:ed_is_big Unexecuted instantiation: textutils.c:ed_is_big Unexecuted instantiation: vcf.c:ed_is_big Unexecuted instantiation: cram_decode.c:ed_is_big Unexecuted instantiation: cram_encode.c:ed_is_big Unexecuted instantiation: cram_index.c:ed_is_big Unexecuted instantiation: cram_io.c:ed_is_big Unexecuted instantiation: cram_stats.c:ed_is_big Unexecuted instantiation: open_trace_file.c:ed_is_big Unexecuted instantiation: hfile_libcurl.c:ed_is_big Unexecuted instantiation: hfile_gcs.c:ed_is_big Unexecuted instantiation: hfile_s3.c:ed_is_big Unexecuted instantiation: hfile_s3_write.c:ed_is_big Line | Count | Source | 1542 | 384 | { | 1543 | 384 | long one= 1; | 1544 | 384 | return !(*((char *)(&one))); | 1545 | 384 | } |
Unexecuted instantiation: faidx.c:ed_is_big Unexecuted instantiation: md5.c:ed_is_big Unexecuted instantiation: tbx.c:ed_is_big Unexecuted instantiation: cram_codecs.c:ed_is_big |
1546 | | static inline uint16_t ed_swap_2(uint16_t v) |
1547 | 0 | { |
1548 | 0 | return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8)); |
1549 | 0 | } Unexecuted instantiation: hts_open_fuzzer.c:ed_swap_2 Unexecuted instantiation: header.c:ed_swap_2 Unexecuted instantiation: hfile.c:ed_swap_2 Unexecuted instantiation: hts.c:ed_swap_2 Unexecuted instantiation: multipart.c:ed_swap_2 Unexecuted instantiation: region.c:ed_swap_2 Unexecuted instantiation: sam.c:ed_swap_2 Unexecuted instantiation: textutils.c:ed_swap_2 Unexecuted instantiation: vcf.c:ed_swap_2 Unexecuted instantiation: cram_decode.c:ed_swap_2 Unexecuted instantiation: cram_encode.c:ed_swap_2 Unexecuted instantiation: cram_index.c:ed_swap_2 Unexecuted instantiation: cram_io.c:ed_swap_2 Unexecuted instantiation: cram_stats.c:ed_swap_2 Unexecuted instantiation: open_trace_file.c:ed_swap_2 Unexecuted instantiation: hfile_libcurl.c:ed_swap_2 Unexecuted instantiation: hfile_gcs.c:ed_swap_2 Unexecuted instantiation: hfile_s3.c:ed_swap_2 Unexecuted instantiation: hfile_s3_write.c:ed_swap_2 Unexecuted instantiation: bgzf.c:ed_swap_2 Unexecuted instantiation: faidx.c:ed_swap_2 Unexecuted instantiation: md5.c:ed_swap_2 Unexecuted instantiation: tbx.c:ed_swap_2 Unexecuted instantiation: cram_codecs.c:ed_swap_2 |
1550 | | static inline void *ed_swap_2p(void *x) |
1551 | 0 | { |
1552 | 0 | *(uint16_t*)x = ed_swap_2(*(uint16_t*)x); |
1553 | 0 | return x; |
1554 | 0 | } Unexecuted instantiation: hts_open_fuzzer.c:ed_swap_2p Unexecuted instantiation: header.c:ed_swap_2p Unexecuted instantiation: hfile.c:ed_swap_2p Unexecuted instantiation: hts.c:ed_swap_2p Unexecuted instantiation: multipart.c:ed_swap_2p Unexecuted instantiation: region.c:ed_swap_2p Unexecuted instantiation: sam.c:ed_swap_2p Unexecuted instantiation: textutils.c:ed_swap_2p Unexecuted instantiation: vcf.c:ed_swap_2p Unexecuted instantiation: cram_decode.c:ed_swap_2p Unexecuted instantiation: cram_encode.c:ed_swap_2p Unexecuted instantiation: cram_index.c:ed_swap_2p Unexecuted instantiation: cram_io.c:ed_swap_2p Unexecuted instantiation: cram_stats.c:ed_swap_2p Unexecuted instantiation: open_trace_file.c:ed_swap_2p Unexecuted instantiation: hfile_libcurl.c:ed_swap_2p Unexecuted instantiation: hfile_gcs.c:ed_swap_2p Unexecuted instantiation: hfile_s3.c:ed_swap_2p Unexecuted instantiation: hfile_s3_write.c:ed_swap_2p Unexecuted instantiation: bgzf.c:ed_swap_2p Unexecuted instantiation: faidx.c:ed_swap_2p Unexecuted instantiation: md5.c:ed_swap_2p Unexecuted instantiation: tbx.c:ed_swap_2p Unexecuted instantiation: cram_codecs.c:ed_swap_2p |
1555 | | static inline uint32_t ed_swap_4(uint32_t v) |
1556 | 0 | { |
1557 | 0 | v = ((v & 0x0000FFFFU) << 16) | (v >> 16); |
1558 | 0 | return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); |
1559 | 0 | } Unexecuted instantiation: hts_open_fuzzer.c:ed_swap_4 Unexecuted instantiation: header.c:ed_swap_4 Unexecuted instantiation: hfile.c:ed_swap_4 Unexecuted instantiation: hts.c:ed_swap_4 Unexecuted instantiation: multipart.c:ed_swap_4 Unexecuted instantiation: region.c:ed_swap_4 Unexecuted instantiation: sam.c:ed_swap_4 Unexecuted instantiation: textutils.c:ed_swap_4 Unexecuted instantiation: vcf.c:ed_swap_4 Unexecuted instantiation: cram_decode.c:ed_swap_4 Unexecuted instantiation: cram_encode.c:ed_swap_4 Unexecuted instantiation: cram_index.c:ed_swap_4 Unexecuted instantiation: cram_io.c:ed_swap_4 Unexecuted instantiation: cram_stats.c:ed_swap_4 Unexecuted instantiation: open_trace_file.c:ed_swap_4 Unexecuted instantiation: hfile_libcurl.c:ed_swap_4 Unexecuted instantiation: hfile_gcs.c:ed_swap_4 Unexecuted instantiation: hfile_s3.c:ed_swap_4 Unexecuted instantiation: hfile_s3_write.c:ed_swap_4 Unexecuted instantiation: bgzf.c:ed_swap_4 Unexecuted instantiation: faidx.c:ed_swap_4 Unexecuted instantiation: md5.c:ed_swap_4 Unexecuted instantiation: tbx.c:ed_swap_4 Unexecuted instantiation: cram_codecs.c:ed_swap_4 |
1560 | | static inline void *ed_swap_4p(void *x) |
1561 | 0 | { |
1562 | 0 | *(uint32_t*)x = ed_swap_4(*(uint32_t*)x); |
1563 | 0 | return x; |
1564 | 0 | } Unexecuted instantiation: hts_open_fuzzer.c:ed_swap_4p Unexecuted instantiation: header.c:ed_swap_4p Unexecuted instantiation: hfile.c:ed_swap_4p Unexecuted instantiation: hts.c:ed_swap_4p Unexecuted instantiation: multipart.c:ed_swap_4p Unexecuted instantiation: region.c:ed_swap_4p Unexecuted instantiation: sam.c:ed_swap_4p Unexecuted instantiation: textutils.c:ed_swap_4p Unexecuted instantiation: vcf.c:ed_swap_4p Unexecuted instantiation: cram_decode.c:ed_swap_4p Unexecuted instantiation: cram_encode.c:ed_swap_4p Unexecuted instantiation: cram_index.c:ed_swap_4p Unexecuted instantiation: cram_io.c:ed_swap_4p Unexecuted instantiation: cram_stats.c:ed_swap_4p Unexecuted instantiation: open_trace_file.c:ed_swap_4p Unexecuted instantiation: hfile_libcurl.c:ed_swap_4p Unexecuted instantiation: hfile_gcs.c:ed_swap_4p Unexecuted instantiation: hfile_s3.c:ed_swap_4p Unexecuted instantiation: hfile_s3_write.c:ed_swap_4p Unexecuted instantiation: bgzf.c:ed_swap_4p Unexecuted instantiation: faidx.c:ed_swap_4p Unexecuted instantiation: md5.c:ed_swap_4p Unexecuted instantiation: tbx.c:ed_swap_4p Unexecuted instantiation: cram_codecs.c:ed_swap_4p |
1565 | | static inline uint64_t ed_swap_8(uint64_t v) |
1566 | 0 | { |
1567 | 0 | v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32); |
1568 | 0 | v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16); |
1569 | 0 | return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8); |
1570 | 0 | } Unexecuted instantiation: hts_open_fuzzer.c:ed_swap_8 Unexecuted instantiation: header.c:ed_swap_8 Unexecuted instantiation: hfile.c:ed_swap_8 Unexecuted instantiation: hts.c:ed_swap_8 Unexecuted instantiation: multipart.c:ed_swap_8 Unexecuted instantiation: region.c:ed_swap_8 Unexecuted instantiation: sam.c:ed_swap_8 Unexecuted instantiation: textutils.c:ed_swap_8 Unexecuted instantiation: vcf.c:ed_swap_8 Unexecuted instantiation: cram_decode.c:ed_swap_8 Unexecuted instantiation: cram_encode.c:ed_swap_8 Unexecuted instantiation: cram_index.c:ed_swap_8 Unexecuted instantiation: cram_io.c:ed_swap_8 Unexecuted instantiation: cram_stats.c:ed_swap_8 Unexecuted instantiation: open_trace_file.c:ed_swap_8 Unexecuted instantiation: hfile_libcurl.c:ed_swap_8 Unexecuted instantiation: hfile_gcs.c:ed_swap_8 Unexecuted instantiation: hfile_s3.c:ed_swap_8 Unexecuted instantiation: hfile_s3_write.c:ed_swap_8 Unexecuted instantiation: bgzf.c:ed_swap_8 Unexecuted instantiation: faidx.c:ed_swap_8 Unexecuted instantiation: md5.c:ed_swap_8 Unexecuted instantiation: tbx.c:ed_swap_8 Unexecuted instantiation: cram_codecs.c:ed_swap_8 |
1571 | | static inline void *ed_swap_8p(void *x) |
1572 | 0 | { |
1573 | 0 | *(uint64_t*)x = ed_swap_8(*(uint64_t*)x); |
1574 | 0 | return x; |
1575 | 0 | } Unexecuted instantiation: hts_open_fuzzer.c:ed_swap_8p Unexecuted instantiation: header.c:ed_swap_8p Unexecuted instantiation: hfile.c:ed_swap_8p Unexecuted instantiation: hts.c:ed_swap_8p Unexecuted instantiation: multipart.c:ed_swap_8p Unexecuted instantiation: region.c:ed_swap_8p Unexecuted instantiation: sam.c:ed_swap_8p Unexecuted instantiation: textutils.c:ed_swap_8p Unexecuted instantiation: vcf.c:ed_swap_8p Unexecuted instantiation: cram_decode.c:ed_swap_8p Unexecuted instantiation: cram_encode.c:ed_swap_8p Unexecuted instantiation: cram_index.c:ed_swap_8p Unexecuted instantiation: cram_io.c:ed_swap_8p Unexecuted instantiation: cram_stats.c:ed_swap_8p Unexecuted instantiation: open_trace_file.c:ed_swap_8p Unexecuted instantiation: hfile_libcurl.c:ed_swap_8p Unexecuted instantiation: hfile_gcs.c:ed_swap_8p Unexecuted instantiation: hfile_s3.c:ed_swap_8p Unexecuted instantiation: hfile_s3_write.c:ed_swap_8p Unexecuted instantiation: bgzf.c:ed_swap_8p Unexecuted instantiation: faidx.c:ed_swap_8p Unexecuted instantiation: md5.c:ed_swap_8p Unexecuted instantiation: tbx.c:ed_swap_8p Unexecuted instantiation: cram_codecs.c:ed_swap_8p |
1576 | | |
1577 | | #ifdef __cplusplus |
1578 | | } |
1579 | | #endif |
1580 | | |
1581 | | #endif |