/src/htslib/htslib/cram.h
Line | Count | Source |
1 | | /// @file htslib/cram.h |
2 | | /// CRAM format-specific API functions. |
3 | | /* |
4 | | Copyright (C) 2015, 2016, 2018-2020, 2022-2024 Genome Research Ltd. |
5 | | |
6 | | Author: James Bonfield <jkb@sanger.ac.uk> |
7 | | |
8 | | Permission is hereby granted, free of charge, to any person obtaining a copy |
9 | | of this software and associated documentation files (the "Software"), to deal |
10 | | in the Software without restriction, including without limitation the rights |
11 | | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
12 | | copies of the Software, and to permit persons to whom the Software is |
13 | | furnished to do so, subject to the following conditions: |
14 | | |
15 | | The above copyright notice and this permission notice shall be included in |
16 | | all copies or substantial portions of the Software. |
17 | | |
18 | | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
19 | | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
20 | | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
21 | | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
22 | | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
23 | | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
24 | | DEALINGS IN THE SOFTWARE. */ |
25 | | |
26 | | /** @file |
27 | | * Consider using the higher level hts_*() API for programs that wish to |
28 | | * be file format agnostic (see htslib/hts.h). |
29 | | * |
30 | | * This API should be used for CRAM specific code. The specifics of the |
31 | | * public API are implemented in cram_io.h, cram_encode.h and cram_decode.h |
32 | | * although these should not be included directly (use this file instead). |
33 | | */ |
34 | | |
35 | | #ifndef HTSLIB_CRAM_H |
36 | | #define HTSLIB_CRAM_H |
37 | | |
38 | | #include <stdarg.h> |
39 | | #include <stdint.h> |
40 | | #include <sys/types.h> |
41 | | |
42 | | #include "hts_defs.h" |
43 | | #include "hts.h" |
44 | | #include "sam.h" |
45 | | |
46 | | #ifdef __cplusplus |
47 | | extern "C" { |
48 | | #endif |
49 | | |
50 | | // see cram/cram_structs.h for an internal more complete copy of this enum |
51 | | |
52 | | // Htslib 1.11 had these listed without any hts prefix, and included |
53 | | // some internal values such as RANS1 and GZIP_RLE (which shouldn't have ever |
54 | | // been public). |
55 | | // |
56 | | // We can't find evidence of these being used and the data type occurs |
57 | | // nowhere in functions or structures meaning using it would be pointless. |
58 | | // However for safety, if you absolutely need the API to not change then |
59 | | // define HTS_COMPAT to 101100 (XYYYZZ for X.Y[.Z], meaning 1.11). |
60 | | #if defined(HTS_COMPAT) && HTS_COMPAT <= 101100 |
61 | | enum cram_block_method { |
62 | | // Legacy unprefixed names (HTS_COMPAT <= 101100). Public methods as defined in the CRAM spec. |
63 | | BM_ERROR = -1, |
64 | | |
65 | | // CRAM 2.x and 3.0 (LZMA and RANS are CRAM 3.0 only) |
66 | | RAW = 0, |
67 | | GZIP = 1, |
68 | | BZIP2 = 2, |
69 | | LZMA = 3, |
70 | | RANS = 4, |
71 | | |
72 | | // NB: the subsequent numbers may change. They're simply here for |
73 | | // compatibility with the old API, but may have no bearing on the |
74 | | // internal way htslib works. DO NOT USE. RANS0 has the same value as RANS. |
75 | | RANS0 = 4, |
76 | | RANS1 = 10, |
77 | | GZIP_RLE = 11, |
78 | | }; |
79 | | #else |
80 | | |
81 | | // Values as defined in the CRAM specifications. |
82 | | // See cram/cram_structs.h cram_block_method_int for an expanded version of |
83 | | // this with local specialisations assigned to codes. |
84 | | enum cram_block_method { |
85 | | CRAM_COMP_UNKNOWN = -1, |
86 | | |
87 | | // CRAM 2.x and 3.0 |
88 | | CRAM_COMP_RAW = 0, |
89 | | CRAM_COMP_GZIP = 1, |
90 | | CRAM_COMP_BZIP2 = 2, |
91 | | |
92 | | // CRAM 3.0 |
93 | | CRAM_COMP_LZMA = 3, |
94 | | CRAM_COMP_RANS4x8 = 4, // 4-way interleaving, 8-bit renormalisation |
95 | | |
96 | | // CRAM 3.1 |
97 | | CRAM_COMP_RANSNx16 = 5, // both 4x16 and 32x16 variants, plus transforms |
98 | | CRAM_COMP_ARITH = 6, // aka Range coding |
99 | | CRAM_COMP_FQZ = 7, // FQZComp |
100 | | CRAM_COMP_TOK3 = 8, // Name tokeniser |
101 | | }; |
102 | | #endif |
103 | | |
104 | | /* NOTE this structure may be expanded in future releases by appending |
105 | | * additional fields. |
106 | | * |
107 | | * Do not assume the size is fixed and avoid using arrays of this struct. |
108 | | */ |
109 | | typedef struct { |
110 | | enum cram_block_method method; |
111 | | |
112 | | // Generic compression level if known (0 if not). |
113 | | // 1 or 9 for gzip min/max flag (else 5). 1-9 for bzip2. |
114 | | // 1 or 11 for tok3 (rans/arith encoder). |
115 | | int level; |
116 | | |
117 | | // For rans* and arith codecs |
118 | | int order; |
119 | | |
120 | | // ransNx16/arith specific |
121 | | int rle; |
122 | | int pack; |
123 | | int stripe; |
124 | | int cat; |
125 | | int nosz; |
126 | | int Nway; |
127 | | |
128 | | // Arithmetic coder only |
129 | | int ext; // external: use gz, xz or bzip2 |
130 | | } cram_method_details; |
131 | | /* Content type of a CRAM block; taken by cram_new_block() and returned by cram_block_get_content_type(). */ |
132 | | enum cram_content_type { |
133 | | CT_ERROR = -1, |
134 | | FILE_HEADER = 0, |
135 | | COMPRESSION_HEADER = 1, |
136 | | MAPPED_SLICE = 2, |
137 | | UNMAPPED_SLICE = 3, // CRAM V1.0 only |
138 | | EXTERNAL = 4, |
139 | | CORE = 5, |
140 | | }; |
141 | | |
142 | | // Opaque data types, see cram_structs for the fully fledged versions. |
143 | | typedef struct cram_file_def cram_file_def; |
144 | | typedef struct cram_fd cram_fd; |
145 | | typedef struct cram_container cram_container; |
146 | | typedef struct cram_block cram_block; |
147 | | typedef struct cram_slice cram_slice; |
148 | | typedef struct cram_metrics cram_metrics; |
149 | | typedef struct cram_block_slice_hdr cram_block_slice_hdr; |
150 | | typedef struct cram_block_compression_hdr cram_block_compression_hdr; |
151 | | typedef struct cram_codec cram_codec; |
152 | | typedef struct refs_t refs_t; |
153 | | |
154 | | struct hFILE; |
155 | | |
156 | | // Accessor functions |
157 | | |
158 | | /* |
159 | | *----------------------------------------------------------------------------- |
160 | | * cram_fd |
161 | | */ |
162 | | HTSLIB_EXPORT |
163 | | sam_hdr_t *cram_fd_get_header(cram_fd *fd); |
164 | | |
165 | | HTSLIB_EXPORT |
166 | | void cram_fd_set_header(cram_fd *fd, sam_hdr_t *hdr); |
167 | | |
168 | | HTSLIB_EXPORT |
169 | | int cram_fd_get_version(cram_fd *fd); |
170 | | |
171 | | HTSLIB_EXPORT |
172 | | void cram_fd_set_version(cram_fd *fd, int vers); |
173 | | |
174 | | HTSLIB_EXPORT |
175 | | int cram_major_vers(cram_fd *fd); |
176 | | HTSLIB_EXPORT |
177 | | int cram_minor_vers(cram_fd *fd); |
178 | | |
179 | | HTSLIB_EXPORT |
180 | | struct hFILE *cram_fd_get_fp(cram_fd *fd); |
181 | | HTSLIB_EXPORT |
182 | | void cram_fd_set_fp(cram_fd *fd, struct hFILE *fp); |
183 | | |
184 | | |
185 | | /* |
186 | | *----------------------------------------------------------------------------- |
187 | | * cram_container |
188 | | */ |
189 | | HTSLIB_EXPORT |
190 | | int32_t cram_container_get_length(cram_container *c); |
191 | | HTSLIB_EXPORT |
192 | | void cram_container_set_length(cram_container *c, int32_t length); |
193 | | HTSLIB_EXPORT |
194 | | int32_t cram_container_get_num_blocks(cram_container *c); |
195 | | HTSLIB_EXPORT |
196 | | void cram_container_set_num_blocks(cram_container *c, int32_t num_blocks); |
197 | | HTSLIB_EXPORT |
198 | | int32_t *cram_container_get_landmarks(cram_container *c, int32_t *num_landmarks); |
199 | | HTSLIB_EXPORT |
200 | | void cram_container_set_landmarks(cram_container *c, int32_t num_landmarks, |
201 | | int32_t *landmarks); |
202 | | HTSLIB_EXPORT |
203 | | int32_t cram_container_get_num_records(cram_container *c); |
204 | | HTSLIB_EXPORT |
205 | | int64_t cram_container_get_num_bases(cram_container *c); |
206 | | |
207 | | /* Returns true if the container is empty (EOF marker) */ |
208 | | HTSLIB_EXPORT |
209 | | int cram_container_is_empty(cram_fd *fd); |
210 | | |
211 | | |
212 | | /* Returns chromosome and start/span from container struct */ |
213 | | HTSLIB_EXPORT |
214 | | void cram_container_get_coords(cram_container *c, |
215 | | int *refid, hts_pos_t *start, hts_pos_t *span); |
216 | | |
217 | | /* |
218 | | *----------------------------------------------------------------------------- |
219 | | * cram_block |
220 | | */ |
221 | | HTSLIB_EXPORT |
222 | | int32_t cram_block_get_content_id(cram_block *b); |
223 | | HTSLIB_EXPORT |
224 | | int32_t cram_block_get_comp_size(cram_block *b); |
225 | | HTSLIB_EXPORT |
226 | | int32_t cram_block_get_uncomp_size(cram_block *b); |
227 | | HTSLIB_EXPORT |
228 | | int32_t cram_block_get_crc32(cram_block *b); |
229 | | HTSLIB_EXPORT |
230 | | void * cram_block_get_data(cram_block *b); |
231 | | HTSLIB_EXPORT |
232 | | enum cram_content_type cram_block_get_content_type(cram_block *b); |
233 | | HTSLIB_EXPORT |
234 | | enum cram_block_method cram_block_get_method(cram_block *b); |
235 | | |
236 | | HTSLIB_EXPORT |
237 | | cram_method_details *cram_expand_method(uint8_t *data, int32_t size, |
238 | | enum cram_block_method comp); |
239 | | |
240 | | HTSLIB_EXPORT |
241 | | void cram_block_set_content_id(cram_block *b, int32_t id); |
242 | | HTSLIB_EXPORT |
243 | | void cram_block_set_comp_size(cram_block *b, int32_t size); |
244 | | HTSLIB_EXPORT |
245 | | void cram_block_set_uncomp_size(cram_block *b, int32_t size); |
246 | | HTSLIB_EXPORT |
247 | | void cram_block_set_crc32(cram_block *b, int32_t crc); |
248 | | HTSLIB_EXPORT |
249 | | void cram_block_set_data(cram_block *b, void *data); |
250 | | |
251 | | HTSLIB_EXPORT |
252 | | int cram_block_append(cram_block *b, const void *data, int size); |
253 | | HTSLIB_EXPORT |
254 | | void cram_block_update_size(cram_block *b); |
255 | | |
256 | | // Offset is known as "size" internally, but it can be confusing. |
257 | | HTSLIB_EXPORT |
258 | | size_t cram_block_get_offset(cram_block *b); |
259 | | HTSLIB_EXPORT |
260 | | void cram_block_set_offset(cram_block *b, size_t offset); |
261 | | |
262 | | /* |
263 | | * Computes the size of a cram block, including the block |
264 | | * header itself. |
265 | | */ |
266 | | HTSLIB_EXPORT |
267 | | uint32_t cram_block_size(cram_block *b); |
268 | | |
269 | | /* |
270 | | * Returns the Block Content ID values referred to by a cram_codec in |
271 | | * ids[2]. |
272 | | * |
273 | | * -2 is unused. |
274 | | * -1 is CORE |
275 | | * >= 0 is the block with that Content ID |
276 | | */ |
277 | | HTSLIB_EXPORT |
278 | | void cram_codec_get_content_ids(cram_codec *c, int ids[2]); |
279 | | |
280 | | /* |
281 | | * Produces a human readable description of the codec parameters. |
282 | | * This is appended to an existing kstring 'ks'. |
283 | | * |
284 | | * Returns 0 on success, |
285 | | * <0 on failure |
286 | | */ |
287 | | HTSLIB_EXPORT |
288 | | int cram_codec_describe(cram_codec *c, kstring_t *ks); |
289 | | |
290 | | /* |
291 | | * Renumbers RG numbers in a cram compression header. |
292 | | * |
293 | | * CRAM stores RG as the Nth number in the header, rather than a |
294 | | * string holding the ID: tag. This is smaller in space, but means |
295 | | * "samtools cat" to join files together that contain single but |
296 | | * different RG lines needs a way of renumbering them. |
297 | | * |
298 | | * The file descriptor is expected to be immediately after the |
299 | | * cram_container structure (ie before the cram compression header). |
300 | | * Due to the nature of the CRAM format, this needs to read and write |
301 | | * the blocks itself. Note that there may be multiple slices within |
302 | | * the container, meaning multiple compression headers to manipulate. |
303 | | * Changing RG may change the size of the compression header and |
304 | | * therefore the length field in the container. Hence we rewrite all |
305 | | * blocks just in case and also emit the adjusted container. |
306 | | * |
307 | | * The current implementation can only cope with renumbering a single |
308 | | * RG (and only then if it is using HUFFMAN or BETA codecs). In |
309 | | * theory it *may* be possible to renumber multiple RGs if they use |
310 | | * HUFFMAN to the CORE block or use an external block unshared by any |
311 | | * other data series. So we have an API that can be upgraded to |
312 | | * support this, but do not implement it for now. An example |
313 | | * implementation of RG as an EXTERNAL block would be to find that |
314 | | * block and rewrite it, returning the number of blocks consumed. |
315 | | * |
316 | | * Returns 0 on success; |
317 | | * -1 if unable to edit; |
318 | | * -2 on other errors (eg I/O). |
319 | | */ |
320 | | HTSLIB_EXPORT |
321 | | int cram_transcode_rg(cram_fd *in, cram_fd *out, |
322 | | cram_container *c, |
323 | | int nrg, int *in_rg, int *out_rg); |
324 | | |
325 | | /* |
326 | | * Copies the blocks representing the next num_slice slices from a |
327 | | * container from 'in' to 'out'. It is expected that the file pointer |
328 | | * is just after the read of the cram_container and cram compression |
329 | | * header. |
330 | | * |
331 | | * Returns 0 on success |
332 | | * -1 on failure |
333 | | */ |
334 | | HTSLIB_EXPORT |
335 | | int cram_copy_slice(cram_fd *in, cram_fd *out, int32_t num_slice); |
336 | | |
337 | | /* |
338 | | * Copies a container, but filtering it down to a specific region (as |
339 | | * already specified in 'in'). |
340 | | * |
341 | | * Returns 0 on success |
342 | | * -1 on EOF |
343 | | * -2 on error |
344 | | */ |
345 | | HTSLIB_EXPORT |
346 | | int cram_filter_container(cram_fd *in, cram_fd *out, cram_container *c, |
347 | | int *ref_id); |
348 | | |
349 | | /* |
350 | | * Decodes a CRAM block compression header. |
351 | | * Returns header ptr on success |
352 | | * NULL on failure |
353 | | */ |
354 | | HTSLIB_EXPORT |
355 | | cram_block_compression_hdr *cram_decode_compression_header(cram_fd *fd, |
356 | | cram_block *b); |
357 | | /* |
358 | | * Frees a cram_block_compression_hdr structure. |
359 | | */ |
360 | | HTSLIB_EXPORT |
361 | | void cram_free_compression_header(cram_block_compression_hdr *hdr); |
362 | | |
363 | | typedef struct cram_cid2ds_t cram_cid2ds_t; |
364 | | |
365 | | /* |
366 | | * Map cram block numbers to data-series. It's normally a 1:1 mapping, |
367 | | * but in rare cases it can be 1:many (or even many:many). |
368 | | * The key is the block number and the value is an index into the data-series |
369 | | * array, which we iterate over until reaching a negative value. |
370 | | * |
371 | | * Provide cid2ds as NULL to allocate a new map or pass in an existing one |
372 | | * to append to this map. The new (or existing) map is returned. |
373 | | * |
374 | | * Returns the cid2ds (newly allocated or as provided) on success, |
375 | | * NULL on failure. |
376 | | */ |
377 | | HTSLIB_EXPORT |
378 | | cram_cid2ds_t *cram_update_cid2ds_map(cram_block_compression_hdr *hdr, |
379 | | cram_cid2ds_t *cid2ds); |
380 | | |
381 | | /* |
382 | | * Return a list of data series observed as belonging to a block with |
383 | | * the specified content_id. *n is the number of data series |
384 | | * returned, or 0 if block is unused. |
385 | | * Block content_id of -1 is used to indicate the CORE block. |
386 | | * |
387 | | * The pointer returned is owned by the cram_cid2ds state and should |
388 | | * not be freed by the caller. |
389 | | */ |
390 | | HTSLIB_EXPORT |
391 | | int *cram_cid2ds_query(cram_cid2ds_t *c2d, int content_id, int *n); |
392 | | |
393 | | /* |
394 | | * Frees a cram_cid2ds_t allocated by cram_update_cid2ds_map |
395 | | */ |
396 | | HTSLIB_EXPORT |
397 | | void cram_cid2ds_free(cram_cid2ds_t *cid2ds); |
398 | | |
399 | | /* |
400 | | * Produces a description of the record and tag encodings held within |
401 | | * a compression header and appends to 'ks'. |
402 | | * |
403 | | * Returns 0 on success, |
404 | | * <0 on failure. |
405 | | */ |
406 | | HTSLIB_EXPORT |
407 | | int cram_describe_encodings(cram_block_compression_hdr *hdr, kstring_t *ks); |
408 | | |
409 | | /* |
410 | | *----------------------------------------------------------------------------- |
411 | | * cram slice interrogation |
412 | | */ |
413 | | |
414 | | /* |
415 | | * Returns the number of cram blocks within this slice. |
416 | | */ |
417 | | HTSLIB_EXPORT |
418 | | int32_t cram_slice_hdr_get_num_blocks(cram_block_slice_hdr *hdr); |
419 | | |
420 | | /* |
421 | | * Returns the block content_id for the block containing an embedded reference |
422 | | * sequence. If none is present, -1 is returned. |
423 | | */ |
424 | | HTSLIB_EXPORT |
425 | | int cram_slice_hdr_get_embed_ref_id(cram_block_slice_hdr *h); |
426 | | |
427 | | /* |
428 | | * Returns slice reference ID, start and span (length) coordinates. |
429 | | * Return parameters may be NULL in which case they are ignored. |
430 | | */ |
431 | | HTSLIB_EXPORT |
432 | | void cram_slice_hdr_get_coords(cram_block_slice_hdr *h, |
433 | | int *refid, hts_pos_t *start, hts_pos_t *span); |
434 | | |
435 | | /* |
436 | | * Decodes a slice header from a cram block. |
437 | | * Returns the opaque cram_block_slice_hdr pointer on success, |
438 | | * NULL on failure. |
439 | | */ |
440 | | HTSLIB_EXPORT |
441 | | cram_block_slice_hdr *cram_decode_slice_header(cram_fd *fd, cram_block *b); |
442 | | |
443 | | /* |
444 | | * Frees a cram_block_slice_hdr structure. |
445 | | */ |
446 | | HTSLIB_EXPORT |
447 | | void cram_free_slice_header(cram_block_slice_hdr *hdr); |
448 | | |
449 | | /* |
450 | | *----------------------------------------------------------------------------- |
451 | | * cram_io basics |
452 | | */ |
453 | | |
454 | | /**@{ ---------------------------------------------------------------------- |
455 | | * CRAM blocks - the dynamically growable data block. We have code to |
456 | | * create, update, (un)compress and read/write. |
457 | | * |
458 | | * These are derived from the deflate_interlaced.c blocks, but with the |
459 | | * CRAM extension of content types and IDs. |
460 | | */ |
461 | | |
462 | | /*! Allocates a new cram_block structure with a specified content_type and |
463 | | * id. |
464 | | * |
465 | | * @return |
466 | | * Returns block pointer on success; |
467 | | * NULL on failure |
468 | | * |
469 | | * The cram_block struct returned by a successful call should be freed |
470 | | * via cram_free_block() when it is no longer needed. |
471 | | */ |
472 | | HTSLIB_EXPORT |
473 | | cram_block *cram_new_block(enum cram_content_type content_type, |
474 | | int content_id); |
475 | | |
476 | | /*! Reads a block from a cram file. |
477 | | * |
478 | | * @return |
479 | | * Returns cram_block pointer on success; |
480 | | * NULL on failure |
481 | | * |
482 | | * The cram_block struct returned by a successful call should be freed |
483 | | * via cram_free_block() when it is no longer needed. |
484 | | */ |
485 | | HTSLIB_EXPORT |
486 | | cram_block *cram_read_block(cram_fd *fd); |
487 | | |
488 | | /*! Writes a CRAM block. |
489 | | * |
490 | | * @return |
491 | | * Returns 0 on success; |
492 | | * -1 on failure |
493 | | */ |
494 | | HTSLIB_EXPORT |
495 | | int cram_write_block(cram_fd *fd, cram_block *b); |
496 | | |
497 | | /*! Frees a CRAM block, deallocating internal data too. |
498 | | */ |
499 | | HTSLIB_EXPORT |
500 | | void cram_free_block(cram_block *b); |
501 | | |
502 | | /*! Uncompresses a CRAM block, if compressed. |
503 | | * |
504 | | * @return |
505 | | * Returns 0 on success; |
506 | | * -1 on failure |
507 | | */ |
508 | | HTSLIB_EXPORT |
509 | | int cram_uncompress_block(cram_block *b); |
510 | | |
511 | | /*! Compresses a block. |
512 | | * |
513 | | * Compresses a block using one of two different zlib strategies. If we only |
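514 | | * want one choice set strat2 to be -1. (NOTE(review): neither prototype below has a strat2 parameter; this paragraph looks stale - confirm.) |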
515 | | * |
516 | | * The logic here is that sometimes Z_RLE does a better job than Z_FILTERED |
517 | | * or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is |
518 | | * significantly faster. |
519 | | * |
520 | | * @return |
521 | | * Returns 0 on success; |
522 | | * -1 on failure |
523 | | */ |
524 | | HTSLIB_EXPORT |
525 | | int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics, |
526 | | int method, int level); |
527 | | int cram_compress_block2(cram_fd *fd, cram_slice *s, |
528 | | cram_block *b, cram_metrics *metrics, |
529 | | int method, int level); |
530 | | |
531 | | /**@}*/ |
532 | | /**@{ ---------------------------------------------------------------------- |
533 | | * Containers |
534 | | */ |
535 | | |
536 | | /*! Creates a new container, specifying the maximum number of slices |
537 | | * and records permitted. |
538 | | * |
539 | | * @return |
540 | | * Returns cram_container ptr on success; |
541 | | * NULL on failure |
542 | | * |
543 | | * The cram_container struct returned by a successful call should be freed |
544 | | * via cram_free_container() when it is no longer needed. |
545 | | */ |
546 | | HTSLIB_EXPORT |
547 | | cram_container *cram_new_container(int nrec, int nslice); |
548 | | HTSLIB_EXPORT |
549 | | void cram_free_container(cram_container *c); |
550 | | |
551 | | /*! Reads a container header. |
552 | | * |
553 | | * @return |
554 | | * Returns cram_container on success; |
555 | | * NULL on failure or no container left (fd->err == 0). |
556 | | * |
557 | | * The cram_container struct returned by a successful call should be freed |
558 | | * via cram_free_container() when it is no longer needed. |
559 | | */ |
560 | | HTSLIB_EXPORT |
561 | | cram_container *cram_read_container(cram_fd *fd); |
562 | | |
563 | | /*! Writes a container structure. |
564 | | * |
565 | | * @return |
566 | | * Returns 0 on success; |
567 | | * -1 on failure |
568 | | */ |
569 | | HTSLIB_EXPORT |
570 | | int cram_write_container(cram_fd *fd, cram_container *h); |
571 | | |
572 | | /* |
573 | | * Stores the container structure in dat and returns *size as the |
574 | | * number of bytes written to dat[]. The input size of dat is also |
575 | | * held in *size and should be initialised to cram_container_size(c). |
576 | | * |
577 | | * Returns 0 on success; |
578 | | * -1 on failure |
579 | | */ |
580 | | HTSLIB_EXPORT |
581 | | int cram_store_container(cram_fd *fd, cram_container *c, char *dat, int *size); |
582 | | |
583 | | HTSLIB_EXPORT |
584 | | int cram_container_size(cram_container *c); |
585 | | |
586 | | /**@}*/ |
587 | | /**@{ ---------------------------------------------------------------------- |
588 | | * The top-level cram opening, closing and option handling |
589 | | */ |
590 | | |
591 | | /*! Opens a CRAM file for read (mode "rb") or write ("wb"). |
592 | | * |
593 | | * The filename may be "-" to indicate stdin or stdout. |
594 | | * |
595 | | * @return |
596 | | * Returns file handle on success; |
597 | | * NULL on failure. |
598 | | */ |
599 | | HTSLIB_EXPORT |
600 | | cram_fd *cram_open(const char *filename, const char *mode); |
601 | | |
602 | | /*! Opens an existing stream for reading or writing. |
603 | | * |
604 | | * @return |
605 | | * Returns file handle on success; |
606 | | * NULL on failure. |
607 | | */ |
608 | | HTSLIB_EXPORT |
609 | | cram_fd *cram_dopen(struct hFILE *fp, const char *filename, const char *mode); |
610 | | |
611 | | /*! Closes a CRAM file. |
612 | | * |
613 | | * @return |
614 | | * Returns 0 on success; |
615 | | * -1 on failure |
616 | | */ |
617 | | HTSLIB_EXPORT |
618 | | int cram_close(cram_fd *fd); |
619 | | |
620 | | /* |
621 | | * Seek within a CRAM file. |
622 | | * |
623 | | * Returns 0 on success |
624 | | * -1 on failure |
625 | | */ |
626 | | HTSLIB_EXPORT |
627 | | int cram_seek(cram_fd *fd, off_t offset, int whence); |
628 | | |
629 | | /* |
630 | | * Flushes a CRAM file. |
631 | | * Useful for when writing to stdout without wishing to close the stream. |
632 | | * |
633 | | * Returns 0 on success |
634 | | * -1 on failure |
635 | | */ |
636 | | HTSLIB_EXPORT |
637 | | int cram_flush(cram_fd *fd); |
638 | | |
639 | | /*! Checks for end of file on a cram_fd stream. |
640 | | * |
641 | | * @return |
642 | | * Returns 0 if not at end of file |
643 | | * 1 if we hit an expected EOF (end of range or EOF block) |
644 | | * 2 for other EOF (end of stream without EOF block) |
645 | | */ |
646 | | HTSLIB_EXPORT |
647 | | int cram_eof(cram_fd *fd); |
648 | | |
649 | | /*! Sets options on the cram_fd. |
650 | | * |
651 | | * See CRAM_OPT_* definitions in hts.h. |
652 | | * Use this immediately after opening. |
653 | | * |
654 | | * @return |
655 | | * Returns 0 on success; |
656 | | * -1 on failure |
657 | | */ |
658 | | HTSLIB_EXPORT |
659 | | int cram_set_option(cram_fd *fd, enum hts_fmt_option opt, ...); |
660 | | |
661 | | /*! Sets options on the cram_fd; va_list variant of cram_set_option(). |
662 | | * |
663 | | * See CRAM_OPT_* definitions in hts.h. |
664 | | * Use this immediately after opening. |
665 | | * |
666 | | * @return |
667 | | * Returns 0 on success; |
668 | | * -1 on failure |
669 | | */ |
670 | | HTSLIB_EXPORT |
671 | | int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args); |
672 | | |
673 | | /*! |
674 | | * Attaches a header to a cram_fd. |
675 | | * |
676 | | * This should be used when creating a new cram_fd for writing where |
677 | | * we have an SAM_hdr already constructed (eg from a file we've read |
678 | | * in). |
679 | | * |
680 | | * @return |
681 | | * Returns 0 on success; |
682 | | * -1 on failure |
683 | | */ |
684 | | HTSLIB_EXPORT |
685 | | int cram_set_header(cram_fd *fd, sam_hdr_t *hdr); |
686 | | |
687 | | /*! Check if this file has a proper EOF block |
688 | | * |
689 | | * @return |
690 | | * Returns 3 if the file is a version of CRAM that does not contain EOF blocks |
691 | | * 2 if the file is a stream and thus unseekable |
692 | | * 1 if the file contains an EOF block |
693 | | * 0 if the file does not contain an EOF block |
694 | | * -1 if an error occurred whilst reading the file or we could not seek back to where we were |
695 | | * |
696 | | */ |
697 | | HTSLIB_EXPORT |
698 | | int cram_check_EOF(cram_fd *fd); |
699 | | |
700 | | /* As int32_decode/encode, but from/to blocks instead of cram_fd */ |
701 | | HTSLIB_EXPORT |
702 | | int int32_put_blk(cram_block *b, int32_t val); |
703 | | |
704 | | /**@}*/ |
705 | | /**@{ ------------------------------------------------------------------- |
706 | | * Old typedef and function names for compatibility with existing code. |
707 | | * Header functionality is now provided by sam.h's sam_hdr_t functions. |
708 | | */ |
709 | | |
710 | | typedef sam_hdr_t SAM_hdr; |
711 | | |
712 | | /*! Tokenises a SAM header into a hash table. |
713 | | * Compatibility wrapper: forwards to sam_hdr_parse(len, hdr); note the swapped argument order. |
714 | | * Also extracts a few bits on specific data types, such as @RG lines. |
715 | | * |
716 | | * @return |
717 | | * Returns a SAM_hdr struct on success (free with sam_hdr_free()); |
718 | | * NULL on failure |
719 | | */ |
720 | 0 | static inline SAM_hdr *sam_hdr_parse_(const char *hdr, size_t len) { return sam_hdr_parse(len, hdr); }Unexecuted instantiation: hts.c:sam_hdr_parse_ Unexecuted instantiation: sam.c:sam_hdr_parse_ Unexecuted instantiation: cram_decode.c:sam_hdr_parse_ Unexecuted instantiation: cram_encode.c:sam_hdr_parse_ Unexecuted instantiation: cram_index.c:sam_hdr_parse_ Unexecuted instantiation: cram_io.c:sam_hdr_parse_ Unexecuted instantiation: cram_stats.c:sam_hdr_parse_ Unexecuted instantiation: cram_codecs.c:sam_hdr_parse_ |
721 | | |
722 | | /*! Deallocates all storage used by a SAM_hdr struct. |
723 | | * Compatibility wrapper: forwards to sam_hdr_destroy(). |
724 | | * This also decrements the header reference count. If after decrementing |
725 | | * it is still non-zero then the header is assumed to be in use by another |
726 | | * caller and the free is not done. |
727 | | */ |
728 | 0 | static inline void sam_hdr_free(SAM_hdr *hdr) { sam_hdr_destroy(hdr); }Unexecuted instantiation: hts.c:sam_hdr_free Unexecuted instantiation: sam.c:sam_hdr_free Unexecuted instantiation: cram_decode.c:sam_hdr_free Unexecuted instantiation: cram_encode.c:sam_hdr_free Unexecuted instantiation: cram_index.c:sam_hdr_free Unexecuted instantiation: cram_io.c:sam_hdr_free Unexecuted instantiation: cram_stats.c:sam_hdr_free Unexecuted instantiation: cram_codecs.c:sam_hdr_free |
729 | | |
730 | | /* sam_hdr_length() and sam_hdr_str() are now provided by sam.h. */ |
731 | | |
732 | | /*! Add an @PG line. |
733 | | * |
734 | | * If we wish complete control over this use sam_hdr_add_line() directly. This |
735 | | * function uses that, but attempts to do a lot of tedious house work for |
736 | | * you too. |
737 | | * |
738 | | * - It will generate a suitable ID if the supplied one clashes. |
739 | | * - It will generate multiple @PG records if we have multiple PG chains. |
740 | | * |
741 | | * Call it as per sam_hdr_add_line() with a series of key,value pairs ending |
742 | | * in NULL. |
743 | | * |
744 | | * @return |
745 | | * Returns 0 on success; |
746 | | * -1 on failure |
747 | | */ |
748 | | #define sam_hdr_add_PG sam_hdr_add_pg |
749 | | |
750 | | /**@{ -------------------------------------------------------------------*/ |
751 | | |
752 | | /*! |
753 | | * Returns the refs_t structure used by a cram file handle. |
754 | | * |
755 | | * This may be used in conjunction with option CRAM_OPT_SHARED_REF to |
756 | | * share reference memory between multiple file handles. |
757 | | * |
758 | | * @return |
759 | | * Returns NULL if none exists or the file handle is not a CRAM file. |
760 | | */ |
761 | | HTSLIB_EXPORT |
762 | | refs_t *cram_get_refs(htsFile *fd); |
763 | | |
764 | | /*! |
765 | | * Returns the file offsets of CRAM slices covering a specific region |
766 | | * query. Note both offsets are the START of the slice. |
767 | | * |
768 | | * first will point to the start of the first overlapping slice |
769 | | * last will point to the start of the last overlapping slice |
770 | | * |
771 | | * @return |
772 | | * Returns 0 on success |
773 | | * <0 on failure |
774 | | */ |
775 | | HTSLIB_EXPORT |
776 | | int cram_index_extents(cram_fd *fd, int refid, hts_pos_t start, hts_pos_t end, |
777 | | off_t *first, off_t *last); |
778 | | |
779 | | /*! Returns the total number of containers in the CRAM index. |
780 | | * |
781 | | * Note the index is not required to have an entry for every container, but it |
782 | | * will always have an index entry for the start of each chromosome. |
783 | | * (Although in practice our indices do contain one entry per container.) |
784 | | * |
785 | | * This is equivalent to cram_num_containers_between(fd, 0, 0, NULL, NULL) |
786 | | */ |
787 | | HTSLIB_EXPORT |
788 | | int64_t cram_num_containers(cram_fd *fd); |
789 | | |
790 | | /*! Returns the number of containers in the CRAM index within given offsets. |
791 | | * |
792 | | * The cstart and cend offsets are the locations of the start of containers |
793 | | * as returned by index_container_offset. |
794 | | * |
795 | | * If non-NULL, first and last will hold the inclusive range of container |
796 | | * numbers, counting from zero. |
797 | | * |
798 | | * @return |
799 | | * Returns the number of containers, equivalent to *last-*first+1. |
800 | | */ |
801 | | HTSLIB_EXPORT |
802 | | int64_t cram_num_containers_between(cram_fd *fd, |
803 | | off_t cstart, off_t cend, |
804 | | int64_t *first, int64_t *last); |
805 | | |
806 | | /*! Returns the byte offset for the start of the n^th container. |
807 | | * |
808 | | * The index must have previously been loaded, otherwise <0 is returned. |
809 | | */ |
810 | | HTSLIB_EXPORT |
811 | | off_t cram_container_num2offset(cram_fd *fd, int64_t n); |
812 | | |
813 | | /*! Returns the container number for the first container at offset >= pos. |
814 | | * |
815 | | * The index must have previously been loaded, otherwise <0 is returned. |
816 | | */ |
817 | | HTSLIB_EXPORT |
818 | | int64_t cram_container_offset2num(cram_fd *fd, off_t pos); |
819 | | |
820 | | /**@}*/ |
821 | | |
822 | | #ifdef __cplusplus |
823 | | } |
824 | | #endif |
825 | | |
826 | | #endif |