Coverage Report

Created: 2026-03-31 06:35

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/htslib/htslib/cram.h
Line
Count
Source
1
/// @file htslib/cram.h
2
/// CRAM format-specific API functions.
3
/*
4
    Copyright (C) 2015, 2016, 2018-2020, 2022-2024 Genome Research Ltd.
5
6
    Author: James Bonfield <jkb@sanger.ac.uk>
7
8
Permission is hereby granted, free of charge, to any person obtaining a copy
9
of this software and associated documentation files (the "Software"), to deal
10
in the Software without restriction, including without limitation the rights
11
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12
copies of the Software, and to permit persons to whom the Software is
13
furnished to do so, subject to the following conditions:
14
15
The above copyright notice and this permission notice shall be included in
16
all copies or substantial portions of the Software.
17
18
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24
DEALINGS IN THE SOFTWARE.  */
25
26
/** @file
27
 * Consider using the higher level hts_*() API for programs that wish to
28
 * be file format agnostic (see htslib/hts.h).
29
 *
30
 * This API should be used for CRAM specific code. The specifics of the
31
 * public API are implemented in cram_io.h, cram_encode.h and cram_decode.h
32
 * although these should not be included directly (use this file instead).
33
 */
34
35
#ifndef HTSLIB_CRAM_H
36
#define HTSLIB_CRAM_H
37
38
#include <stdarg.h>
39
#include <stdint.h>
40
#include <sys/types.h>
41
42
#include "hts_defs.h"
43
#include "hts.h"
44
#include "sam.h"
45
46
#ifdef __cplusplus
47
extern "C" {
48
#endif
49
50
// See cram/cram_structs.h for a more complete, internal copy of this enum.
51
52
// Htslib 1.11 had these listed without any hts prefix, and included
53
// some internal values such as RANS1 and GZIP_RLE (which shouldn't have ever
54
// been public).
55
//
56
// We can't find evidence of these being used and the data type occurs
57
// nowhere in functions or structures meaning using it would be pointless.
58
// However for safety, if you absolutely need the API to not change then
59
// define HTS_COMPAT to 101100 (XYYYZZ for X.Y[.Z], meaning 1.11).
60
#if defined(HTS_COMPAT) && HTS_COMPAT <= 101100
61
enum cram_block_method {
62
    // Public methods as defined in the CRAM spec.
63
    BM_ERROR = -1,
64
65
    // CRAM 2.x and 3.0
66
    RAW      = 0,
67
    GZIP     = 1,
68
    BZIP2    = 2,
69
    LZMA     = 3,
70
    RANS     = 4,
71
72
    // NB: the subsequent numbers may change.  They're simply here for
73
    // compatibility with the old API, but may have no bearing on the
74
    // internal way htslib works.  DO NOT USE
75
    RANS0    = 4,
76
    RANS1    = 10,
77
    GZIP_RLE = 11,
78
};
79
#else
80
81
// Values as defined in the CRAM specifications.
82
// See cram/cram_structs.h cram_block_method_int for an expanded version of
83
// this with local specialisations assigned to codes.
84
enum cram_block_method {
85
    CRAM_COMP_UNKNOWN = -1,
86
87
    // CRAM 2.x and 3.0
88
    CRAM_COMP_RAW      = 0,
89
    CRAM_COMP_GZIP     = 1,
90
    CRAM_COMP_BZIP2    = 2,
91
92
    // CRAM 3.0
93
    CRAM_COMP_LZMA     = 3,
94
    CRAM_COMP_RANS4x8  = 4, // 4-way interleaving, 8-bit renormalisation
95
96
    // CRAM 3.1
97
    CRAM_COMP_RANSNx16 = 5, // both 4x16 and 32x16 variants, plus transforms
98
    CRAM_COMP_ARITH    = 6, // aka Range coding
99
    CRAM_COMP_FQZ      = 7, // FQZComp
100
    CRAM_COMP_TOK3     = 8, // Name tokeniser
101
};
102
#endif
103
104
/* NOTE this structure may be expanded in future releases by appending
105
 * additional fields.
106
 *
107
 * Do not assume the size is fixed and avoid using arrays of this struct.
108
 */
109
typedef struct {
110
    enum cram_block_method method;
111
112
    // Generic compression level if known (0 if not).
113
    // 1 or 9 for gzip min/max flag (else 5).  1-9 for bzip2
114
    // 1 or 11 for tok3 (rans/arith encoder).
115
    int level;
116
117
    // For rans* and arith codecs
118
    int order;
119
120
    // ransNx16/arith specific
121
    int rle;
122
    int pack;
123
    int stripe;
124
    int cat;
125
    int nosz;
126
    int Nway;
127
128
    // Arithmetic coder only
129
    int ext; // external: use gz, xz or bzip2
130
} cram_method_details;
131
132
enum cram_content_type {
133
    CT_ERROR           = -1,
134
    FILE_HEADER        = 0,
135
    COMPRESSION_HEADER = 1,
136
    MAPPED_SLICE       = 2,
137
    UNMAPPED_SLICE     = 3, // CRAM V1.0 only
138
    EXTERNAL           = 4,
139
    CORE               = 5,
140
};
141
142
// Opaque data types, see cram_structs for the fully fledged versions.
143
typedef struct cram_file_def cram_file_def;
144
typedef struct cram_fd cram_fd;
145
typedef struct cram_container cram_container;
146
typedef struct cram_block cram_block;
147
typedef struct cram_slice cram_slice;
148
typedef struct cram_metrics cram_metrics;
149
typedef struct cram_block_slice_hdr cram_block_slice_hdr;
150
typedef struct cram_block_compression_hdr cram_block_compression_hdr;
151
typedef struct cram_codec cram_codec;
152
typedef struct refs_t refs_t;
153
154
struct hFILE;
155
156
// Accessor functions
157
158
/*
159
 *-----------------------------------------------------------------------------
160
 * cram_fd
161
 */
162
HTSLIB_EXPORT
163
sam_hdr_t *cram_fd_get_header(cram_fd *fd);
164
165
HTSLIB_EXPORT
166
void cram_fd_set_header(cram_fd *fd, sam_hdr_t *hdr);
167
168
HTSLIB_EXPORT
169
int cram_fd_get_version(cram_fd *fd);
170
171
HTSLIB_EXPORT
172
void cram_fd_set_version(cram_fd *fd, int vers);
173
174
HTSLIB_EXPORT
175
int cram_major_vers(cram_fd *fd);
176
HTSLIB_EXPORT
177
int cram_minor_vers(cram_fd *fd);
178
179
HTSLIB_EXPORT
180
struct hFILE *cram_fd_get_fp(cram_fd *fd);
181
HTSLIB_EXPORT
182
void cram_fd_set_fp(cram_fd *fd, struct hFILE *fp);
183
184
185
/*
186
 *-----------------------------------------------------------------------------
187
 * cram_container
188
 */
189
HTSLIB_EXPORT
190
int32_t cram_container_get_length(cram_container *c);
191
HTSLIB_EXPORT
192
void cram_container_set_length(cram_container *c, int32_t length);
193
HTSLIB_EXPORT
194
int32_t cram_container_get_num_blocks(cram_container *c);
195
HTSLIB_EXPORT
196
void cram_container_set_num_blocks(cram_container *c, int32_t num_blocks);
197
HTSLIB_EXPORT
198
int32_t *cram_container_get_landmarks(cram_container *c, int32_t *num_landmarks);
199
HTSLIB_EXPORT
200
void cram_container_set_landmarks(cram_container *c, int32_t num_landmarks,
201
                                  int32_t *landmarks);
202
HTSLIB_EXPORT
203
int32_t cram_container_get_num_records(cram_container *c);
204
HTSLIB_EXPORT
205
int64_t cram_container_get_num_bases(cram_container *c);
206
207
/* Returns true if the container is empty (EOF marker) */
208
HTSLIB_EXPORT
209
int cram_container_is_empty(cram_fd *fd);
210
211
212
/* Returns chromosome and start/span from container struct */
213
HTSLIB_EXPORT
214
void cram_container_get_coords(cram_container *c,
215
                               int *refid, hts_pos_t *start, hts_pos_t *span);
216
217
/*
218
 *-----------------------------------------------------------------------------
219
 * cram_block
220
 */
221
HTSLIB_EXPORT
222
int32_t cram_block_get_content_id(cram_block *b);
223
HTSLIB_EXPORT
224
int32_t cram_block_get_comp_size(cram_block *b);
225
HTSLIB_EXPORT
226
int32_t cram_block_get_uncomp_size(cram_block *b);
227
HTSLIB_EXPORT
228
int32_t cram_block_get_crc32(cram_block *b);
229
HTSLIB_EXPORT
230
void *  cram_block_get_data(cram_block *b);
231
HTSLIB_EXPORT
232
enum cram_content_type cram_block_get_content_type(cram_block *b);
233
HTSLIB_EXPORT
234
enum cram_block_method cram_block_get_method(cram_block *b);
235
236
HTSLIB_EXPORT
237
cram_method_details *cram_expand_method(uint8_t *data, int32_t size,
238
                                        enum cram_block_method comp);
239
240
HTSLIB_EXPORT
241
void cram_block_set_content_id(cram_block *b, int32_t id);
242
HTSLIB_EXPORT
243
void cram_block_set_comp_size(cram_block *b, int32_t size);
244
HTSLIB_EXPORT
245
void cram_block_set_uncomp_size(cram_block *b, int32_t size);
246
HTSLIB_EXPORT
247
void cram_block_set_crc32(cram_block *b, int32_t crc);
248
HTSLIB_EXPORT
249
void cram_block_set_data(cram_block *b, void *data);
250
251
HTSLIB_EXPORT
252
int cram_block_append(cram_block *b, const void *data, int size);
253
HTSLIB_EXPORT
254
void cram_block_update_size(cram_block *b);
255
256
// Offset is known as "size" internally, but it can be confusing.
257
HTSLIB_EXPORT
258
size_t cram_block_get_offset(cram_block *b);
259
HTSLIB_EXPORT
260
void cram_block_set_offset(cram_block *b, size_t offset);
261
262
/*
263
 * Computes the size of a cram block, including the block
264
 * header itself.
265
 */
266
HTSLIB_EXPORT
267
uint32_t cram_block_size(cram_block *b);
268
269
/*
270
 * Returns the Block Content ID values referred to by a cram_codec in
271
 * ids[2].
272
 *
273
 * -2 is unused.
274
 * -1 is CORE
275
 * >= 0 is the block with that Content ID
276
 */
277
HTSLIB_EXPORT
278
void cram_codec_get_content_ids(cram_codec *c, int ids[2]);
279
280
/*
281
 * Produces a human readable description of the codec parameters.
282
 * This is appended to an existing kstring 'ks'.
283
 *
284
 * Returns 0 on success,
285
 *        <0 on failure
286
 */
287
HTSLIB_EXPORT
288
int cram_codec_describe(cram_codec *c, kstring_t *ks);
289
290
/*
291
 * Renumbers RG numbers in a cram compression header.
292
 *
293
 * CRAM stores RG as the Nth number in the header, rather than a
294
 * string holding the ID: tag.  This is smaller in space, but means
295
 * "samtools cat" to join files together that contain single but
296
 * different RG lines needs a way of renumbering them.
297
 *
298
 * The file descriptor is expected to be immediately after the
299
 * cram_container structure (ie before the cram compression header).
300
 * Due to the nature of the CRAM format, this needs to read and write
301
 * the blocks itself.  Note that there may be multiple slices within
302
 * the container, meaning multiple compression headers to manipulate.
303
 * Changing RG may change the size of the compression header and
304
 * therefore the length field in the container.  Hence we rewrite all
305
 * blocks just in case and also emit the adjusted container.
306
 *
307
 * The current implementation can only cope with renumbering a single
308
 * RG (and only then if it is using HUFFMAN or BETA codecs).  In
309
 * theory it *may* be possible to renumber multiple RGs if they use
310
 * HUFFMAN to the CORE block or use an external block unshared by any
311
 * other data series.  So we have an API that can be upgraded to
312
 * support this, but do not implement it for now.  An example
313
 * implementation of RG as an EXTERNAL block would be to find that
314
 * block and rewrite it, returning the number of blocks consumed.
315
 *
316
 * Returns 0 on success;
317
 *        -1 if unable to edit;
318
 *        -2 on other errors (eg I/O).
319
 */
320
HTSLIB_EXPORT
321
int cram_transcode_rg(cram_fd *in, cram_fd *out,
322
                      cram_container *c,
323
                      int nrg, int *in_rg, int *out_rg);
324
325
/*
326
 * Copies the blocks representing the next num_slice slices from a
327
 * container from 'in' to 'out'.  It is expected that the file pointer
328
 * is just after the read of the cram_container and cram compression
329
 * header.
330
 *
331
 * Returns 0 on success
332
 *        -1 on failure
333
 */
334
HTSLIB_EXPORT
335
int cram_copy_slice(cram_fd *in, cram_fd *out, int32_t num_slice);
336
337
/*
338
 * Copies a container, but filtering it down to a specific region (as
339
 * already specified in 'in').
340
 *
341
 * Returns 0 on success
342
 *        -1 on EOF
343
 *        -2 on error
344
 */
345
HTSLIB_EXPORT
346
int cram_filter_container(cram_fd *in, cram_fd *out, cram_container *c,
347
                          int *ref_id);
348
349
/*
350
 * Decodes a CRAM block compression header.
351
 * Returns header ptr on success
352
 *         NULL on failure
353
 */
354
HTSLIB_EXPORT
355
cram_block_compression_hdr *cram_decode_compression_header(cram_fd *fd,
356
                                                           cram_block *b);
357
/*
358
 * Frees a cram_block_compression_hdr structure.
359
 */
360
HTSLIB_EXPORT
361
void cram_free_compression_header(cram_block_compression_hdr *hdr);
362
363
typedef struct cram_cid2ds_t cram_cid2ds_t;
364
365
/*
366
 * Map cram block numbers to data-series.  It's normally a 1:1 mapping,
367
 * but in rare cases it can be 1:many (or even many:many).
368
 * The key is the block number and the value is an index into the data-series
369
 * array, which we iterate over until reaching a negative value.
370
 *
371
 * Provide cid2ds as NULL to allocate a new map or pass in an existing one
372
 * to append to this map.  The new (or existing) map is returned.
373
 *
374
 * Returns the cid2ds (newly allocated or as provided) on success,
375
 *         NULL on failure.
376
 */
377
HTSLIB_EXPORT
378
cram_cid2ds_t *cram_update_cid2ds_map(cram_block_compression_hdr *hdr,
379
                                      cram_cid2ds_t *cid2ds);
380
381
/*
382
 * Return a list of data series observed as belonging to a block with
383
 * the specified content_id.  *n is the number of data series
384
 * returned, or 0 if block is unused.
385
 * Block content_id of -1 is used to indicate the CORE block.
386
 *
387
 * The pointer returned is owned by the cram_cid2ds state and should
388
 * not be freed by the caller.
389
 */
390
HTSLIB_EXPORT
391
int *cram_cid2ds_query(cram_cid2ds_t *c2d, int content_id, int *n);
392
393
/*
394
 * Frees a cram_cid2ds_t allocated by cram_update_cid2ds_map
395
 */
396
HTSLIB_EXPORT
397
void cram_cid2ds_free(cram_cid2ds_t *cid2ds);
398
399
/*
400
 * Produces a description of the record and tag encodings held within
401
 * a compression header and appends to 'ks'.
402
 *
403
 * Returns 0 on success,
404
 *        <0 on failure.
405
 */
406
HTSLIB_EXPORT
407
int cram_describe_encodings(cram_block_compression_hdr *hdr, kstring_t *ks);
408
409
/*
410
 *-----------------------------------------------------------------------------
411
 * cram slice interrogation
412
 */
413
414
/*
415
 * Returns the number of cram blocks within this slice.
416
 */
417
HTSLIB_EXPORT
418
int32_t cram_slice_hdr_get_num_blocks(cram_block_slice_hdr *hdr);
419
420
/*
421
 * Returns the block content_id for the block containing an embedded reference
422
 * sequence.  If none is present, -1 is returned.
423
 */
424
HTSLIB_EXPORT
425
int cram_slice_hdr_get_embed_ref_id(cram_block_slice_hdr *h);
426
427
/*
428
 * Returns slice reference ID, start and span (length) coordinates.
429
 * Return parameters may be NULL in which case they are ignored.
430
 */
431
HTSLIB_EXPORT
432
void cram_slice_hdr_get_coords(cram_block_slice_hdr *h,
433
                               int *refid, hts_pos_t *start, hts_pos_t *span);
434
435
/*
436
 * Decodes a slice header from a cram block.
437
 * Returns the opaque cram_block_slice_hdr pointer on success,
438
 *         NULL on failure.
439
 */
440
HTSLIB_EXPORT
441
cram_block_slice_hdr *cram_decode_slice_header(cram_fd *fd, cram_block *b);
442
443
/*
444
 * Frees a cram_block_slice_hdr structure.
445
 */
446
HTSLIB_EXPORT
447
void cram_free_slice_header(cram_block_slice_hdr *hdr);
448
449
/*
450
 *-----------------------------------------------------------------------------
451
 * cram_io basics
452
 */
453
454
/**@{ ----------------------------------------------------------------------
455
 * CRAM blocks - the dynamically growable data block. We have code to
456
 * create, update, (un)compress and read/write.
457
 *
458
 * These are derived from the deflate_interlaced.c blocks, but with the
459
 * CRAM extension of content types and IDs.
460
 */
461
462
/*! Allocates a new cram_block structure with a specified content_type and
463
 * id.
464
 *
465
 * @return
466
 * Returns block pointer on success;
467
 *         NULL on failure
468
 *
469
 * The cram_block struct returned by a successful call should be freed
470
 * via cram_free_block() when it is no longer needed.
471
 */
472
HTSLIB_EXPORT
473
cram_block *cram_new_block(enum cram_content_type content_type,
474
                           int content_id);
475
476
/*! Reads a block from a cram file.
477
 *
478
 * @return
479
 * Returns cram_block pointer on success;
480
 *         NULL on failure
481
 *
482
 * The cram_block struct returned by a successful call should be freed
483
 * via cram_free_block() when it is no longer needed.
484
 */
485
HTSLIB_EXPORT
486
cram_block *cram_read_block(cram_fd *fd);
487
488
/*! Writes a CRAM block.
489
 *
490
 * @return
491
 * Returns 0 on success;
492
 *        -1 on failure
493
 */
494
HTSLIB_EXPORT
495
int cram_write_block(cram_fd *fd, cram_block *b);
496
497
/*! Frees a CRAM block, deallocating internal data too.
498
 */
499
HTSLIB_EXPORT
500
void cram_free_block(cram_block *b);
501
502
/*! Uncompresses a CRAM block, if compressed.
503
 *
504
 * @return
505
 * Returns 0 on success;
506
 *        -1 on failure
507
 */
508
HTSLIB_EXPORT
509
int cram_uncompress_block(cram_block *b);
510
511
/*! Compresses a block.
512
 *
513
 * Compresses a block using one of two different zlib strategies. If we only
514
 * want one choice set strat2 to be -1.
515
 *
516
 * The logic here is that sometimes Z_RLE does a better job than Z_FILTERED
517
 * or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is
518
 * significantly faster.
519
 *
520
 * @return
521
 * Returns 0 on success;
522
 *        -1 on failure
523
 */
524
HTSLIB_EXPORT
525
int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics,
526
                        int method, int level);
527
int cram_compress_block2(cram_fd *fd, cram_slice *s,
528
                         cram_block *b, cram_metrics *metrics,
529
                         int method, int level);
530
531
/**@}*/
532
/**@{ ----------------------------------------------------------------------
533
 * Containers
534
 */
535
536
/*! Creates a new container, specifying the maximum number of slices
537
 * and records permitted.
538
 *
539
 * @return
540
 * Returns cram_container ptr on success;
541
 *         NULL on failure
542
 *
543
 * The cram_container struct returned by a successful call should be freed
544
 * via cram_free_container() when it is no longer needed.
545
 */
546
HTSLIB_EXPORT
547
cram_container *cram_new_container(int nrec, int nslice);
548
HTSLIB_EXPORT
549
void cram_free_container(cram_container *c);
550
551
/*! Reads a container header.
552
 *
553
 * @return
554
 * Returns cram_container on success;
555
 *         NULL on failure or no container left (fd->err == 0).
556
 *
557
 * The cram_container struct returned by a successful call should be freed
558
 * via cram_free_container() when it is no longer needed.
559
 */
560
HTSLIB_EXPORT
561
cram_container *cram_read_container(cram_fd *fd);
562
563
/*! Writes a container structure.
564
 *
565
 * @return
566
 * Returns 0 on success;
567
 *        -1 on failure
568
 */
569
HTSLIB_EXPORT
570
int cram_write_container(cram_fd *fd, cram_container *h);
571
572
/*
573
 * Stores the container structure in dat and returns *size as the
574
 * number of bytes written to dat[].  The input size of dat is also
575
 * held in *size and should be initialised to cram_container_size(c).
576
 *
577
 * Returns 0 on success;
578
 *        -1 on failure
579
 */
580
HTSLIB_EXPORT
581
int cram_store_container(cram_fd *fd, cram_container *c, char *dat, int *size);
582
583
HTSLIB_EXPORT
584
int cram_container_size(cram_container *c);
585
586
/**@}*/
587
/**@{ ----------------------------------------------------------------------
588
 * The top-level cram opening, closing and option handling
589
 */
590
591
/*! Opens a CRAM file for read (mode "rb") or write ("wb").
592
 *
593
 * The filename may be "-" to indicate stdin or stdout.
594
 *
595
 * @return
596
 * Returns file handle on success;
597
 *         NULL on failure.
598
 */
599
HTSLIB_EXPORT
600
cram_fd *cram_open(const char *filename, const char *mode);
601
602
/*! Opens an existing stream for reading or writing.
603
 *
604
 * @return
605
 * Returns file handle on success;
606
 *         NULL on failure.
607
 */
608
HTSLIB_EXPORT
609
cram_fd *cram_dopen(struct hFILE *fp, const char *filename, const char *mode);
610
611
/*! Closes a CRAM file.
612
 *
613
 * @return
614
 * Returns 0 on success;
615
 *        -1 on failure
616
 */
617
HTSLIB_EXPORT
618
int cram_close(cram_fd *fd);
619
620
/*
621
 * Seek within a CRAM file.
622
 *
623
 * Returns 0 on success
624
 *        -1 on failure
625
 */
626
HTSLIB_EXPORT
627
int cram_seek(cram_fd *fd, off_t offset, int whence);
628
629
/*
630
 * Flushes a CRAM file.
631
 * Useful for when writing to stdout without wishing to close the stream.
632
 *
633
 * Returns 0 on success
634
 *        -1 on failure
635
 */
636
HTSLIB_EXPORT
637
int cram_flush(cram_fd *fd);
638
639
/*! Checks for end of file on a cram_fd stream.
640
 *
641
 * @return
642
 * Returns 0 if not at end of file
643
 *         1 if we hit an expected EOF (end of range or EOF block)
644
 *         2 for other EOF (end of stream without EOF block)
645
 */
646
HTSLIB_EXPORT
647
int cram_eof(cram_fd *fd);
648
649
/*! Sets options on the cram_fd.
650
 *
651
 * See CRAM_OPT_* definitions in hts.h.
652
 * Use this immediately after opening.
653
 *
654
 * @return
655
 * Returns 0 on success;
656
 *        -1 on failure
657
 */
658
HTSLIB_EXPORT
659
int cram_set_option(cram_fd *fd, enum hts_fmt_option opt, ...);
660
661
/*! Sets options on the cram_fd.
662
 *
663
 * See CRAM_OPT_* definitions in hts.h.
664
 * Use this immediately after opening.
665
 *
666
 * @return
667
 * Returns 0 on success;
668
 *        -1 on failure
669
 */
670
HTSLIB_EXPORT
671
int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args);
672
673
/*!
674
 * Attaches a header to a cram_fd.
675
 *
676
 * This should be used when creating a new cram_fd for writing where
677
 * we have an SAM_hdr already constructed (eg from a file we've read
678
 * in).
679
 *
680
 * @return
681
 * Returns 0 on success;
682
 *        -1 on failure
683
 */
684
HTSLIB_EXPORT
685
int cram_set_header(cram_fd *fd, sam_hdr_t *hdr);
686
687
/*! Check if this file has a proper EOF block
688
 *
689
 * @return
690
 * Returns 3 if the file is a version of CRAM that does not contain EOF blocks
691
 *         2 if the file is a stream and thus unseekable
692
 *         1 if the file contains an EOF block
693
 *         0 if the file does not contain an EOF block
694
 *        -1 if an error occurred whilst reading the file or we could not seek back to where we were
695
 *
696
 */
697
HTSLIB_EXPORT
698
int cram_check_EOF(cram_fd *fd);
699
700
/* As int32_decode/encode, but from/to blocks instead of cram_fd */
701
HTSLIB_EXPORT
702
int int32_put_blk(cram_block *b, int32_t val);
703
704
/**@}*/
705
/**@{ -------------------------------------------------------------------
706
 * Old typedef and function names for compatibility with existing code.
707
 * Header functionality is now provided by sam.h's sam_hdr_t functions.
708
 */
709
710
typedef sam_hdr_t SAM_hdr;
711
712
/*! Tokenises a SAM header into a hash table.
713
 *
714
 * Also extracts a few bits on specific data types, such as @RG lines.
715
 *
716
 * @return
717
 * Returns a SAM_hdr struct on success (free with sam_hdr_free());
718
 *         NULL on failure
719
 */
720
0
static inline SAM_hdr *sam_hdr_parse_(const char *hdr, size_t len) { return sam_hdr_parse(len, hdr); } // Compatibility wrapper: takes (hdr, len) but forwards to sam_hdr_parse() as (len, hdr).
Unexecuted instantiation: hts.c:sam_hdr_parse_
Unexecuted instantiation: sam.c:sam_hdr_parse_
Unexecuted instantiation: cram_decode.c:sam_hdr_parse_
Unexecuted instantiation: cram_encode.c:sam_hdr_parse_
Unexecuted instantiation: cram_index.c:sam_hdr_parse_
Unexecuted instantiation: cram_io.c:sam_hdr_parse_
Unexecuted instantiation: cram_stats.c:sam_hdr_parse_
Unexecuted instantiation: cram_codecs.c:sam_hdr_parse_
721
722
/*! Deallocates all storage used by a SAM_hdr struct.
723
 *
724
 * This also decrements the header reference count. If after decrementing
725
 * it is still non-zero then the header is assumed to be in use by another
726
 * caller and the free is not done.
727
 */
728
0
static inline void sam_hdr_free(SAM_hdr *hdr) { sam_hdr_destroy(hdr); } // Compatibility alias: forwards directly to sam_hdr_destroy().
Unexecuted instantiation: hts.c:sam_hdr_free
Unexecuted instantiation: sam.c:sam_hdr_free
Unexecuted instantiation: cram_decode.c:sam_hdr_free
Unexecuted instantiation: cram_encode.c:sam_hdr_free
Unexecuted instantiation: cram_index.c:sam_hdr_free
Unexecuted instantiation: cram_io.c:sam_hdr_free
Unexecuted instantiation: cram_stats.c:sam_hdr_free
Unexecuted instantiation: cram_codecs.c:sam_hdr_free
729
730
/* sam_hdr_length() and sam_hdr_str() are now provided by sam.h. */
731
732
/*! Add an @PG line.
733
 *
734
 * If we wish complete control over this use sam_hdr_add_line() directly. This
735
 * function uses that, but attempts to do a lot of tedious house work for
736
 * you too.
737
 *
738
 * - It will generate a suitable ID if the supplied one clashes.
739
 * - It will generate multiple @PG records if we have multiple PG chains.
740
 *
741
 * Call it as per sam_hdr_add_line() with a series of key,value pairs ending
742
 * in NULL.
743
 *
744
 * @return
745
 * Returns 0 on success;
746
 *        -1 on failure
747
 */
748
#define sam_hdr_add_PG sam_hdr_add_pg
749
750
/**@{ -------------------------------------------------------------------*/
751
752
/*!
753
 * Returns the refs_t structure used by a cram file handle.
754
 *
755
 * This may be used in conjunction with option CRAM_OPT_SHARED_REF to
756
 * share reference memory between multiple file handles.
757
 *
758
 * @return
759
 * Returns NULL if none exists or the file handle is not a CRAM file.
760
 */
761
HTSLIB_EXPORT
762
refs_t *cram_get_refs(htsFile *fd);
763
764
/*!
765
 * Returns the file offsets of CRAM slices covering a specific region
766
 * query.  Note both offsets are the START of the slice.
767
 *
768
 * first will point to the start of the first overlapping slice
769
 * last will point to the start of the last overlapping slice
770
 *
771
 * @return
772
 * Returns 0 on success
773
 *        <0 on failure
774
 */
775
HTSLIB_EXPORT
776
int cram_index_extents(cram_fd *fd, int refid, hts_pos_t start, hts_pos_t end,
777
                       off_t *first, off_t *last);
778
779
/*! Returns the total number of containers in the CRAM index.
780
 *
781
 * Note the index is not required to have an entry for every container, but it
782
 * will always have an index entry for the start of each chromosome.
783
 * (Although in practice our indices do contain one entry per container.)
784
 *
785
 * This is equivalent to cram_num_containers_between(fd, 0, 0, NULL, NULL)
786
 */
787
HTSLIB_EXPORT
788
int64_t cram_num_containers(cram_fd *fd);
789
790
/*! Returns the number of containers in the CRAM index within given offsets.
791
 *
792
 * The cstart and cend offsets are the locations of the start of containers
793
 * as returned by index_container_offset.
794
 *
795
 * If non-NULL, first and last will hold the inclusive range of container
796
 * numbers, counting from zero.
797
 *
798
 * @return
799
 * Returns the number of containers, equivalent to *last-*first+1.
800
 */
801
HTSLIB_EXPORT
802
int64_t cram_num_containers_between(cram_fd *fd,
803
                                    off_t cstart, off_t cend,
804
                                    int64_t *first, int64_t *last);
805
806
/*! Returns the byte offset for the start of the n^th container.
807
 *
808
 * The index must have previously been loaded, otherwise <0 is returned.
809
 */
810
HTSLIB_EXPORT
811
off_t cram_container_num2offset(cram_fd *fd, int64_t n);
812
813
/*! Returns the container number for the first container at offset >= pos.
814
 *
815
 * The index must have previously been loaded, otherwise <0 is returned.
816
 */
817
HTSLIB_EXPORT
818
int64_t cram_container_offset2num(cram_fd *fd, off_t pos);
819
820
/**@}*/
821
822
#ifdef __cplusplus
823
}
824
#endif
825
826
#endif