/src/htslib/cram/cram_io.h
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | Copyright (c) 2012-2020 Genome Research Ltd. |
3 | | Author: James Bonfield <jkb@sanger.ac.uk> |
4 | | |
5 | | Redistribution and use in source and binary forms, with or without |
6 | | modification, are permitted provided that the following conditions are met: |
7 | | |
8 | | 1. Redistributions of source code must retain the above copyright notice, |
9 | | this list of conditions and the following disclaimer. |
10 | | |
11 | | 2. Redistributions in binary form must reproduce the above copyright notice, |
12 | | this list of conditions and the following disclaimer in the documentation |
13 | | and/or other materials provided with the distribution. |
14 | | |
15 | | 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger |
16 | | Institute nor the names of its contributors may be used to endorse or promote |
17 | | products derived from this software without specific prior written permission. |
18 | | |
19 | | THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND |
20 | | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED |
21 | | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE |
22 | | DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE |
23 | | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
24 | | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR |
25 | | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER |
26 | | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, |
27 | | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
28 | | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
29 | | */ |
30 | | |
31 | | /*! \file |
32 | | * Include cram.h instead. |
33 | | * |
34 | | * This is an internal part of the CRAM system and is automatically included |
35 | | * when you #include cram.h. |
36 | | * |
37 | | * Implements the low level CRAM I/O primitives. |
38 | | * This includes basic data types such as byte, int, ITF-8, |
39 | | * maps, bitwise I/O, etc. |
40 | | */ |
41 | | |
42 | | #ifndef CRAM_IO_H |
43 | | #define CRAM_IO_H |
44 | | |
45 | | #include <stdint.h> |
46 | | |
47 | | #include "misc.h" |
48 | | |
49 | | #ifdef __cplusplus |
50 | | extern "C" { |
51 | | #endif |
52 | | |
53 | | /**@{ ---------------------------------------------------------------------- |
54 | | * ITF8 encoding and decoding. |
55 | | * |
56 | | * Also see the itf8_get and itf8_put macros. |
57 | | */ |
58 | | |
/*! INTERNAL: Converts two characters into an integer for use in switch{}
 *
 * Each argument is truncated to an unsigned char; 'a' becomes the high
 * byte and 'b' the low byte of the resulting key.  Both arguments are
 * parenthesised before the cast so that a compound expression is
 * truncated as a whole rather than only its first operand.
 */
#define CRAM_KEY(a,b) ((((unsigned char) (a))<<8)|(((unsigned char) (b))))
61 | | |
62 | | /*! Reads an integer in ITF-8 encoding from 'fd' and stores it in |
63 | | * *val. |
64 | | * |
65 | | * @return |
66 | | * Returns the number of bytes read on success; |
67 | | * -1 on failure |
68 | | */ |
69 | | int itf8_decode(cram_fd *fd, int32_t *val); |
70 | | |
71 | | extern const int itf8_bytes[16]; |
72 | | extern const int ltf8_bytes[256]; |
73 | | |
74 | | /*! Pushes a value in ITF8 format onto the end of a block. |
75 | | * |
76 | | * This shouldn't be used for high-volume data as it is not the fastest |
77 | | * method. |
78 | | * |
79 | | * @return |
80 | | * Returns the number of bytes written |
81 | | */ |
82 | | int itf8_put_blk(cram_block *blk, int32_t val); |
83 | | int ltf8_put_blk(cram_block *blk, int64_t val); |
84 | | |
85 | | /*! Pulls a literal 32-bit value from a block. |
86 | | * |
87 | | * @returns the number of bytes decoded; |
88 | | * -1 on failure. |
89 | | */ |
90 | | int int32_get_blk(cram_block *b, int32_t *val); |
91 | | |
92 | | /*! Pushes a literal 32-bit value onto the end of a block. |
93 | | * |
94 | | * @return |
95 | | * Returns 0 on success; |
96 | | * -1 on failure. |
97 | | */ |
98 | | int int32_put_blk(cram_block *blk, int32_t val); |
99 | | |
100 | | |
101 | | /**@}*/ |
102 | | /**@{ ---------------------------------------------------------------------- |
103 | | * CRAM blocks - the dynamically growable data block. We have code to |
104 | | * create, update, (un)compress and read/write. |
105 | | * |
106 | | * These are derived from the deflate_interlaced.c blocks, but with the |
107 | | * CRAM extension of content types and IDs. |
108 | | */ |
109 | | |
110 | | /*! Allocates a new cram_block structure with a specified content_type and |
111 | | * id. |
112 | | * |
113 | | * @return |
114 | | * Returns block pointer on success; |
115 | | * NULL on failure |
116 | | */ |
117 | | cram_block *cram_new_block(enum cram_content_type content_type, |
118 | | int content_id); |
119 | | |
120 | | /*! Reads a block from a cram file. |
121 | | * |
122 | | * @return |
123 | | * Returns cram_block pointer on success; |
124 | | * NULL on failure |
125 | | */ |
126 | | cram_block *cram_read_block(cram_fd *fd); |
127 | | |
128 | | /*! Writes a CRAM block. |
129 | | * |
130 | | * @return |
131 | | * Returns 0 on success; |
132 | | * -1 on failure |
133 | | */ |
134 | | int cram_write_block(cram_fd *fd, cram_block *b); |
135 | | |
136 | | /*! Frees a CRAM block, deallocating internal data too. |
137 | | */ |
138 | | void cram_free_block(cram_block *b); |
139 | | |
140 | | /*! Uncompress a memory block using Zlib. |
141 | | * |
142 | | * @return |
143 | | * Returns a pointer to the uncompressed data (length in *size) on success; |
144 | | * NULL on failure |
145 | | */ |
146 | | char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size); |
147 | | |
148 | | /*! Uncompresses a CRAM block, if compressed. |
149 | | * |
150 | | * @return |
151 | | * Returns 0 on success; |
152 | | * -1 on failure |
153 | | */ |
154 | | int cram_uncompress_block(cram_block *b); |
155 | | |
156 | | /*! Compresses a block. |
157 | | * |
158 | | * Compresses a block using one of two different zlib strategies. If we only |
159 | | * want one choice set strat2 to be -1. |
160 | | * |
161 | | * The logic here is that sometimes Z_RLE does a better job than Z_FILTERED |
162 | | * or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is |
163 | | * significantly faster. |
164 | | * |
165 | | * @return |
166 | | * Returns 0 on success; |
167 | | * -1 on failure |
168 | | */ |
169 | | int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics, |
170 | | int method, int level); |
171 | | int cram_compress_block2(cram_fd *fd, cram_slice *s, |
172 | | cram_block *b, cram_metrics *metrics, |
173 | | int method, int level); |
174 | | |
175 | | cram_metrics *cram_new_metrics(void); |
176 | | char *cram_block_method2str(enum cram_block_method_int m); |
177 | | char *cram_content_type2str(enum cram_content_type t); |
178 | | |
179 | | /* |
180 | | * Find an external block by its content_id |
181 | | */ |
182 | | |
183 | 1.82k | static inline cram_block *cram_get_block_by_id(cram_slice *slice, int id) { |
184 | | //fprintf(stderr, "%d\t%p\n", id, slice->block_by_id); |
185 | 1.82k | uint32_t v = id; |
186 | 1.82k | if (slice->block_by_id && v < 256) { |
187 | 1.82k | return slice->block_by_id[v]; |
188 | 1.82k | } else { |
189 | 4 | v = 256 + v % 251; |
190 | 4 | if (slice->block_by_id && |
191 | 4 | slice->block_by_id[v] && |
192 | 4 | slice->block_by_id[v]->content_id == id) |
193 | 0 | return slice->block_by_id[v]; |
194 | | |
195 | | // Otherwise a linear search in case of collision |
196 | 4 | int i; |
197 | 288 | for (i = 0; i < slice->hdr->num_blocks; i++) { |
198 | 284 | cram_block *b = slice->block[i]; |
199 | 284 | if (b && b->content_type == EXTERNAL && b->content_id == id) |
200 | 0 | return b; |
201 | 284 | } |
202 | 4 | } |
203 | 4 | return NULL; |
204 | 1.82k | } Unexecuted instantiation: hts.c:cram_get_block_by_id Unexecuted instantiation: sam.c:cram_get_block_by_id cram_decode.c:cram_get_block_by_id Line | Count | Source | 183 | 1.82k | static inline cram_block *cram_get_block_by_id(cram_slice *slice, int id) { | 184 | | //fprintf(stderr, "%d\t%p\n", id, slice->block_by_id); | 185 | 1.82k | uint32_t v = id; | 186 | 1.82k | if (slice->block_by_id && v < 256) { | 187 | 1.82k | return slice->block_by_id[v]; | 188 | 1.82k | } else { | 189 | 4 | v = 256 + v % 251; | 190 | 4 | if (slice->block_by_id && | 191 | 4 | slice->block_by_id[v] && | 192 | 4 | slice->block_by_id[v]->content_id == id) | 193 | 0 | return slice->block_by_id[v]; | 194 | | | 195 | | // Otherwise a linear search in case of collision | 196 | 4 | int i; | 197 | 288 | for (i = 0; i < slice->hdr->num_blocks; i++) { | 198 | 284 | cram_block *b = slice->block[i]; | 199 | 284 | if (b && b->content_type == EXTERNAL && b->content_id == id) | 200 | 0 | return b; | 201 | 284 | } | 202 | 4 | } | 203 | 4 | return NULL; | 204 | 1.82k | } |
Unexecuted instantiation: cram_encode.c:cram_get_block_by_id Unexecuted instantiation: cram_index.c:cram_get_block_by_id Unexecuted instantiation: cram_io.c:cram_get_block_by_id Unexecuted instantiation: cram_stats.c:cram_get_block_by_id Unexecuted instantiation: cram_codecs.c:cram_get_block_by_id |
205 | | |
/* --- Accessor macros for manipulating blocks on a byte by byte basis --- */

/*
 * BLOCK_SIZE is the block's 'byte' field and BLOCK_DATA its 'data' field.
 * Both expand to lvalues, so they may be assigned to as well as read.
 */
#define BLOCK_SIZE(b) ((b)->byte)
#define BLOCK_DATA(b) ((b)->data)

/* Address of the element at offset BLOCK_SIZE(b) within BLOCK_DATA(b) */
#define BLOCK_END(b) ((b)->data + (b)->byte)
214 | | |
215 | | /* Make block exactly 'l' bytes long */ |
216 | 7.28k | static inline int block_resize_exact(cram_block *b, size_t len) { |
217 | 7.28k | unsigned char *tmp = realloc(b->data, len); |
218 | 7.28k | if (!tmp) |
219 | 0 | return -1; |
220 | 7.28k | b->alloc = len; |
221 | 7.28k | b->data = tmp; |
222 | 7.28k | return 0; |
223 | 7.28k | } Unexecuted instantiation: hts.c:block_resize_exact Unexecuted instantiation: sam.c:block_resize_exact cram_decode.c:block_resize_exact Line | Count | Source | 216 | 7.28k | static inline int block_resize_exact(cram_block *b, size_t len) { | 217 | 7.28k | unsigned char *tmp = realloc(b->data, len); | 218 | 7.28k | if (!tmp) | 219 | 0 | return -1; | 220 | 7.28k | b->alloc = len; | 221 | 7.28k | b->data = tmp; | 222 | 7.28k | return 0; | 223 | 7.28k | } |
Unexecuted instantiation: cram_encode.c:block_resize_exact Unexecuted instantiation: cram_index.c:block_resize_exact Unexecuted instantiation: cram_io.c:block_resize_exact Unexecuted instantiation: cram_stats.c:block_resize_exact Unexecuted instantiation: cram_codecs.c:block_resize_exact |
224 | | |
225 | | /* Request block to be at least 'l' bytes long */ |
226 | 0 | static inline int block_resize(cram_block *b, size_t len) { |
227 | 0 | if (b->alloc > len) |
228 | 0 | return 0; |
229 | | |
230 | 0 | size_t alloc = b->alloc; |
231 | 0 | while (alloc <= len) |
232 | 0 | alloc = alloc ? alloc + (alloc>>2) : 1024; |
233 | |
|
234 | 0 | return block_resize_exact(b, alloc); |
235 | 0 | } Unexecuted instantiation: hts.c:block_resize Unexecuted instantiation: sam.c:block_resize Unexecuted instantiation: cram_decode.c:block_resize Unexecuted instantiation: cram_encode.c:block_resize Unexecuted instantiation: cram_index.c:block_resize Unexecuted instantiation: cram_io.c:block_resize Unexecuted instantiation: cram_stats.c:block_resize Unexecuted instantiation: cram_codecs.c:block_resize |
236 | | |
237 | | |
238 | | /* Ensure the block can hold at least another 'l' bytes */ |
239 | 0 | static inline int block_grow(cram_block *b, size_t len) { |
240 | 0 | return block_resize(b, BLOCK_SIZE(b) + len); |
241 | 0 | } Unexecuted instantiation: hts.c:block_grow Unexecuted instantiation: sam.c:block_grow Unexecuted instantiation: cram_decode.c:block_grow Unexecuted instantiation: cram_encode.c:block_grow Unexecuted instantiation: cram_index.c:block_grow Unexecuted instantiation: cram_io.c:block_grow Unexecuted instantiation: cram_stats.c:block_grow Unexecuted instantiation: cram_codecs.c:block_grow |
242 | | |
243 | | /* Append string 's' of length 'l'. */ |
244 | 0 | static inline int block_append(cram_block *b, const void *s, size_t len) { |
245 | 0 | if (block_grow(b, len) < 0) |
246 | 0 | return -1; |
247 | | |
248 | 0 | if (len) { |
249 | 0 | memcpy(BLOCK_END(b), s, len); |
250 | 0 | BLOCK_SIZE(b) += len; |
251 | 0 | } |
252 | |
|
253 | 0 | return 0; |
254 | 0 | } Unexecuted instantiation: hts.c:block_append Unexecuted instantiation: sam.c:block_append Unexecuted instantiation: cram_decode.c:block_append Unexecuted instantiation: cram_encode.c:block_append Unexecuted instantiation: cram_index.c:block_append Unexecuted instantiation: cram_io.c:block_append Unexecuted instantiation: cram_stats.c:block_append Unexecuted instantiation: cram_codecs.c:block_append |
255 | | |
256 | | /* Append as single character 'c' */ |
257 | 0 | static inline int block_append_char(cram_block *b, char c) { |
258 | 0 | if (block_grow(b, 1) < 0) |
259 | 0 | return -1; |
260 | | |
261 | 0 | b->data[b->byte++] = c; |
262 | 0 | return 0; |
263 | 0 | } Unexecuted instantiation: hts.c:block_append_char Unexecuted instantiation: sam.c:block_append_char Unexecuted instantiation: cram_decode.c:block_append_char Unexecuted instantiation: cram_encode.c:block_append_char Unexecuted instantiation: cram_index.c:block_append_char Unexecuted instantiation: cram_io.c:block_append_char Unexecuted instantiation: cram_stats.c:block_append_char Unexecuted instantiation: cram_codecs.c:block_append_char |
264 | | |
265 | | /* Append a single unsigned integer */ |
266 | | static inline unsigned char *append_uint32(unsigned char *cp, uint32_t i); |
267 | 0 | static inline int block_append_uint(cram_block *b, unsigned int i) { |
268 | 0 | if (block_grow(b, 11) < 0) |
269 | 0 | return -1; |
270 | | |
271 | 0 | unsigned char *cp = &b->data[b->byte]; |
272 | 0 | b->byte += append_uint32(cp, i) - cp; |
273 | 0 | return 0; |
274 | 0 | } Unexecuted instantiation: hts.c:block_append_uint Unexecuted instantiation: sam.c:block_append_uint Unexecuted instantiation: cram_decode.c:block_append_uint Unexecuted instantiation: cram_encode.c:block_append_uint Unexecuted instantiation: cram_index.c:block_append_uint Unexecuted instantiation: cram_io.c:block_append_uint Unexecuted instantiation: cram_stats.c:block_append_uint Unexecuted instantiation: cram_codecs.c:block_append_uint |
275 | | |
// Versions of above with built in goto block_err calls.
//
// The calling function must provide a "block_err" label.  Each macro is
// wrapped in do { } while (0) so that it expands to exactly one statement:
// with a bare "if", an "else" following the macro at the call site would
// silently attach to the macro's own "if" instead of the caller's.
#define BLOCK_RESIZE_EXACT(b,l) do { if (block_resize_exact((b),(l))<0) goto block_err; } while (0)
#define BLOCK_RESIZE(b,l)       do { if (block_resize((b),(l))      <0) goto block_err; } while (0)
#define BLOCK_GROW(b,l)         do { if (block_grow((b),(l))        <0) goto block_err; } while (0)
#define BLOCK_APPEND(b,s,l)     do { if (block_append((b),(s),(l))  <0) goto block_err; } while (0)
#define BLOCK_APPEND_CHAR(b,c)  do { if (block_append_char((b),(c)) <0) goto block_err; } while (0)
#define BLOCK_APPEND_UINT(b,i)  do { if (block_append_uint((b),(i)) <0) goto block_err; } while (0)
283 | | |
/*
 * Writes the decimal text of 'i' at 'cp', most significant digit first.
 *
 * No leading zeros and no terminating NUL are written; between 1 and 10
 * bytes are stored.
 *
 * Returns a pointer to the byte following the last digit written.
 */
static inline unsigned char *append_uint32(unsigned char *cp, uint32_t i) {
    // Work out how many digits are needed (zero still takes one).
    int ndigits = 1;
    for (uint32_t rest = i; rest >= 10; rest /= 10)
        ndigits++;

    // Fill the digits in from the least significant end backwards.
    unsigned char *end = cp + ndigits;
    unsigned char *out = end;
    do {
        *--out = (unsigned char)('0' + i % 10);
        i /= 10;
    } while (i != 0);

    return end;
}
321 | | |
/*
 * Writes 'i' at 'cp' as exactly nine characters, zero padded on the left.
 *
 * The first character is i / 100000000 + '0', so it is only a decimal
 * digit when i is below 1000000000.  No terminating NUL is written.
 *
 * Returns a pointer to the byte following the ninth character.
 */
static inline unsigned char *append_sub32(unsigned char *cp, uint32_t i) {
    enum { WIDTH = 9 };

    // Leading position takes whatever is left above the low eight digits.
    cp[0] = i / 100000000 + '0';

    // Remaining eight digits, filled from the least significant upwards.
    uint32_t rest = i % 100000000;
    for (int pos = WIDTH - 1; pos >= 1; pos--) {
        cp[pos] = rest % 10 + '0';
        rest /= 10;
    }

    return cp + WIDTH;
}
335 | | |
/*
 * Writes the decimal text of the 64-bit value 'i' at 'cp'.
 *
 * Values that fit in 32 bits go straight to append_uint32().  Larger
 * values are split into groups of nine decimal digits: the leading group
 * is written with append_uint32() (no padding) and each following group
 * with append_sub32() (zero padded to nine characters).
 *
 * Returns a pointer to the byte following the last digit written.
 */
static inline unsigned char *append_uint64(unsigned char *cp, uint64_t i) {
    const uint64_t billion = 1000000000;

    if (i <= 0xffffffff)
        return append_uint32(cp, i);

    uint64_t high = i / billion;
    uint64_t low  = i % billion;

    if (high > billion) {
        // Three groups: the top part itself needs splitting again.
        cp = append_uint32(cp, high / billion);
        cp = append_sub32(cp, high % billion);
    } else {
        // Two groups: the top part fits a single append_uint32() call.
        cp = append_uint32(cp, high);
    }

    return append_sub32(cp, low);
}
353 | | |
/*
 * Sets both the comp_size and uncomp_size fields of block 'b' to its
 * current BLOCK_SIZE.  The expansion is parenthesised as a whole so that
 * it remains a single operand when used inside a larger expression.
 */
#define BLOCK_UPLEN(b) \
    ((b)->comp_size = (b)->uncomp_size = BLOCK_SIZE((b)))
356 | | |
357 | | /**@}*/ |
358 | | /**@{ ---------------------------------------------------------------------- |
359 | | * Reference sequence handling |
360 | | */ |
361 | | |
362 | | /*! Loads a reference set from fn and stores in the cram_fd. |
363 | | * |
364 | | * @return |
365 | | * Returns 0 on success; |
366 | | * -1 on failure |
367 | | */ |
368 | | int cram_load_reference(cram_fd *fd, char *fn); |
369 | | |
370 | | /*! Generates a lookup table in refs based on the SQ headers in sam_hdr_t. |
371 | | * |
372 | | * Indexes references by the order they appear in a BAM file. This may not |
373 | | * necessarily be the same order they appear in the fasta reference file. |
374 | | * |
375 | | * @return |
376 | | * Returns 0 on success; |
377 | | * -1 on failure |
378 | | */ |
379 | | int refs2id(refs_t *r, sam_hdr_t *hdr); |
380 | | |
381 | | void refs_free(refs_t *r); |
382 | | |
383 | | /*! Returns a portion of a reference sequence from start to end inclusive. |
384 | | * |
385 | | * The returned pointer is owned by the cram_file fd and should not be freed |
386 | | * by the caller. It is valid only until the next cram_get_ref is called |
387 | | * with the same fd parameter (so is thread-safe if given multiple files). |
388 | | * |
389 | | * To return the entire reference sequence, specify start as 1 and end |
390 | | * as 0. |
391 | | * |
392 | | * @return |
393 | | * Returns reference on success; |
394 | | * NULL on failure |
395 | | */ |
396 | | char *cram_get_ref(cram_fd *fd, int id, int start, int end); |
397 | | void cram_ref_incr(refs_t *r, int id); |
398 | | void cram_ref_decr(refs_t *r, int id); |
399 | | /**@}*/ |
400 | | /**@{ ---------------------------------------------------------------------- |
401 | | * Containers |
402 | | */ |
403 | | |
404 | | /*! Creates a new container, specifying the maximum number of slices |
405 | | * and records permitted. |
406 | | * |
407 | | * @return |
408 | | * Returns cram_container ptr on success; |
409 | | * NULL on failure |
410 | | */ |
411 | | cram_container *cram_new_container(int nrec, int nslice); |
412 | | void cram_free_container(cram_container *c); |
413 | | |
414 | | /*! Reads a container header. |
415 | | * |
416 | | * @return |
417 | | * Returns cram_container on success; |
418 | | * NULL on failure or no container left (fd->err == 0). |
419 | | */ |
420 | | cram_container *cram_read_container(cram_fd *fd); |
421 | | |
422 | | /*! Writes a container structure. |
423 | | * |
424 | | * @return |
425 | | * Returns 0 on success; |
426 | | * -1 on failure |
427 | | */ |
428 | | int cram_write_container(cram_fd *fd, cram_container *h); |
429 | | |
430 | | /*! Flushes a container to disk. |
431 | | * |
432 | | * Flushes a completely or partially full container to disk, writing |
433 | | * container structure, header and blocks. This also calls the encoder |
434 | | * functions. |
435 | | * |
436 | | * @return |
437 | | * Returns 0 on success; |
438 | | * -1 on failure |
439 | | */ |
440 | | int cram_flush_container(cram_fd *fd, cram_container *c); |
441 | | int cram_flush_container_mt(cram_fd *fd, cram_container *c); |
442 | | |
443 | | |
444 | | /**@}*/ |
445 | | /**@{ ---------------------------------------------------------------------- |
446 | | * Compression headers; the first part of the container |
447 | | */ |
448 | | |
449 | | /*! Creates a new blank container compression header |
450 | | * |
451 | | * @return |
452 | | * Returns header ptr on success; |
453 | | * NULL on failure |
454 | | */ |
455 | | cram_block_compression_hdr *cram_new_compression_header(void); |
456 | | |
457 | | /*! Frees a cram_block_compression_hdr */ |
458 | | void cram_free_compression_header(cram_block_compression_hdr *hdr); |
459 | | |
460 | | |
461 | | /**@}*/ |
462 | | /**@{ ---------------------------------------------------------------------- |
463 | | * Slices and slice headers |
464 | | */ |
465 | | |
466 | | /*! Frees a slice header */ |
467 | | void cram_free_slice_header(cram_block_slice_hdr *hdr); |
468 | | |
469 | | /*! Frees a slice */ |
470 | | void cram_free_slice(cram_slice *s); |
471 | | |
472 | | /*! Creates a new empty slice in memory, for subsequent writing to |
473 | | * disk. |
474 | | * |
475 | | * @return |
476 | | * Returns cram_slice ptr on success; |
477 | | * NULL on failure |
478 | | */ |
479 | | cram_slice *cram_new_slice(enum cram_content_type type, int nrecs); |
480 | | |
481 | | /*! Loads an entire slice. |
482 | | * |
483 | | * FIXME: In 1.0 the native unit of slices within CRAM is broken |
484 | | * as slices contain references to objects in other slices. |
485 | | * To work around this while keeping the slice oriented outer loop |
486 | | * we read all slices and stitch them together into a fake large |
487 | | * slice instead. |
488 | | * |
489 | | * @return |
490 | | * Returns cram_slice ptr on success; |
491 | | * NULL on failure |
492 | | */ |
493 | | cram_slice *cram_read_slice(cram_fd *fd); |
494 | | |
495 | | |
496 | | |
497 | | /**@}*/ |
498 | | /**@{ ---------------------------------------------------------------------- |
499 | | * CRAM file definition (header) |
500 | | */ |
501 | | |
502 | | /*! Reads a CRAM file definition structure. |
503 | | * |
504 | | * @return |
505 | | * Returns file_def ptr on success; |
506 | | * NULL on failure |
507 | | */ |
508 | | cram_file_def *cram_read_file_def(cram_fd *fd); |
509 | | |
510 | | /*! Writes a cram_file_def structure to cram_fd. |
511 | | * |
512 | | * @return |
513 | | * Returns 0 on success; |
514 | | * -1 on failure |
515 | | */ |
516 | | int cram_write_file_def(cram_fd *fd, cram_file_def *def); |
517 | | |
518 | | /*! Frees a cram_file_def structure. */ |
519 | | void cram_free_file_def(cram_file_def *def); |
520 | | |
521 | | |
522 | | /**@}*/ |
523 | | /**@{ ---------------------------------------------------------------------- |
524 | | * SAM header I/O |
525 | | */ |
526 | | |
527 | | /*! Reads the SAM header from the first CRAM data block. |
528 | | * |
529 | | * Also performs minimal parsing to extract read-group |
530 | | * and sample information. |
531 | | * |
532 | | * @return |
533 | | * Returns SAM hdr ptr on success; |
534 | | * NULL on failure |
535 | | */ |
536 | | sam_hdr_t *cram_read_SAM_hdr(cram_fd *fd); |
537 | | |
538 | | /*! Writes a CRAM SAM header. |
539 | | * |
540 | | * @return |
541 | | * Returns 0 on success; |
542 | | * -1 on failure |
543 | | */ |
544 | | int cram_write_SAM_hdr(cram_fd *fd, sam_hdr_t *hdr); |
545 | | |
546 | | |
547 | | /**@}*/ |
548 | | /**@{ ---------------------------------------------------------------------- |
549 | | * The top-level cram opening, closing and option handling |
550 | | */ |
551 | | |
552 | | /*! Opens a CRAM file for read (mode "rb") or write ("wb"). |
553 | | * |
554 | | * The filename may be "-" to indicate stdin or stdout. |
555 | | * |
556 | | * @return |
557 | | * Returns file handle on success; |
558 | | * NULL on failure. |
559 | | */ |
560 | | cram_fd *cram_open(const char *filename, const char *mode); |
561 | | |
562 | | /*! Opens an existing stream for reading or writing. |
563 | | * |
564 | | * @return |
565 | | * Returns file handle on success; |
566 | | * NULL on failure. |
567 | | */ |
568 | | cram_fd *cram_dopen(struct hFILE *fp, const char *filename, const char *mode); |
569 | | |
570 | | /*! Closes a CRAM file. |
571 | | * |
572 | | * @return |
573 | | * Returns 0 on success; |
574 | | * -1 on failure |
575 | | */ |
576 | | int cram_close(cram_fd *fd); |
577 | | |
578 | | /* |
579 | | * Seek within a CRAM file. |
580 | | * |
581 | | * Returns 0 on success |
582 | | * -1 on failure |
583 | | */ |
584 | | int cram_seek(cram_fd *fd, off_t offset, int whence); |
585 | | |
586 | | /* |
587 | | * Flushes a CRAM file. |
588 | | * Useful for when writing to stdout without wishing to close the stream. |
589 | | * |
590 | | * Returns 0 on success |
591 | | * -1 on failure |
592 | | */ |
593 | | int cram_flush(cram_fd *fd); |
594 | | |
595 | | /*! Checks for end of file on a cram_fd stream. |
596 | | * |
597 | | * @return |
598 | | * Returns 0 if not at end of file |
599 | | * 1 if we hit an expected EOF (end of range or EOF block) |
600 | | * 2 for other EOF (end of stream without EOF block) |
601 | | */ |
602 | | int cram_eof(cram_fd *fd); |
603 | | |
604 | | /*! Sets options on the cram_fd. |
605 | | * |
606 | | * See CRAM_OPT_* definitions in cram_structs.h. |
607 | | * Use this immediately after opening. |
608 | | * |
609 | | * @return |
610 | | * Returns 0 on success; |
611 | | * -1 on failure |
612 | | */ |
613 | | int cram_set_option(cram_fd *fd, enum hts_fmt_option opt, ...); |
614 | | |
615 | | /*! Sets options on the cram_fd, taking the option arguments as a va_list. |
616 | | * |
617 | | * See CRAM_OPT_* definitions in cram_structs.h. |
618 | | * Use this immediately after opening. |
619 | | * |
620 | | * @return |
621 | | * Returns 0 on success; |
622 | | * -1 on failure |
623 | | */ |
624 | | int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args); |
625 | | |
626 | | /*! |
627 | | * Attaches a header to a cram_fd. |
628 | | * |
629 | | * This should be used when creating a new cram_fd for writing where |
630 | | * we have a sam_hdr_t already constructed (eg from a file we've read |
631 | | * in). |
632 | | * |
633 | | * @return |
634 | | * Returns 0 on success; |
635 | | * -1 on failure |
636 | | */ |
637 | | int cram_set_header2(cram_fd *fd, const sam_hdr_t *hdr); |
638 | | |
639 | | /*! |
640 | | * Returns the hFILE connected to a cram_fd. |
641 | | */ |
642 | 0 | static inline struct hFILE *cram_hfile(cram_fd *fd) { |
643 | 0 | return fd->fp; |
644 | 0 | } Unexecuted instantiation: hts.c:cram_hfile Unexecuted instantiation: sam.c:cram_hfile Unexecuted instantiation: cram_decode.c:cram_hfile Unexecuted instantiation: cram_encode.c:cram_hfile Unexecuted instantiation: cram_index.c:cram_hfile Unexecuted instantiation: cram_io.c:cram_hfile Unexecuted instantiation: cram_stats.c:cram_hfile Unexecuted instantiation: cram_codecs.c:cram_hfile |
645 | | |
646 | | #ifdef __cplusplus |
647 | | } |
648 | | #endif |
649 | | |
650 | | #endif /* CRAM_IO_H */ |