Coverage Report

Created: 2025-08-29 06:39

/src/htslib/cram/cram_io.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
Copyright (c) 2012-2020 Genome Research Ltd.
3
Author: James Bonfield <jkb@sanger.ac.uk>
4
5
Redistribution and use in source and binary forms, with or without
6
modification, are permitted provided that the following conditions are met:
7
8
   1. Redistributions of source code must retain the above copyright notice,
9
this list of conditions and the following disclaimer.
10
11
   2. Redistributions in binary form must reproduce the above copyright notice,
12
this list of conditions and the following disclaimer in the documentation
13
and/or other materials provided with the distribution.
14
15
   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
16
Institute nor the names of its contributors may be used to endorse or promote
17
products derived from this software without specific prior written permission.
18
19
THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
20
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
23
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
*/
30
31
/*! \file
32
 * Include cram.h instead.
33
 *
34
 * This is an internal part of the CRAM system and is automatically included
35
 * when you #include cram.h.
36
 *
37
 * Implements the low level CRAM I/O primitives.
38
 * This includes basic data types such as byte, int, ITF-8,
39
 * maps, bitwise I/O, etc.
40
 */
41
42
#ifndef CRAM_IO_H
43
#define CRAM_IO_H
44
45
#include <stdint.h>
46
47
#include "misc.h"
48
49
#ifdef __cplusplus
50
extern "C" {
51
#endif
52
53
/**@{ ----------------------------------------------------------------------
54
 * ITF8 encoding and decoding.
55
 *
56
 * Also see the itf8_get and itf8_put macros.
57
 */
58
59
/*! INTERNAL: Converts two characters into an integer for use in switch{} */
60
254k
/* Arguments are parenthesised so that an expression argument is cast as a
 * whole rather than only its first operand. */
#define CRAM_KEY(a,b) ((((unsigned char) (a))<<8)|(((unsigned char) (b))))
61
62
/*! Reads an integer in ITF-8 encoding from 'fd' and stores it in
63
 * *val.
64
 *
65
 * @return
66
 * Returns the number of bytes read on success;
67
 *        -1 on failure
68
 */
69
int itf8_decode(cram_fd *fd, int32_t *val);
70
71
extern const int itf8_bytes[16];
72
extern const int ltf8_bytes[256];
73
74
/*! Pushes a value in ITF8 format onto the end of a block.
75
 *
76
 * This shouldn't be used for high-volume data as it is not the fastest
77
 * method.
78
 *
79
 * @return
80
 * Returns the number of bytes written
81
 */
82
int itf8_put_blk(cram_block *blk, int32_t val);
83
int ltf8_put_blk(cram_block *blk, int64_t val);
84
85
/*! Pulls a literal 32-bit value from a block.
86
 *
87
 * @returns the number of bytes decoded;
88
 *         -1 on failure.
89
 */
90
int int32_get_blk(cram_block *b, int32_t *val);
91
92
/*! Pushes a literal 32-bit value onto the end of a block.
93
 *
94
 * @return
95
 * Returns 0 on success;
96
 *        -1 on failure.
97
 */
98
int int32_put_blk(cram_block *blk, int32_t val);
99
100
101
/**@}*/
102
/**@{ ----------------------------------------------------------------------
103
 * CRAM blocks - the dynamically growable data block. We have code to
104
 * create, update, (un)compress and read/write.
105
 *
106
 * These are derived from the deflate_interlaced.c blocks, but with the
107
 * CRAM extension of content types and IDs.
108
 */
109
110
/*! Allocates a new cram_block structure with a specified content_type and
111
 * id.
112
 *
113
 * @return
114
 * Returns block pointer on success;
115
 *         NULL on failure
116
 */
117
cram_block *cram_new_block(enum cram_content_type content_type,
118
                           int content_id);
119
120
/*! Reads a block from a cram file.
121
 *
122
 * @return
123
 * Returns cram_block pointer on success;
124
 *         NULL on failure
125
 */
126
cram_block *cram_read_block(cram_fd *fd);
127
128
/*! Writes a CRAM block.
129
 *
130
 * @return
131
 * Returns 0 on success;
132
 *        -1 on failure
133
 */
134
int cram_write_block(cram_fd *fd, cram_block *b);
135
136
/*! Frees a CRAM block, deallocating internal data too.
137
 */
138
void cram_free_block(cram_block *b);
139
140
/*! Uncompress a memory block using Zlib.
141
 *
142
 * @return
143
 * Returns a pointer to the uncompressed data on success;
144
 *         NULL on failure
145
 */
146
char *zlib_mem_inflate(char *cdata, size_t csize, size_t *size);
147
148
/*! Uncompresses a CRAM block, if compressed.
149
 *
150
 * @return
151
 * Returns 0 on success;
152
 *        -1 on failure
153
 */
154
int cram_uncompress_block(cram_block *b);
155
156
/*! Compresses a block.
157
 *
158
 * Compresses a block using one of two different zlib strategies. If we only
159
 * want one choice set strat2 to be -1.
160
 *
161
 * The logic here is that sometimes Z_RLE does a better job than Z_FILTERED
162
 * or Z_DEFAULT_STRATEGY on quality data. If so, we'd rather use it as it is
163
 * significantly faster.
164
 *
165
 * @return
166
 * Returns 0 on success;
167
 *        -1 on failure
168
 */
169
int cram_compress_block(cram_fd *fd, cram_block *b, cram_metrics *metrics,
170
                        int method, int level);
171
int cram_compress_block2(cram_fd *fd, cram_slice *s,
172
                         cram_block *b, cram_metrics *metrics,
173
                         int method, int level);
174
175
cram_metrics *cram_new_metrics(void);
176
char *cram_block_method2str(enum cram_block_method_int m);
177
char *cram_content_type2str(enum cram_content_type t);
178
179
/*
180
 * Find an external block by its content_id
181
 */
182
183
245
static inline cram_block *cram_get_block_by_id(cram_slice *slice, int id) {
    // Work with the ID as unsigned so that negative values are treated as
    // large ones and never index the direct part of the table.
    const uint32_t key = (uint32_t) id;

    if (slice->block_by_id) {
        // IDs below 256 index the lookup table directly.
        if (key < 256)
            return slice->block_by_id[key];

        // Everything else hashes into the slots starting at index 256.
        // The slot may hold a different block, so confirm the ID matches.
        cram_block *hit = slice->block_by_id[256 + key % 251];
        if (hit && hit->content_id == id)
            return hit;
    }

    // No table, or the hashed slot did not match: scan every block in the
    // slice for an EXTERNAL block carrying this content_id.
    for (int n = 0; n < slice->hdr->num_blocks; n++) {
        cram_block *cand = slice->block[n];
        if (!cand)
            continue;
        if (cand->content_type == EXTERNAL && cand->content_id == id)
            return cand;
    }

    return NULL;
}
Unexecuted instantiation: hts.c:cram_get_block_by_id
Unexecuted instantiation: sam.c:cram_get_block_by_id
cram_decode.c:cram_get_block_by_id
Line
Count
Source
183
245
static inline cram_block *cram_get_block_by_id(cram_slice *slice, int id) {
184
  //fprintf(stderr, "%d\t%p\n", id, slice->block_by_id);
185
245
    uint32_t v = id;
186
245
    if (slice->block_by_id && v < 256) {
187
245
        return slice->block_by_id[v];
188
245
    } else {
189
0
        v = 256 + v % 251;
190
0
        if (slice->block_by_id &&
191
0
            slice->block_by_id[v] &&
192
0
            slice->block_by_id[v]->content_id == id)
193
0
            return slice->block_by_id[v];
194
195
        // Otherwise a linear search in case of collision
196
0
        int i;
197
0
        for (i = 0; i < slice->hdr->num_blocks; i++) {
198
0
            cram_block *b = slice->block[i];
199
0
            if (b && b->content_type == EXTERNAL && b->content_id == id)
200
0
                return b;
201
0
        }
202
0
    }
203
0
    return NULL;
204
245
}
Unexecuted instantiation: cram_encode.c:cram_get_block_by_id
Unexecuted instantiation: cram_index.c:cram_get_block_by_id
Unexecuted instantiation: cram_io.c:cram_get_block_by_id
Unexecuted instantiation: cram_stats.c:cram_get_block_by_id
Unexecuted instantiation: cram_codecs.c:cram_get_block_by_id
205
206
/* --- Accessor macros for manipulating blocks on a byte by byte basis --- */
207
208
/* Block size and data pointer. */
209
58.8M
/* The block's 'byte' field; expands to an lvalue (block_append() advances
 * it by the number of bytes it copies in). */
#define BLOCK_SIZE(b) ((b)->byte)
210
4.46M
/* The block's 'data' buffer pointer; may change whenever the block is
 * resized, as block_resize_exact() reallocates it. */
#define BLOCK_DATA(b) ((b)->data)
211
212
/* Returns the address one past the end of the block */
213
13.4M
#define BLOCK_END(b) ((b)->data + (b)->byte)
214
215
/* Make block exactly 'len' bytes long */
216
31.1M
static inline int block_resize_exact(cram_block *b, size_t len) {
    // Reallocates b->data and records 'len' as the new allocation size.
    // Returns 0 on success; -1 if the reallocation fails, in which case
    // b->data and b->alloc are left untouched and remain valid.
    //
    // realloc(ptr, 0) is permitted to free ptr and return NULL.  That would
    // be reported as a failure here while leaving b->data dangling, so a
    // zero-length request still asks the allocator for one byte.
    unsigned char *tmp = realloc(b->data, len ? len : 1);
    if (!tmp)
        return -1;

    b->alloc = len;
    b->data = tmp;
    return 0;
}
Unexecuted instantiation: hts.c:block_resize_exact
Unexecuted instantiation: sam.c:block_resize_exact
cram_decode.c:block_resize_exact
Line
Count
Source
216
1.16k
static inline int block_resize_exact(cram_block *b, size_t len) {
217
1.16k
    unsigned char *tmp = realloc(b->data, len);
218
1.16k
    if (!tmp)
219
0
        return -1;
220
1.16k
    b->alloc = len;
221
1.16k
    b->data = tmp;
222
1.16k
    return 0;
223
1.16k
}
cram_encode.c:block_resize_exact
Line
Count
Source
216
25.2M
static inline int block_resize_exact(cram_block *b, size_t len) {
217
25.2M
    unsigned char *tmp = realloc(b->data, len);
218
25.2M
    if (!tmp)
219
0
        return -1;
220
25.2M
    b->alloc = len;
221
25.2M
    b->data = tmp;
222
25.2M
    return 0;
223
25.2M
}
Unexecuted instantiation: cram_index.c:block_resize_exact
cram_io.c:block_resize_exact
Line
Count
Source
216
5.16M
static inline int block_resize_exact(cram_block *b, size_t len) {
217
5.16M
    unsigned char *tmp = realloc(b->data, len);
218
5.16M
    if (!tmp)
219
0
        return -1;
220
5.16M
    b->alloc = len;
221
5.16M
    b->data = tmp;
222
5.16M
    return 0;
223
5.16M
}
Unexecuted instantiation: cram_stats.c:block_resize_exact
cram_codecs.c:block_resize_exact
Line
Count
Source
216
745k
static inline int block_resize_exact(cram_block *b, size_t len) {
217
745k
    unsigned char *tmp = realloc(b->data, len);
218
745k
    if (!tmp)
219
0
        return -1;
220
745k
    b->alloc = len;
221
745k
    b->data = tmp;
222
745k
    return 0;
223
745k
}
224
225
/* Request block to be at least 'len' bytes long */
226
31.7M
static inline int block_resize(cram_block *b, size_t len) {
    // The current allocation is already strictly larger than the request.
    if (len < b->alloc)
        return 0;

#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
    // Removal of extra padding causes many more reallocs, but detects
    // more buffer overruns.
    size_t target = len ? len : 1;
#else
    // Over-allocate: current size plus 800, plus a further quarter of
    // that, unless the request itself is larger still.
    size_t padded = b->alloc + 800;
    padded += padded >> 2;
    size_t target = MAX(padded, len);
#endif

    return block_resize_exact(b, target);
}
Unexecuted instantiation: hts.c:block_resize
Unexecuted instantiation: sam.c:block_resize
cram_decode.c:block_resize
Line
Count
Source
226
201
static inline int block_resize(cram_block *b, size_t len) {
227
201
    if (b->alloc > len)
228
0
        return 0;
229
230
201
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
231
    // Removal of extra padding causes many more reallocs, but detects
232
    // more buffer overruns.
233
201
    return block_resize_exact(b, len?len:1);
234
0
#endif
235
236
0
    size_t alloc = b->alloc+800;
237
0
    alloc = MAX(alloc + (alloc>>2), len);
238
0
    return block_resize_exact(b, alloc);
239
201
}
cram_encode.c:block_resize
Line
Count
Source
226
25.3M
static inline int block_resize(cram_block *b, size_t len) {
227
25.3M
    if (b->alloc > len)
228
167k
        return 0;
229
230
25.2M
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
231
    // Removal of extra padding causes many more reallocs, but detects
232
    // more buffer overruns.
233
25.2M
    return block_resize_exact(b, len?len:1);
234
0
#endif
235
236
0
    size_t alloc = b->alloc+800;
237
0
    alloc = MAX(alloc + (alloc>>2), len);
238
0
    return block_resize_exact(b, alloc);
239
25.3M
}
Unexecuted instantiation: cram_index.c:block_resize
cram_io.c:block_resize
Line
Count
Source
226
5.38M
static inline int block_resize(cram_block *b, size_t len) {
227
5.38M
    if (b->alloc > len)
228
220k
        return 0;
229
230
5.16M
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
231
    // Removal of extra padding causes many more reallocs, but detects
232
    // more buffer overruns.
233
5.16M
    return block_resize_exact(b, len?len:1);
234
0
#endif
235
236
0
    size_t alloc = b->alloc+800;
237
0
    alloc = MAX(alloc + (alloc>>2), len);
238
0
    return block_resize_exact(b, alloc);
239
5.38M
}
Unexecuted instantiation: cram_stats.c:block_resize
cram_codecs.c:block_resize
Line
Count
Source
226
935k
static inline int block_resize(cram_block *b, size_t len) {
227
935k
    if (b->alloc > len)
228
189k
        return 0;
229
230
745k
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
231
    // Removal of extra padding causes many more reallocs, but detects
232
    // more buffer overruns.
233
745k
    return block_resize_exact(b, len?len:1);
234
0
#endif
235
236
0
    size_t alloc = b->alloc+800;
237
0
    alloc = MAX(alloc + (alloc>>2), len);
238
0
    return block_resize_exact(b, alloc);
239
935k
}
240
241
242
/* Ensure the block can hold at least another 'len' bytes */
243
31.7M
static inline int block_grow(cram_block *b, size_t len) {
    // Total size required: what is already in the block plus the extra.
    size_t wanted = BLOCK_SIZE(b) + len;

    return block_resize(b, wanted);
}
Unexecuted instantiation: hts.c:block_grow
Unexecuted instantiation: sam.c:block_grow
cram_decode.c:block_grow
Line
Count
Source
243
201
static inline int block_grow(cram_block *b, size_t len) {
244
201
    return block_resize(b, BLOCK_SIZE(b) + len);
245
201
}
cram_encode.c:block_grow
Line
Count
Source
243
25.3M
static inline int block_grow(cram_block *b, size_t len) {
244
25.3M
    return block_resize(b, BLOCK_SIZE(b) + len);
245
25.3M
}
Unexecuted instantiation: cram_index.c:block_grow
cram_io.c:block_grow
Line
Count
Source
243
5.37M
static inline int block_grow(cram_block *b, size_t len) {
244
5.37M
    return block_resize(b, BLOCK_SIZE(b) + len);
245
5.37M
}
Unexecuted instantiation: cram_stats.c:block_grow
cram_codecs.c:block_grow
Line
Count
Source
243
935k
static inline int block_grow(cram_block *b, size_t len) {
244
935k
    return block_resize(b, BLOCK_SIZE(b) + len);
245
935k
}
246
247
/* Append string 's' of length 'len'. */
248
8.93M
static inline int block_append(cram_block *b, const void *s, size_t len) {
    // Make room first; on failure nothing has been copied.
    int rc = block_grow(b, len);
    if (rc < 0)
        return -1;

    // An empty append leaves the block contents and size untouched.
    if (len == 0)
        return 0;

    // Copy to the current end of the block, then advance the size.
    memcpy(&b->data[b->byte], s, len);
    b->byte += len;

    return 0;
}
Unexecuted instantiation: hts.c:block_append
Unexecuted instantiation: sam.c:block_append
cram_decode.c:block_append
Line
Count
Source
248
117
static inline int block_append(cram_block *b, const void *s, size_t len) {
249
117
    if (block_grow(b, len) < 0)
250
0
        return -1;
251
252
117
    if (len) {
253
117
        memcpy(BLOCK_END(b), s, len);
254
117
        BLOCK_SIZE(b) += len;
255
117
    }
256
257
117
    return 0;
258
117
}
cram_encode.c:block_append
Line
Count
Source
248
2.68M
static inline int block_append(cram_block *b, const void *s, size_t len) {
249
2.68M
    if (block_grow(b, len) < 0)
250
0
        return -1;
251
252
2.68M
    if (len) {
253
2.67M
        memcpy(BLOCK_END(b), s, len);
254
2.67M
        BLOCK_SIZE(b) += len;
255
2.67M
    }
256
257
2.68M
    return 0;
258
2.68M
}
Unexecuted instantiation: cram_index.c:block_append
cram_io.c:block_append
Line
Count
Source
248
5.37M
static inline int block_append(cram_block *b, const void *s, size_t len) {
249
5.37M
    if (block_grow(b, len) < 0)
250
0
        return -1;
251
252
5.37M
    if (len) {
253
5.37M
        memcpy(BLOCK_END(b), s, len);
254
5.37M
        BLOCK_SIZE(b) += len;
255
5.37M
    }
256
257
5.37M
    return 0;
258
5.37M
}
Unexecuted instantiation: cram_stats.c:block_append
cram_codecs.c:block_append
Line
Count
Source
248
871k
static inline int block_append(cram_block *b, const void *s, size_t len) {
249
871k
    if (block_grow(b, len) < 0)
250
0
        return -1;
251
252
871k
    if (len) {
253
871k
        memcpy(BLOCK_END(b), s, len);
254
871k
        BLOCK_SIZE(b) += len;
255
871k
    }
256
257
871k
    return 0;
258
871k
}
259
260
/* Append a single character 'c' */
261
16.0M
static inline int block_append_char(cram_block *b, char c) {
    // Reserve one more byte, then store 'c' at the end and bump the size.
    if (block_grow(b, 1) >= 0) {
        BLOCK_DATA(b)[BLOCK_SIZE(b)] = c;
        BLOCK_SIZE(b)++;
        return 0;
    }

    return -1;
}
Unexecuted instantiation: hts.c:block_append_char
Unexecuted instantiation: sam.c:block_append_char
cram_decode.c:block_append_char
Line
Count
Source
261
84
static inline int block_append_char(cram_block *b, char c) {
262
84
    if (block_grow(b, 1) < 0)
263
0
        return -1;
264
265
84
    b->data[b->byte++] = c;
266
84
    return 0;
267
84
}
cram_encode.c:block_append_char
Line
Count
Source
261
15.9M
static inline int block_append_char(cram_block *b, char c) {
262
15.9M
    if (block_grow(b, 1) < 0)
263
0
        return -1;
264
265
15.9M
    b->data[b->byte++] = c;
266
15.9M
    return 0;
267
15.9M
}
Unexecuted instantiation: cram_index.c:block_append_char
Unexecuted instantiation: cram_io.c:block_append_char
Unexecuted instantiation: cram_stats.c:block_append_char
cram_codecs.c:block_append_char
Line
Count
Source
261
64.0k
static inline int block_append_char(cram_block *b, char c) {
262
64.0k
    if (block_grow(b, 1) < 0)
263
0
        return -1;
264
265
64.0k
    b->data[b->byte++] = c;
266
64.0k
    return 0;
267
64.0k
}
268
269
/* Append a single unsigned integer */
270
static inline unsigned char *append_uint32(unsigned char *cp, uint32_t i);
271
0
static inline int block_append_uint(cram_block *b, unsigned int i) {
    // Reserve 11 bytes up front; append_uint32() writes the decimal digits
    // of a 32-bit value, which is at most 10 characters.
    if (block_grow(b, 11) < 0)
        return -1;

    // Write the digits at the end of the block and grow the size by
    // however many characters were produced.
    unsigned char *start = BLOCK_END(b);
    unsigned char *stop = append_uint32(start, i);
    BLOCK_SIZE(b) += stop - start;

    return 0;
}
Unexecuted instantiation: hts.c:block_append_uint
Unexecuted instantiation: sam.c:block_append_uint
Unexecuted instantiation: cram_decode.c:block_append_uint
Unexecuted instantiation: cram_encode.c:block_append_uint
Unexecuted instantiation: cram_index.c:block_append_uint
Unexecuted instantiation: cram_io.c:block_append_uint
Unexecuted instantiation: cram_stats.c:block_append_uint
Unexecuted instantiation: cram_codecs.c:block_append_uint
279
280
// Versions of above with built in goto block_err calls.
281
964
// Each wrapper jumps to a 'block_err' label, which the calling function must
// provide, when the underlying call fails.  They are written as a single
// do { } while (0) statement so that using one as the unbraced body of an
// if/else cannot capture the caller's 'else'.
#define BLOCK_RESIZE_EXACT(b,l) do { if (block_resize_exact((b),(l))<0) goto block_err; } while (0)
#define BLOCK_RESIZE(b,l)       do { if (block_resize((b),(l))      <0) goto block_err; } while (0)
#define BLOCK_GROW(b,l)         do { if (block_grow((b),(l))        <0) goto block_err; } while (0)
#define BLOCK_APPEND(b,s,l)     do { if (block_append((b),(s),(l))  <0) goto block_err; } while (0)
#define BLOCK_APPEND_CHAR(b,c)  do { if (block_append_char((b),(c)) <0) goto block_err; } while (0)
#define BLOCK_APPEND_UINT(b,i)  do { if (block_append_uint((b),(i)) <0) goto block_err; } while (0)
287
288
0
static inline unsigned char *append_uint32(unsigned char *cp, uint32_t i) {
    // Writes 'i' in decimal at 'cp' with no leading zeros and no
    // terminator; returns the address just past the last digit written.

    // Count how many digits are needed (zero still takes one).
    unsigned int ndigits = 1;
    for (uint32_t rest = i; rest >= 10; rest /= 10)
        ndigits++;

    // Fill the digits in from least to most significant.
    unsigned char *end = cp + ndigits;
    unsigned char *out = end;
    do {
        *--out = (unsigned char) ('0' + i % 10);
        i /= 10;
    } while (i != 0);

    return end;
}
Unexecuted instantiation: hts.c:append_uint32
Unexecuted instantiation: sam.c:append_uint32
Unexecuted instantiation: cram_decode.c:append_uint32
Unexecuted instantiation: cram_encode.c:append_uint32
Unexecuted instantiation: cram_index.c:append_uint32
Unexecuted instantiation: cram_io.c:append_uint32
Unexecuted instantiation: cram_stats.c:append_uint32
Unexecuted instantiation: cram_codecs.c:append_uint32
325
326
0
static inline unsigned char *append_sub32(unsigned char *cp, uint32_t i) {
    // Always writes exactly nine characters: 'i' in decimal, zero padded on
    // the left.  Returns the address just past the ninth character.
    enum { SUB32_DIGITS = 9 };

    // The eight low-order digits, filled from the right.
    for (int pos = SUB32_DIGITS - 1; pos > 0; pos--) {
        cp[pos] = (unsigned char) ('0' + i % 10);
        i /= 10;
    }

    // Whatever remains goes into the leading position; it is a single
    // digit only when the original value was below 1000000000.
    cp[0] = (unsigned char) ('0' + i);

    return cp + SUB32_DIGITS;
}
Unexecuted instantiation: hts.c:append_sub32
Unexecuted instantiation: sam.c:append_sub32
Unexecuted instantiation: cram_decode.c:append_sub32
Unexecuted instantiation: cram_encode.c:append_sub32
Unexecuted instantiation: cram_index.c:append_sub32
Unexecuted instantiation: cram_io.c:append_sub32
Unexecuted instantiation: cram_stats.c:append_sub32
Unexecuted instantiation: cram_codecs.c:append_sub32
339
340
0
static inline unsigned char *append_uint64(unsigned char *cp, uint64_t i) {
    // Writes 'i' in decimal at 'cp' and returns the address just past the
    // last character written.
    const uint64_t billion = 1000000000;

    // Values that fit in 32 bits go straight to the 32-bit writer.
    if (i <= 0xffffffff)
        return append_uint32(cp, i);

    // Split off the lowest nine digits; 'upper' holds the rest.
    uint64_t upper = i / billion;
    uint32_t low9 = i % billion;

    if (upper > billion) {
        // Three pieces: leading digits, then two zero-padded groups of nine.
        cp = append_uint32(cp, upper / billion);
        cp = append_sub32(cp, upper % billion);
    } else {
        // Two pieces: 'upper' is small enough for the 32-bit writer.
        cp = append_uint32(cp, upper);
    }

    return append_sub32(cp, low9);
}
Unexecuted instantiation: hts.c:append_uint64
Unexecuted instantiation: sam.c:append_uint64
Unexecuted instantiation: cram_decode.c:append_uint64
Unexecuted instantiation: cram_encode.c:append_uint64
Unexecuted instantiation: cram_index.c:append_uint64
Unexecuted instantiation: cram_io.c:append_uint64
Unexecuted instantiation: cram_stats.c:append_uint64
Unexecuted instantiation: cram_codecs.c:append_uint64
357
358
/* Sets both comp_size and uncomp_size to the block's current size.  The
 * expansion is parenthesised so it behaves as a single expression. */
#define BLOCK_UPLEN(b) \
    ((b)->comp_size = (b)->uncomp_size = BLOCK_SIZE(b))
360
361
/**@}*/
362
/**@{ ----------------------------------------------------------------------
363
 * Reference sequence handling
364
 */
365
366
/*! Loads a reference set from fn and stores in the cram_fd.
367
 *
368
 * @return
369
 * Returns 0 on success;
370
 *        -1 on failure
371
 */
372
int cram_load_reference(cram_fd *fd, char *fn);
373
374
/*! Generates a lookup table in refs based on the SQ headers in sam_hdr_t.
375
 *
376
 * Indexes references by the order they appear in a BAM file. This may not
377
 * necessarily be the same order they appear in the fasta reference file.
378
 *
379
 * @return
380
 * Returns 0 on success;
381
 *        -1 on failure
382
 */
383
int refs2id(refs_t *r, sam_hdr_t *hdr);
384
385
void refs_free(refs_t *r);
386
387
/*! Returns a portion of a reference sequence from start to end inclusive.
388
 *
389
 * The returned pointer is owned by the cram_file fd and should not be freed
390
 * by the caller. It is valid only until the next cram_get_ref is called
391
 * with the same fd parameter (so is thread-safe if given multiple files).
392
 *
393
 * To return the entire reference sequence, specify start as 1 and end
394
 * as 0.
395
 *
396
 * @return
397
 * Returns reference on success;
398
 *         NULL on failure
399
 */
400
char *cram_get_ref(cram_fd *fd, int id, hts_pos_t start, hts_pos_t end);
401
void cram_ref_incr(refs_t *r, int id);
402
void cram_ref_decr(refs_t *r, int id);
403
/**@}*/
404
/**@{ ----------------------------------------------------------------------
405
 * Containers
406
 */
407
408
/*! Creates a new container, specifying the maximum number of slices
409
 * and records permitted.
410
 *
411
 * @return
412
 * Returns cram_container ptr on success;
413
 *         NULL on failure
414
 */
415
cram_container *cram_new_container(int nrec, int nslice);
416
void cram_free_container(cram_container *c);
417
418
/*! Reads a container header.
419
 *
420
 * @return
421
 * Returns cram_container on success;
422
 *         NULL on failure or no container left (fd->err == 0).
423
 */
424
cram_container *cram_read_container(cram_fd *fd);
425
426
/*! Writes a container structure.
427
 *
428
 * @return
429
 * Returns 0 on success;
430
 *        -1 on failure
431
 */
432
int cram_write_container(cram_fd *fd, cram_container *h);
433
434
/*! Flushes a container to disk.
435
 *
436
 * Flushes a completely or partially full container to disk, writing
437
 * container structure, header and blocks. This also calls the encoder
438
 * functions.
439
 *
440
 * @return
441
 * Returns 0 on success;
442
 *        -1 on failure
443
 */
444
int cram_flush_container(cram_fd *fd, cram_container *c);
445
int cram_flush_container_mt(cram_fd *fd, cram_container *c);
446
447
448
/**@}*/
449
/**@{ ----------------------------------------------------------------------
450
 * Compression headers; the first part of the container
451
 */
452
453
/*! Creates a new blank container compression header
454
 *
455
 * @return
456
 * Returns header ptr on success;
457
 *         NULL on failure
458
 */
459
cram_block_compression_hdr *cram_new_compression_header(void);
460
461
/*! Frees a cram_block_compression_hdr */
462
void cram_free_compression_header(cram_block_compression_hdr *hdr);
463
464
465
/**@}*/
466
/**@{ ----------------------------------------------------------------------
467
 * Slices and slice headers
468
 */
469
470
/*! Frees a slice header */
471
void cram_free_slice_header(cram_block_slice_hdr *hdr);
472
473
/*! Frees a slice */
474
void cram_free_slice(cram_slice *s);
475
476
/*! Creates a new empty slice in memory, for subsequent writing to
477
 * disk.
478
 *
479
 * @return
480
 * Returns cram_slice ptr on success;
481
 *         NULL on failure
482
 */
483
cram_slice *cram_new_slice(enum cram_content_type type, int nrecs);
484
485
/*! Loads an entire slice.
486
 *
487
 * FIXME: In 1.0 the native unit of slices within CRAM is broken
488
 * as slices contain references to objects in other slices.
489
 * To work around this while keeping the slice oriented outer loop
490
 * we read all slices and stitch them together into a fake large
491
 * slice instead.
492
 *
493
 * @return
494
 * Returns cram_slice ptr on success;
495
 *         NULL on failure
496
 */
497
cram_slice *cram_read_slice(cram_fd *fd);
498
499
500
501
/**@}*/
502
/**@{ ----------------------------------------------------------------------
503
 * CRAM file definition (header)
504
 */
505
506
/*! Reads a CRAM file definition structure.
507
 *
508
 * @return
509
 * Returns file_def ptr on success;
510
 *         NULL on failure
511
 */
512
cram_file_def *cram_read_file_def(cram_fd *fd);
513
514
/*! Writes a cram_file_def structure to cram_fd.
515
 *
516
 * @return
517
 * Returns 0 on success;
518
 *        -1 on failure
519
 */
520
int cram_write_file_def(cram_fd *fd, cram_file_def *def);
521
522
/*! Frees a cram_file_def structure. */
523
void cram_free_file_def(cram_file_def *def);
524
525
526
/**@}*/
527
/**@{ ----------------------------------------------------------------------
528
 * SAM header I/O
529
 */
530
531
/*! Reads the SAM header from the first CRAM data block.
532
 *
533
 * Also performs minimal parsing to extract read-group
534
 * and sample information.
535
 *
536
 * @return
537
 * Returns SAM hdr ptr on success;
538
 *         NULL on failure
539
 */
540
sam_hdr_t *cram_read_SAM_hdr(cram_fd *fd);
541
542
/*! Writes a CRAM SAM header.
543
 *
544
 * @return
545
 * Returns 0 on success;
546
 *        -1 on failure
547
 */
548
int cram_write_SAM_hdr(cram_fd *fd, sam_hdr_t *hdr);
549
550
551
/**@}*/
552
/**@{ ----------------------------------------------------------------------
553
 * The top-level cram opening, closing and option handling
554
 */
555
556
/*! Opens a CRAM file for read (mode "rb") or write ("wb").
557
 *
558
 * The filename may be "-" to indicate stdin or stdout.
559
 *
560
 * @return
561
 * Returns file handle on success;
562
 *         NULL on failure.
563
 */
564
cram_fd *cram_open(const char *filename, const char *mode);
565
566
/*! Opens an existing stream for reading or writing.
567
 *
568
 * @return
569
 * Returns file handle on success;
570
 *         NULL on failure.
571
 */
572
cram_fd *cram_dopen(struct hFILE *fp, const char *filename, const char *mode);
573
574
/*! Closes a CRAM file.
575
 *
576
 * @return
577
 * Returns 0 on success;
578
 *        -1 on failure
579
 */
580
int cram_close(cram_fd *fd);
581
582
/*
583
 * Seek within a CRAM file.
584
 *
585
 * Returns 0 on success
586
 *        -1 on failure
587
 */
588
int cram_seek(cram_fd *fd, off_t offset, int whence);
589
590
/*
591
 * Flushes a CRAM file.
592
 * Useful for when writing to stdout without wishing to close the stream.
593
 *
594
 * Returns 0 on success
595
 *        -1 on failure
596
 */
597
int cram_flush(cram_fd *fd);
598
599
/*! Checks for end of file on a cram_fd stream.
600
 *
601
 * @return
602
 * Returns 0 if not at end of file
603
 *         1 if we hit an expected EOF (end of range or EOF block)
604
 *         2 for other EOF (end of stream without EOF block)
605
 */
606
int cram_eof(cram_fd *fd);
607
608
/*! Sets options on the cram_fd.
609
 *
610
 * See CRAM_OPT_* definitions in cram_structs.h.
611
 * Use this immediately after opening.
612
 *
613
 * @return
614
 * Returns 0 on success;
615
 *        -1 on failure
616
 */
617
int cram_set_option(cram_fd *fd, enum hts_fmt_option opt, ...);
618
619
/*! Sets options on the cram_fd.
620
 *
621
 * See CRAM_OPT_* definitions in cram_structs.h.
622
 * Use this immediately after opening.
623
 *
624
 * @return
625
 * Returns 0 on success;
626
 *        -1 on failure
627
 */
628
int cram_set_voption(cram_fd *fd, enum hts_fmt_option opt, va_list args);
629
630
/*!
631
 * Attaches a header to a cram_fd.
632
 *
633
 * This should be used when creating a new cram_fd for writing where
634
 * we have a sam_hdr_t already constructed (eg from a file we've read
635
 * in).
636
 *
637
 * @return
638
 * Returns 0 on success;
639
 *        -1 on failure
640
 */
641
int cram_set_header2(cram_fd *fd, const sam_hdr_t *hdr);
642
643
/*!
644
 * Returns the hFILE connected to a cram_fd.
645
 */
646
0
static inline struct hFILE *cram_hfile(cram_fd *fd) {
    // Simple accessor: hand back the fd's 'fp' member unchanged.
    struct hFILE *stream = fd->fp;

    return stream;
}
Unexecuted instantiation: hts.c:cram_hfile
Unexecuted instantiation: sam.c:cram_hfile
Unexecuted instantiation: cram_decode.c:cram_hfile
Unexecuted instantiation: cram_encode.c:cram_hfile
Unexecuted instantiation: cram_index.c:cram_hfile
Unexecuted instantiation: cram_io.c:cram_hfile
Unexecuted instantiation: cram_stats.c:cram_hfile
Unexecuted instantiation: cram_codecs.c:cram_hfile
649
650
#ifdef __cplusplus
651
}
652
#endif
653
654
#endif /* CRAM_IO_H */