Coverage Report

Created: 2025-07-18 07:26

/src/htslib/cram/cram_codecs.h
Line
Count
Source (jump to first uncovered line)
1
/*
2
Copyright (c) 2012-2015, 2018, 2020, 2023 Genome Research Ltd.
3
Author: James Bonfield <jkb@sanger.ac.uk>
4
5
Redistribution and use in source and binary forms, with or without
6
modification, are permitted provided that the following conditions are met:
7
8
   1. Redistributions of source code must retain the above copyright notice,
9
this list of conditions and the following disclaimer.
10
11
   2. Redistributions in binary form must reproduce the above copyright notice,
12
this list of conditions and the following disclaimer in the documentation
13
and/or other materials provided with the distribution.
14
15
   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
16
Institute nor the names of its contributors may be used to endorse or promote
17
products derived from this software without specific prior written permission.
18
19
THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
20
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
23
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
*/
30
31
#ifndef CRAM_CODECS_H
32
#define CRAM_CODECS_H
33
34
#include <stdint.h>
35
36
#ifdef __cplusplus
37
extern "C" {
38
#endif
39
40
struct cram_codec;
41
42
/*
43
 * Slow but simple huffman decoder to start with.
44
 * Read a bit at a time, keeping track of {length, value}
45
 * eg. 1 1 0 1 => {1,1},  {2,3}, {3,6}, {4,13}
46
 *
47
 * Keep track of this through the huffman code table.
48
 * For fast scanning we have an index of where the first code of length X
49
 * appears.
50
 */
51
// One entry of the Huffman code table pointed to by the codes members of
// cram_huffman_decoder and cram_huffman_encoder.
typedef struct {
52
    int64_t symbol;  // presumably the symbol this code stands for -- TODO confirm
53
    int32_t p; // next code start value, minus index to codes[]
54
    int32_t code;    // presumably the code's bit pattern ("value" in {length, value}) -- TODO confirm
55
    int32_t len;     // presumably the code length in bits ("length" in {length, value}) -- TODO confirm
56
} cram_huffman_code;
57
58
// Decoder-side Huffman state; stored as the `huffman` member of the
// cram_codec union.
typedef struct {
59
    int ncodes;                // presumably the number of entries in codes[] -- TODO confirm
60
    cram_huffman_code *codes;  // code table; allocation and ownership are not visible in this header
61
    int option;                // NOTE(review): meaning is not defined in this header -- confirm in the codec implementation
62
} cram_huffman_decoder;
63
64
940k
// Bound used to size the val2code[] lookup below (MAX_HUFF+1 slots).
#define MAX_HUFF 128
65
// Encoder-side Huffman state; stored as the `e_huffman` member of the
// cram_codec union.
typedef struct {
66
    cram_huffman_code *codes;  // code table; allocation and ownership are not visible in this header
67
    int nvals;                 // presumably the number of entries in codes[] -- TODO confirm
68
    int val2code[MAX_HUFF+1]; // value to code lookup for small values
69
    int option;                // NOTE(review): meaning is not defined in this header -- confirm in the codec implementation
70
} cram_huffman_encoder;
71
72
// Per-codec state stored in both the `beta` and `e_beta` members of the
// cram_codec union.
typedef struct {
73
    int32_t offset;  // presumably a constant offset applied to coded values -- TODO confirm
74
    int32_t nbits;   // presumably the number of bits used per value -- TODO confirm
75
} cram_beta_decoder;
76
77
// A PACK transform, packing multiple values into a single byte
78
typedef struct {
79
    int32_t nbits;                    // presumably bits used per packed value -- TODO confirm
80
    enum cram_encoding sub_encoding;  // presumably the encoding of the wrapped codec -- TODO confirm
81
    void *sub_codec_dat;              // opaque data for the wrapped codec; type and ownership not visible here
82
    struct cram_codec *sub_codec;     // presumably the wrapped codec instance -- TODO confirm
83
    int nval;  // number of items in maps
84
    uint32_t rmap[256]; // 0,1,2,3 -> P,A,C,K
85
    int map[256];       // P,A,C,K -> 0,1,2,3 // NB: max input is uint8_t? Or use hash?
86
} cram_xpack_decoder;
87
// The encoder shares the decoder's layout.
typedef cram_xpack_decoder cram_xpack_encoder;
88
89
// Transforms symbols X,Y,Z to bytes 0,1,2.
90
// NOTE(review): the comment preceding this struct ("Transforms symbols X,Y,Z
// to bytes 0,1,2") reads like a description of a symbol-mapping transform,
// whereas the xrle name and the len/lit fields below suggest run-length
// encoding.  Confirm and reword that comment.
typedef struct {
91
    enum cram_encoding len_encoding;  // presumably the encoding used for run lengths -- TODO confirm
92
    enum cram_encoding lit_encoding;  // presumably the encoding used for literals -- TODO confirm
93
    void *len_dat;                    // opaque data paired with len_encoding; type not visible here
94
    void *lit_dat;                    // opaque data paired with lit_encoding; type not visible here
95
    struct cram_codec *len_codec;     // presumably the codec instance for run lengths -- TODO confirm
96
    struct cram_codec *lit_codec;     // presumably the codec instance for literals -- TODO confirm
97
    int cur_len;
98
    int cur_lit;
99
    int rep_score[256];               // 256 slots, presumably one per byte value -- TODO confirm
100
    char *to_flush;                   // presumably data buffered until flush -- TODO confirm
101
    size_t to_flush_size;             // presumably the size in bytes of to_flush -- TODO confirm
102
} cram_xrle_decoder;
103
// The encoder shares the decoder's layout.
typedef cram_xrle_decoder cram_xrle_encoder;
104
105
// DELTA + zigzag + varint encoding
106
typedef struct {
107
    // FIXME: define endian here too.  Require little endian?
108
    int64_t last;                     // presumably the previous value, used to form deltas -- TODO confirm
109
    uint8_t word_size; // 1, 2, 4, 8
110
    //uint8_t sign;      // true if input data is already signed
111
    enum cram_encoding sub_encoding;  // presumably the encoding of the wrapped codec -- TODO confirm
112
    void *sub_codec_dat;              // opaque data for the wrapped codec; type and ownership not visible here
113
    struct cram_codec *sub_codec;     // presumably the wrapped codec instance -- TODO confirm
114
} cram_xdelta_decoder;
115
// The encoder shares the decoder's layout.
typedef cram_xdelta_decoder cram_xdelta_encoder;
116
117
// Per-codec state stored in the `gamma` member of the cram_codec union.
typedef struct {
118
    int32_t offset;  // presumably a constant offset applied to coded values -- TODO confirm
119
} cram_gamma_decoder;
120
121
// Per-codec state stored in the `subexp` member of the cram_codec union.
typedef struct {
122
    int32_t offset;  // presumably a constant offset applied to coded values -- TODO confirm
123
    int32_t k;       // NOTE(review): presumably the sub-exponential code's order parameter -- confirm
124
} cram_subexp_decoder;
125
126
// Per-codec state stored in both the `external` and `e_external` members of
// the cram_codec union.
typedef struct {
127
    int32_t content_id;            // content_id used by this codec (see cram_codec_to_id)
128
    enum cram_external_type type;  // NOTE(review): enum is declared elsewhere; meaning not visible here
129
} cram_external_decoder;
130
131
// Per-codec state stored in both the `varint` and `e_varint` members of the
// cram_codec union.
typedef struct {
132
    int32_t content_id;            // content_id used by this codec (see cram_codec_to_id)
133
    int64_t offset;                // presumably a constant offset applied to coded values -- TODO confirm
134
    enum cram_external_type type;  // NOTE(review): enum is declared elsewhere; meaning not visible here
135
} cram_varint_decoder;
136
137
// Per-codec state stored in the `byte_array_len` member of the cram_codec
// union.  Holds two nested codecs.
typedef struct {
138
    struct cram_codec *len_codec;  // presumably decodes the array length -- TODO confirm
139
    struct cram_codec *val_codec;  // presumably decodes the array contents -- TODO confirm
140
} cram_byte_array_len_decoder;
141
142
// Per-codec state stored in both the `byte_array_stop` and
// `e_byte_array_stop` members of the cram_codec union.
typedef struct {
143
    unsigned char stop;  // presumably the byte value that terminates an array -- TODO confirm
144
    int32_t content_id;  // content_id used by this codec (see cram_codec_to_id)
145
} cram_byte_array_stop_decoder;
146
147
// Per-codec state stored in the `e_byte_array_len` member of the cram_codec
// union.
typedef struct {
148
    enum cram_encoding len_encoding;  // presumably the encoding used for array lengths -- TODO confirm
149
    enum cram_encoding val_encoding;  // presumably the encoding used for array contents -- TODO confirm
150
    void *len_dat;                    // opaque data paired with len_encoding; type not visible here
151
    void *val_dat;                    // opaque data paired with val_encoding; type not visible here
152
    struct cram_codec *len_codec;     // presumably the codec instance for lengths -- TODO confirm
153
    struct cram_codec *val_codec;     // presumably the codec instance for contents -- TODO confirm
154
} cram_byte_array_len_encoder;
155
156
// Per-codec state stored in both the `xconst` and `e_xconst` members of the
// cram_codec union.
typedef struct {
157
    int64_t val;  // presumably the single constant value this codec yields -- TODO confirm
158
} cram_const_codec;
159
160
/*
161
 * A generic codec structure.
162
 */
163
struct cram_codec {
164
    enum cram_encoding codec;   // which encoding this instance implements
165
    cram_block *out;            // NOTE(review): presumably the block encoded output is written to -- confirm
166
    varint_vec *vv;             // NOTE(review): type declared elsewhere; presumably variable-length integer helpers -- confirm
167
    int codec_id;
168
    // Per-instance operations.  Only the signatures are visible here; which
    // of these are populated for a given codec is decided by the
    // implementation, so check for NULL as the implementation requires.
    void (*free)(struct cram_codec *codec);
169
    int (*decode)(cram_slice *slice, struct cram_codec *codec,
170
                  cram_block *in, char *out, int *out_size);
171
    int (*encode)(cram_slice *slice, struct cram_codec *codec,
172
                  char *in, int in_size);
173
    int (*store)(struct cram_codec *codec, cram_block *b, char *prefix,
174
                 int version);
175
    int (*size)(cram_slice *slice, struct cram_codec *codec);
176
    int (*flush)(struct cram_codec *codec);
177
    cram_block *(*get_block)(cram_slice *slice, struct cram_codec *codec);
178
    int (*describe)(struct cram_codec *codec, kstring_t *ks);
179
180
    // Codec-specific state.  The first group is named after the decoder
    // structs; the e_ prefixed group holds the encoder-side variants.  Several
    // encoder members reuse the decoder struct type.
    union {
181
        cram_huffman_decoder         huffman;
182
        cram_external_decoder        external;
183
        cram_beta_decoder            beta;
184
        cram_gamma_decoder           gamma;
185
        cram_subexp_decoder          subexp;
186
        cram_byte_array_len_decoder  byte_array_len;
187
        cram_byte_array_stop_decoder byte_array_stop;
188
        cram_xpack_decoder           xpack;
189
        cram_xrle_decoder            xrle;
190
        cram_xdelta_decoder          xdelta;
191
        cram_const_codec             xconst;
192
        cram_varint_decoder          varint;
193
194
        cram_huffman_encoder         e_huffman;
195
        cram_external_decoder        e_external;
196
        cram_byte_array_stop_decoder e_byte_array_stop;
197
        cram_byte_array_len_encoder  e_byte_array_len;
198
        cram_beta_decoder            e_beta;
199
        cram_xpack_decoder           e_xpack;
200
        cram_xrle_decoder            e_xrle;
201
        cram_xdelta_decoder          e_xdelta;
202
        cram_const_codec             e_xconst;
203
        cram_varint_decoder          e_varint;
204
    } u;
205
};
206
207
// Returns a string for the encoding value t.  Only the declaration is
// visible here; presumably the result is a static, human-readable name that
// the caller must not free -- TODO confirm.
const char *cram_encoding2str(enum cram_encoding t);
208
209
// Creates a decoder codec for the given encoding.  Only the declaration is
// visible here, so the notes below are assumptions to confirm against the
// implementation:
//   hdr      compression header the codec belongs to
//   codec    encoding to instantiate
//   data     presumably the serialised codec parameters, `size` bytes long
//   option   external data type hint
//   version  presumably the CRAM format version
//   vv       variable-length integer helpers, stored in cram_codec.vv
// Presumably returns NULL on failure -- TODO confirm.
cram_codec *cram_decoder_init(cram_block_compression_hdr *hdr,
210
                              enum cram_encoding codec, char *data, int size,
211
                              enum cram_external_type option,
212
                              int version, varint_vec *vv);
213
// Creates an encoder codec for the given encoding.  Only the declaration is
// visible here, so the notes below are assumptions to confirm against the
// implementation:
//   codec    encoding to instantiate
//   st       presumably value statistics used to parameterise the codec
//   option   external data type hint
//   dat      opaque, codec-specific initialisation data
//   version  presumably the CRAM format version
//   vv       variable-length integer helpers, stored in cram_codec.vv
// Presumably returns NULL on failure -- TODO confirm.
cram_codec *cram_encoder_init(enum cram_encoding codec, cram_stats *st,
214
                              enum cram_external_type option, void *dat,
215
                              int version, varint_vec *vv);
216
217
//int cram_decode(void *codes, char *in, int in_size, char *out, int *out_size);
218
//void cram_decoder_free(void *codes);
219
220
//#define GET_BIT_MSB(b,v) (void)(v<<=1, v|=(b->data[b->byte] >> b->bit)&1, (--b->bit == -1) && (b->bit = 7, b->byte++))
221
222
0
/*
 * Shift one bit from block b into v, most-significant bit first.
 *
 * v is shifted left by one and bit number b->bit of b->data[b->byte] is ORed
 * into its low bit.  b->bit is then decremented; when it drops below zero
 * b->byte is advanced by one and b->bit wraps back to 7.
 *
 * No bounds checking is performed.  Both arguments are evaluated more than
 * once, so they must be free of side effects.  Every use of an argument is
 * parenthesised so that expressions such as &blk expand correctly.
 */
#define GET_BIT_MSB(b,v) (void)((v)<<=1, (v)|=((b)->data[(b)->byte] >> (b)->bit)&1, (b)->byte += (--(b)->bit<0), (b)->bit&=7)
223
224
/*
225
 * Check that enough bits are left in a block to satisy a bit-based decoder.
226
 * Return  0 if there are enough
227
 *         1 if not.
228
 */
229
230
0
static inline int cram_not_enough_bits(cram_block *blk, int nbits) {
231
0
    if (nbits < 0 ||
232
0
        (blk->byte >= blk->uncomp_size && nbits > 0) ||
233
0
        (blk->uncomp_size - blk->byte <= INT32_MAX / 8 + 1 &&
234
0
         (blk->uncomp_size - blk->byte) * 8 + blk->bit - 7 < nbits)) {
235
0
        return 1;
236
0
    }
237
0
    return 0;
238
0
}
Unexecuted instantiation: hts.c:cram_not_enough_bits
Unexecuted instantiation: sam.c:cram_not_enough_bits
Unexecuted instantiation: cram_decode.c:cram_not_enough_bits
Unexecuted instantiation: cram_encode.c:cram_not_enough_bits
Unexecuted instantiation: cram_index.c:cram_not_enough_bits
Unexecuted instantiation: cram_io.c:cram_not_enough_bits
Unexecuted instantiation: cram_stats.c:cram_not_enough_bits
Unexecuted instantiation: cram_codecs.c:cram_not_enough_bits
239
240
/*
241
 * Returns the content_id used by this codec, also in id2 if byte_array_len.
242
 * Returns -1 for the CORE block and -2 for unneeded.
243
 * id2 is only filled out for BYTE_ARRAY_LEN which uses 2 codecs.
 *
 * NOTE(review): this header does not say whether id2 may be NULL, nor what
 * *id2 holds for other codecs -- confirm against the implementation.
244
 */
245
int cram_codec_to_id(cram_codec *c, int *id2);
246
247
/*
248
 * cram_codec structures are specialised for decoding or encoding.
249
 * Unfortunately this makes turning a decoder into an encoder (such as
250
 * when transcoding files) problematic.
251
 *
252
 * This function converts a cram decoder codec into an encoder version
253
 * in-place (ie it modifies the codec itself).
254
 *
 * NOTE(review): the state of c after a failed conversion is not described
 * here -- confirm against the implementation before reusing it.
 *
255
 * Returns 0 on success;
256
 *        -1 on failure.
257
 */
258
int cram_codec_decoder2encoder(cram_fd *fd, cram_codec *c);
259
260
#ifdef __cplusplus
261
}
262
#endif
263
264
#endif /* CRAM_CODECS_H */