Coverage Report

Created: 2026-06-30 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/htslib/cram/cram_codecs.h
Line
Count
Source
1
/*
2
Copyright (c) 2012-2015, 2018, 2020, 2023, 2026 Genome Research Ltd.
3
Author: James Bonfield <jkb@sanger.ac.uk>
4
5
Redistribution and use in source and binary forms, with or without
6
modification, are permitted provided that the following conditions are met:
7
8
   1. Redistributions of source code must retain the above copyright notice,
9
this list of conditions and the following disclaimer.
10
11
   2. Redistributions in binary form must reproduce the above copyright notice,
12
this list of conditions and the following disclaimer in the documentation
13
and/or other materials provided with the distribution.
14
15
   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
16
Institute nor the names of its contributors may be used to endorse or promote
17
products derived from this software without specific prior written permission.
18
19
THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
20
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
23
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
*/
30
31
#ifndef CRAM_CODECS_H
32
#define CRAM_CODECS_H
33
34
#include <stdint.h>
35
36
#ifdef __cplusplus
37
extern "C" {
38
#endif
39
40
struct cram_codec;
41
42
/*
43
 * Slow but simple huffman decoder to start with.
44
 * Read a bit at a time, keeping track of {length, value}
45
 * eg. 1 1 0 1 => {1,1},  {2,3}, {3,6}, {4,13}
46
 *
47
 * Keep track of this through the huffman code table.
48
 * For fast scanning we have an index of where the first code of length X
49
 * appears.
50
 */
51
typedef struct {
52
    int64_t symbol;
53
    int32_t p; // next code start value, minus index to codes[]
54
    int32_t code;
55
    int32_t len;
56
} cram_huffman_code;
57
58
typedef struct {
59
    int ncodes;
60
    cram_huffman_code *codes;
61
    int option;
62
} cram_huffman_decoder;
63
64
33.7k
#define MAX_HUFF 128
65
typedef struct {
66
    cram_huffman_code *codes;
67
    int nvals;
68
    int val2code[MAX_HUFF+1]; // value to code lookup for small values
69
    int option;
70
} cram_huffman_encoder;
71
72
typedef struct {
73
    int32_t offset;
74
    int32_t nbits;
75
} cram_beta_decoder;
76
77
typedef struct {
78
    int32_t offset;
79
} cram_gamma_decoder;
80
81
typedef struct {
82
    int32_t offset;
83
    int32_t k;
84
} cram_subexp_decoder;
85
86
typedef struct {
87
    int32_t content_id;
88
    enum cram_external_type type;
89
} cram_external_decoder;
90
91
typedef struct {
92
    struct cram_codec *len_codec;
93
    struct cram_codec *val_codec;
94
} cram_byte_array_len_decoder;
95
96
typedef struct {
97
    unsigned char stop;
98
    int32_t content_id;
99
} cram_byte_array_stop_decoder;
100
101
typedef struct {
102
    enum cram_encoding len_encoding;
103
    enum cram_encoding val_encoding;
104
    void *len_dat;
105
    void *val_dat;
106
    struct cram_codec *len_codec;
107
    struct cram_codec *val_codec;
108
} cram_byte_array_len_encoder;
109
110
/*
111
 * A generic codec structure.
112
 */
113
struct cram_codec {
114
    enum cram_encoding codec;
115
    cram_block *out;
116
    varint_vec *vv;
117
    int codec_id;
118
    void (*free)(struct cram_codec *codec);
119
    int (*decode)(cram_slice *slice, struct cram_codec *codec,
120
                  cram_block *in, char *out, int *out_size);
121
    int (*encode)(cram_slice *slice, struct cram_codec *codec,
122
                  char *in, int in_size);
123
    int (*store)(struct cram_codec *codec, cram_block *b, char *prefix,
124
                 int version);
125
    int (*size)(cram_slice *slice, struct cram_codec *codec);
126
    int (*flush)(struct cram_codec *codec);
127
    cram_block *(*get_block)(cram_slice *slice, struct cram_codec *codec);
128
    int (*describe)(struct cram_codec *codec, kstring_t *ks);
129
130
    union {
131
        cram_huffman_decoder         huffman;
132
        cram_external_decoder        external;
133
        cram_beta_decoder            beta;
134
        cram_gamma_decoder           gamma;
135
        cram_subexp_decoder          subexp;
136
        cram_byte_array_len_decoder  byte_array_len;
137
        cram_byte_array_stop_decoder byte_array_stop;
138
139
        cram_huffman_encoder         e_huffman;
140
        cram_external_decoder        e_external;
141
        cram_byte_array_stop_decoder e_byte_array_stop;
142
        cram_byte_array_len_encoder  e_byte_array_len;
143
        cram_beta_decoder            e_beta;
144
    } u;
145
};
146
147
const char *cram_encoding2str(enum cram_encoding t);
148
149
cram_codec *cram_decoder_init(cram_block_compression_hdr *hdr,
150
                              enum cram_encoding codec, char *data, int size,
151
                              enum cram_external_type option,
152
                              int version, varint_vec *vv);
153
cram_codec *cram_encoder_init(enum cram_encoding codec, cram_stats *st,
154
                              enum cram_external_type option, void *dat,
155
                              int version, varint_vec *vv);
156
157
//int cram_decode(void *codes, char *in, int in_size, char *out, int *out_size);
158
//void cram_decoder_free(void *codes);
159
160
//#define GET_BIT_MSB(b,v) (void)(v<<=1, v|=(b->data[b->byte] >> b->bit)&1, (--b->bit == -1) && (b->bit = 7, b->byte++))
161
162
0
#define GET_BIT_MSB(b,v) (void)(v<<=1, v|=(b->data[b->byte] >> b->bit)&1, b->byte += (--b->bit<0), b->bit&=7)
163
164
/*
165
 * Check that enough bits are left in a block to satisfy a bit-based decoder.
166
 * Return  0 if there are enough
167
 *         1 if not.
168
 */
169
170
0
static inline int cram_not_enough_bits(cram_block *blk, int nbits) {
171
0
    if (nbits < 0 ||
172
0
        (blk->byte >= blk->uncomp_size && nbits > 0) ||
173
0
        (blk->uncomp_size - blk->byte <= INT32_MAX / 8 + 1 &&
174
0
         (blk->uncomp_size - blk->byte) * 8 + blk->bit - 7 < nbits)) {
175
0
        return 1;
176
0
    }
177
0
    return 0;
178
0
}
Unexecuted instantiation: hts.c:cram_not_enough_bits
Unexecuted instantiation: sam.c:cram_not_enough_bits
Unexecuted instantiation: cram_decode.c:cram_not_enough_bits
Unexecuted instantiation: cram_encode.c:cram_not_enough_bits
Unexecuted instantiation: cram_index.c:cram_not_enough_bits
Unexecuted instantiation: cram_io.c:cram_not_enough_bits
Unexecuted instantiation: cram_stats.c:cram_not_enough_bits
Unexecuted instantiation: cram_codecs.c:cram_not_enough_bits
179
180
/*
181
 * Returns the content_id used by this codec, also in id2 if byte_array_len.
182
 * Returns -1 for the CORE block and -2 for unneeded.
183
 * id2 is only filled out for BYTE_ARRAY_LEN which uses 2 codecs.
184
 */
185
int cram_codec_to_id(cram_codec *c, int *id2);
186
187
/*
188
 * cram_codec structures are specialised for decoding or encoding.
189
 * Unfortunately this makes turning a decoder into an encoder (such as
190
 * when transcoding files) problematic.
191
 *
192
 * This function converts a cram decoder codec into an encoder version
193
 * in-place (i.e. it modifies the codec itself).
194
 *
195
 * Returns 0 on success;
196
 *        -1 on failure.
197
 */
198
int cram_codec_decoder2encoder(cram_fd *fd, cram_codec *c);
199
200
#ifdef __cplusplus
201
}
202
#endif
203
204
#endif /* CRAM_CODECS_H */