Coverage Report

Created: 2026-06-30 06:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/htslib/cram/cram_codecs.c
Line
Count
Source
1
/*
2
Copyright (c) 2012-2021,2023, 2025, 2026 Genome Research Ltd.
3
Author: James Bonfield <jkb@sanger.ac.uk>
4
5
Redistribution and use in source and binary forms, with or without
6
modification, are permitted provided that the following conditions are met:
7
8
   1. Redistributions of source code must retain the above copyright notice,
9
this list of conditions and the following disclaimer.
10
11
   2. Redistributions in binary form must reproduce the above copyright notice,
12
this list of conditions and the following disclaimer in the documentation
13
and/or other materials provided with the distribution.
14
15
   3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
16
Institute nor the names of its contributors may be used to endorse or promote
17
products derived from this software without specific prior written permission.
18
19
THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS IS" AND
20
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH LTD OR CONTRIBUTORS BE LIABLE
23
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
*/
30
31
/*
32
 * FIXME: add checking of cram_external_type to return NULL on unsupported
33
 * {codec,type} tuples.
34
 */
35
36
#define HTS_BUILDING_LIBRARY // Enables HTSLIB_EXPORT, see htslib/hts_defs.h
37
#include <config.h>
38
39
#include <stdlib.h>
40
#include <string.h>
41
#include <assert.h>
42
#include <limits.h>
43
#include <stdint.h>
44
#include <errno.h>
45
#include <stddef.h>
46
47
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
48
#include "../fuzz_settings.h"
49
#endif
50
51
#include "../htslib/hts_alloc.h"
52
#include "../htslib/hts_endian.h"
53
54
#if defined(HAVE_EXTERNAL_LIBHTSCODECS)
55
#include <htscodecs/varint.h>
56
#include <htscodecs/pack.h>
57
#include <htscodecs/rle.h>
58
#else
59
#include "../htscodecs/htscodecs/varint.h"
60
#include "../htscodecs/htscodecs/pack.h"
61
#include "../htscodecs/htscodecs/rle.h"
62
#endif
63
64
#include "cram.h"
65
66
/*
67
 * ---------------------------------------------------------------------------
68
 * Block bit-level I/O functions.
69
 * All defined static here to promote easy inlining by the compiler.
70
 */
71
72
#if 0
73
/* Get a single bit, MSB first */
74
static signed int get_bit_MSB(cram_block *block) {
75
    unsigned int val;
76
77
    if (block->byte > block->alloc)
78
        return -1;
79
80
    val = block->data[block->byte] >> block->bit;
81
    if (--block->bit == -1) {
82
        block->bit = 7;
83
        block->byte++;
84
        //printf("(%02X)", block->data[block->byte]);
85
    }
86
87
    //printf("-B%d-", val&1);
88
89
    return val & 1;
90
}
91
#endif
92
93
/*
94
 * Count number of successive 0 and 1 bits
95
 */
96
0
/*
 * Counts the run of consecutive 1 bits at the block's bit cursor, reading
 * MSB first, and consumes the terminating 0 bit as well.
 *
 * Returns the number of 1 bits seen;
 *         -1 if the cursor is already at or past uncomp_size, or if the
 *            data ends on a 1 bit (i.e. the run is unterminated).
 */
static int get_one_bits_MSB(cram_block *block) {
    int ones = 0;

    if (block->byte >= block->uncomp_size)
        return -1;

    for (;;) {
        int bit_set = (block->data[block->byte] >> block->bit) & 1;

        /* Step the cursor; wrap to the top bit of the next byte. */
        block->bit--;
        if (block->bit == -1) {
            block->bit = 7;
            block->byte++;
            if (bit_set && block->byte == block->uncomp_size)
                return -1;
        }

        if (!bit_set)
            return ones;
        ones++;
    }
}
113
114
0
/*
 * Counts the run of consecutive 0 bits at the block's bit cursor, reading
 * MSB first, and consumes the terminating 1 bit as well.
 *
 * Returns the number of 0 bits seen;
 *         -1 if the cursor is already at or past uncomp_size, or if the
 *            data ends on a 0 bit (i.e. the run is unterminated).
 */
static int get_zero_bits_MSB(cram_block *block) {
    int zeros = 0;

    if (block->byte >= block->uncomp_size)
        return -1;

    for (;;) {
        int bit_set = (block->data[block->byte] >> block->bit) & 1;

        /* Step the cursor; wrap to the top bit of the next byte. */
        block->bit--;
        if (block->bit == -1) {
            block->bit = 7;
            block->byte++;
            if (!bit_set && block->byte == block->uncomp_size)
                return -1;
        }

        if (bit_set)
            return zeros;
        zeros++;
    }
}
131
132
#if 0
133
/* Stores a single bit */
134
static void store_bit_MSB(cram_block *block, unsigned int bit) {
135
    if (block->byte >= block->alloc) {
136
        block->alloc = block->alloc ? block->alloc*2 : 1024;
137
        block->data = realloc(block->data, block->alloc);
138
    }
139
140
    if (bit)
141
        block->data[block->byte] |= (1 << block->bit);
142
143
    if (--block->bit == -1) {
144
        block->bit = 7;
145
        block->byte++;
146
        block->data[block->byte] = 0;
147
    }
148
}
149
#endif
150
151
#if 0
152
/* Rounds to the next whole byte boundary first */
153
static void store_bytes_MSB(cram_block *block, char *bytes, int len) {
154
    if (block->bit != 7) {
155
        block->bit = 7;
156
        block->byte++;
157
    }
158
159
    while (block->byte + len >= block->alloc) {
160
        block->alloc = block->alloc ? block->alloc*2 : 1024;
161
        block->data = realloc(block->data, block->alloc);
162
    }
163
164
    memcpy(&block->data[block->byte], bytes, len);
165
    block->byte += len;
166
}
167
#endif
168
169
/* Local optimised copy for inlining */
170
0
/*
 * Reads 'nbits' bits, MSB first, from the block's byte/bit cursor and
 * returns them as an integer.  The cursor is advanced past the bits read.
 *
 * No bounds checking is performed here; the decoders in this file call
 * cram_not_enough_bits() before calling this function.
 */
static inline int64_t get_bits_MSB(cram_block *block, int nbits) {
    uint64_t val = 0;
    int i;

    /* Fast path: the whole field sits inside the current byte. */
    if (nbits <= block->bit+1) {
        val = (block->data[block->byte]>>(block->bit-(nbits-1))) & ((1<<nbits)-1);
        block->bit -= nbits;
        if (block->bit == -1) {
            /* Consumed the last bit of this byte; move to the next one. */
            block->bit = 7;
            block->byte++;
        }
        return val;
    }

    /* Field straddles a byte boundary: pull it in one bit at a time.
     * Sizes up to 8 are unrolled via fall-through; larger ones loop. */
    switch (nbits) {
    case  8: GET_BIT_MSB(block, val); // fall through
    case  7: GET_BIT_MSB(block, val); // fall through
    case  6: GET_BIT_MSB(block, val); // fall through
    case  5: GET_BIT_MSB(block, val); // fall through
    case  4: GET_BIT_MSB(block, val); // fall through
    case  3: GET_BIT_MSB(block, val); // fall through
    case  2: GET_BIT_MSB(block, val); // fall through
    case  1: GET_BIT_MSB(block, val);
        break;

    default:
        for (i = 0; i < nbits; i++)
            GET_BIT_MSB(block, val);
    }

    return val;
}
251
252
/*
253
 * Can store up to 24-bits worth of data encoded in an integer value
254
 * Possibly we'd want to have a less optimal store_bits function when dealing
255
 * with nbits > 24, but for now we assume the codes generated are never
256
 * that big. (Given this is only possible with 121392 or more
257
 * characters with exactly the correct frequency distribution we check
258
 * for it elsewhere.)
259
 */
260
92
/*
 * Appends the low 'nbits' bits of 'val' to the block, MSB first, at the
 * block's byte/bit cursor.  The buffer is grown when fewer than 8 spare
 * bytes remain beyond the cursor.
 *
 * Returns 0 on success;
 *        -1 on memory allocation failure.
 */
static int store_bits_MSB(cram_block *block, uint64_t val, int nbits) {
    unsigned int mask;

    if (block->byte+8 >= block->alloc) {
        /* A cursor at byte 0 means nothing has been written yet: start at
         * 1024 bytes, otherwise double the current allocation. */
        int first_alloc = (block->byte == 0);

        block->alloc = first_alloc ? 1024 : block->alloc * 2;
        block->data = hts_realloc_ps(block->data, sizeof(*block->data),
                                     block->alloc, 8);
        if (!block->data)
            return -1;
        if (first_alloc)
            block->data[0] = 0; // initialise first byte of buffer
    }

    /* Everything fits in what is left of the current byte. */
    if (nbits <= block->bit+1) {
        block->data[block->byte] |= (val << (block->bit+1-nbits));
        block->bit -= nbits;
        if (block->bit == -1) {
            block->bit = 7;
            block->byte++;
            block->data[block->byte] = 0;
        }
        return 0;
    }

    /* Top part of val fills the rest of the current byte... */
    nbits -= block->bit+1;
    block->data[block->byte] |= (val >> nbits);
    block->bit = 7;
    block->byte++;
    block->data[block->byte] = 0;

    /* ...and the remaining nbits (at least one) go out a bit at a time. */
    for (mask = 1<<(nbits-1); nbits > 0; nbits--, mask >>= 1) {
        if (val & mask)
            block->data[block->byte] |= (1 << block->bit);
        block->bit--;
        if (block->bit == -1) {
            block->bit = 7;
            block->byte++;
            block->data[block->byte] = 0;
        }
    }

    return 0;
}
316
317
/*
318
 * Returns the next 'size' bytes from a block, or NULL if insufficient
319
 * data left.  This is just a pointer into the block data and not an
320
 * allocated object, so do not free the result.
321
 */
322
0
/*
 * Hands out the next 'size' bytes of block 'b' starting at b->idx and
 * advances b->idx past them.
 *
 * The request is validated before anything is modified: a negative size,
 * a cursor already beyond uncomp_size, or a size larger than the bytes
 * remaining all fail without touching b->idx.  Comparing against the
 * remaining byte count (rather than computing idx + size) avoids integer
 * overflow on very large sizes.
 *
 * Returns a pointer into b->data on success (not to be freed);
 *         NULL if the request cannot be satisfied.
 */
static char *cram_extract_block(cram_block *b, int size) {
    char *cp;

    if (size < 0
        || b->idx > b->uncomp_size
        || size > b->uncomp_size - b->idx)
        return NULL;

    cp = (char *)b->data + b->idx;
    b->idx += size;

    return cp;
}
330
331
/*
332
 * ---------------------------------------------------------------------------
333
 * EXTERNAL
334
 *
335
 * In CRAM 3.0 and earlier, E_EXTERNAL uses the data type to determine the
336
 * size of the object being returned.  This type is hard coded in the
337
 * spec document (changing from uint32 to uint64 requires a spec change)
338
 * and there is no data format introspection so implementations have
339
 * to determine which size to use based on version numbers.   It also
340
 * doesn't support signed data.
341
 *
342
 * For simplicity we use the same encode and decode functions for
343
 * bytes (CRAM4) and external (CRAM3). Given we already had code to
344
 * replace codec + type into a function pointer it makes little
345
 * difference how we ended up at that function.  However we disallow
346
 * this codec to operate on integer data for CRAM4 onwards.
347
 */
348
int cram_external_decode_int(cram_slice *slice, cram_codec *c,
349
0
                             cram_block *in, char *out, int *out_size) {
350
0
    char *cp;
351
0
    cram_block *b;
352
353
    /* Find the external block */
354
0
    b = cram_get_block_by_id(slice, c->u.external.content_id);
355
0
    if (!b)
356
0
        return *out_size?-1:0;
357
358
0
    cp = (char *)b->data + b->idx;
359
    // E_INT and E_LONG are guaranteed single item queries
360
0
    int err = 0;
361
0
    *(int32_t *)out = c->vv->varint_get32(&cp, (char *)b->data + b->uncomp_size, &err);
362
0
    b->idx = cp - (char *)b->data;
363
0
    *out_size = 1;
364
365
0
    return err ? -1 : 0;
366
0
}
367
368
int cram_external_decode_long(cram_slice *slice, cram_codec *c,
369
0
                              cram_block *in, char *out, int *out_size) {
370
0
    char *cp;
371
0
    cram_block *b;
372
373
    /* Find the external block */
374
0
    b = cram_get_block_by_id(slice, c->u.external.content_id);
375
0
    if (!b)
376
0
        return *out_size?-1:0;
377
378
0
    cp = (char *)b->data + b->idx;
379
    // E_INT and E_LONG are guaranteed single item queries
380
0
    int err = 0;
381
0
    *(int64_t *)out = c->vv->varint_get64(&cp, (char *)b->data + b->uncomp_size, &err);
382
0
    b->idx = cp - (char *)b->data;
383
0
    *out_size = 1;
384
385
0
    return err ? -1 : 0;
386
0
}
387
388
int cram_external_decode_char(cram_slice *slice, cram_codec *c,
389
                              cram_block *in, char *out,
390
0
                              int *out_size) {
391
0
    char *cp;
392
0
    cram_block *b;
393
394
    /* Find the external block */
395
0
    b = cram_get_block_by_id(slice, c->u.external.content_id);
396
0
    if (!b)
397
0
        return *out_size?-1:0;
398
399
0
    cp = cram_extract_block(b, *out_size);
400
0
    if (!cp)
401
0
        return -1;
402
403
0
    if (out)
404
0
        memcpy(out, cp, *out_size);
405
0
    return 0;
406
0
}
407
408
static int cram_external_decode_block(cram_slice *slice, cram_codec *c,
409
                                      cram_block *in, char *out_,
410
0
                                      int *out_size) {
411
0
    char *cp;
412
0
    cram_block *out = (cram_block *)out_;
413
0
    cram_block *b = NULL;
414
415
    /* Find the external block */
416
0
    b = cram_get_block_by_id(slice, c->u.external.content_id);
417
0
    if (!b)
418
0
        return *out_size?-1:0;
419
420
0
    cp = cram_extract_block(b, *out_size);
421
0
    if (!cp)
422
0
        return -1;
423
424
0
    BLOCK_APPEND(out, cp, *out_size);
425
0
    return 0;
426
427
0
 block_err:
428
0
    return -1;
429
0
}
430
431
723
/* Releases an EXTERNAL decoder; a NULL codec is accepted. */
void cram_external_decode_free(cram_codec *c) {
    free(c); /* free(NULL) is a no-op, so no guard is required */
}
435
436
437
0
/*
 * Reports the uncompressed size of the external block named by
 * c->u.external.content_id.
 *
 * Returns the block's uncomp_size, or -1 if the slice has no such block.
 */
int cram_external_decode_size(cram_slice *slice, cram_codec *c) {
    cram_block *blk = cram_get_block_by_id(slice, c->u.external.content_id);

    return blk ? blk->uncomp_size : -1;
}
447
448
0
/*
 * Looks up the external block this codec reads from, i.e. the one whose
 * id is c->u.external.content_id.  The result is whatever
 * cram_get_block_by_id() returns for that id.
 */
cram_block *cram_external_get_block(cram_slice *slice, cram_codec *c) {
    cram_block *blk = cram_get_block_by_id(slice, c->u.external.content_id);

    return blk;
}
451
452
0
/*
 * Appends a textual description of this EXTERNAL codec to 'ks'.
 *
 * Returns 0 on success, -1 if ksprintf() reports failure.
 */
int cram_external_describe(cram_codec *c, kstring_t *ks) {
    if (ksprintf(ks, "EXTERNAL(id=%d)", c->u.external.content_id) < 0)
        return -1;

    return 0;
}
456
457
/*
 * Builds an EXTERNAL decoder from the serialised codec parameters in
 * data[0..size-1], which must consist of exactly one varint: the content
 * id of the external block to read.
 *
 * 'option' selects the decode function: E_INT and E_LONG read varints,
 * E_BYTE / E_BYTE_ARRAY copy raw bytes, and anything else appends to a
 * cram_block.
 *
 * Returns a newly allocated codec on success;
 *         NULL on allocation failure or malformed parameters (the latter
 *         is also logged).
 */
cram_codec *cram_external_decode_init(cram_block_compression_hdr *hdr,
                                      char *data, int size,
                                      enum cram_encoding codec,
                                      enum cram_external_type option,
                                      int version, varint_vec *vv) {
    cram_codec *c = NULL;
    char *cp = data;

    if (size < 1)
        goto malformed;

    c = malloc(sizeof(*c));
    if (!c)
        return NULL;

    c->codec     = E_EXTERNAL;
    c->free      = cram_external_decode_free;
    c->size      = cram_external_decode_size;
    c->get_block = cram_external_get_block;
    c->describe  = cram_external_describe;

    // CRAM 3 and earlier encodes integers as EXTERNAL, so the option
    // field is what tells us which serialisation format to expect.
    switch (option) {
    case E_INT:
        c->decode = cram_external_decode_int;
        break;
    case E_LONG:
        c->decode = cram_external_decode_long;
        break;
    case E_BYTE_ARRAY:
    case E_BYTE:
        c->decode = cram_external_decode_char;
        break;
    default:
        c->decode = cram_external_decode_block;
        break;
    }

    c->u.external.content_id = vv->varint_get32(&cp, data+size, NULL);

    // The varint must account for every byte of the parameter data.
    if (cp - data != size)
        goto malformed;

    c->u.external.type = option;

    return c;

 malformed:
    hts_log_error("Malformed external header stream");
    free(c);
    return NULL;
}
503
504
/*
 * Appends the uint32_t at 'in' to the codec's output block as an
 * unsigned 32-bit varint.
 *
 * Returns 0 on success, -1 if the varint writer reports failure.
 */
int cram_external_encode_int(cram_slice *slice, cram_codec *c,
                             char *in, int in_size) {
    uint32_t value = *(uint32_t *)in;

    if (c->vv->varint_put32_blk(c->out, value) < 0)
        return -1;

    return 0;
}
509
510
/*
 * Appends the int32_t at 'in' to the codec's output block using the
 * signed 32-bit varint writer.
 *
 * Returns 0 on success, -1 if the varint writer reports failure.
 */
int cram_external_encode_sint(cram_slice *slice, cram_codec *c,
                             char *in, int in_size) {
    int32_t value = *(int32_t *)in;

    if (c->vv->varint_put32s_blk(c->out, value) < 0)
        return -1;

    return 0;
}
515
516
/*
 * Appends the uint64_t at 'in' to the codec's output block as an
 * unsigned 64-bit varint.
 *
 * Returns 0 on success, -1 if the varint writer reports failure.
 */
int cram_external_encode_long(cram_slice *slice, cram_codec *c,
                             char *in, int in_size) {
    uint64_t value = *(uint64_t *)in;

    if (c->vv->varint_put64_blk(c->out, value) < 0)
        return -1;

    return 0;
}
521
522
/*
 * Appends the int64_t at 'in' to the codec's output block using the
 * signed 64-bit varint writer.
 *
 * Returns 0 on success, -1 if the varint writer reports failure.
 */
int cram_external_encode_slong(cram_slice *slice, cram_codec *c,
                               char *in, int in_size) {
    int64_t value = *(int64_t *)in;

    if (c->vv->varint_put64s_blk(c->out, value) < 0)
        return -1;

    return 0;
}
527
528
/*
 * Appends 'in_size' raw bytes from 'in' to the codec's output block.
 *
 * Returns 0 on success;
 *        -1 via the block_err label (presumably the failure target of
 *           BLOCK_APPEND -- confirm against the macro definition).
 */
int cram_external_encode_char(cram_slice *slice, cram_codec *c,
                              char *in, int in_size) {
    cram_block *dest = c->out;

    BLOCK_APPEND(dest, in, in_size);

    return 0;

 block_err:
    return -1;
}
536
537
15.3k
/* Releases an EXTERNAL encoder; a NULL codec is accepted. */
void cram_external_encode_free(cram_codec *c) {
    free(c); /* free(NULL) is a no-op, so no guard is required */
}
542
543
/*
 * Serialises this EXTERNAL codec's description into block 'b':
 * the optional 'prefix' string, then the codec id, the byte length of
 * the parameter data, and the parameter data itself (the content id as
 * a varint).
 *
 * Returns the number of bytes written on success;
 *        -1 on failure.
 */
int cram_external_encode_store(cram_codec *c, cram_block *b, char *prefix,
                               int version) {
    char id_buf[99], *id_end = id_buf, *id_limit = id_buf+99;
    int total = 0, status = 0, n;
    ptrdiff_t id_len;

    if (prefix) {
        size_t plen = strlen(prefix);
        BLOCK_APPEND(b, prefix, plen);
        total += plen;
    }

    /* Encode the content id first so its length can be stored ahead of it. */
    id_end += c->vv->varint_put32(id_end, id_limit, c->u.e_external.content_id);
    id_len = id_end - id_buf;

    n = c->vv->varint_put32_blk(b, c->codec);
    total += n;
    status |= n;

    n = c->vv->varint_put32_blk(b, id_len);
    total += n;
    status |= n;

    BLOCK_APPEND(b, id_buf, id_len);
    total += id_len;

    /* A negative return from either writer leaves status negative. */
    if (status > 0)
        return total;

 block_err:
    return -1;
}
566
567
/*
 * Builds an EXTERNAL encoder.  'dat' carries the content id of the
 * destination block, passed as an integer cast to a pointer.
 *
 * 'option' selects the encode function: E_INT and E_LONG write varints,
 * E_BYTE / E_BYTE_ARRAY write raw bytes.  Any other type is rejected with
 * a logged error rather than terminating the process, matching how the
 * decoder init functions in this file report unsupported types.
 *
 * Returns a newly allocated codec on success;
 *         NULL on allocation failure or unsupported 'option'.
 */
cram_codec *cram_external_encode_init(cram_stats *st,
                                      enum cram_encoding codec,
                                      enum cram_external_type option,
                                      void *dat,
                                      int version, varint_vec *vv) {
    cram_codec *c;

    c = malloc(sizeof(*c));
    if (!c)
        return NULL;
    c->codec = E_EXTERNAL;
    c->free = cram_external_encode_free;
    // CRAM 3 and earlier encodes integers as EXTERNAL.  We need to
    // use the option field to indicate the input data format so
    // we know which serialisation format to use.
    if (option == E_INT) {
        c->encode = cram_external_encode_int;
    } else if (option == E_LONG) {
        c->encode = cram_external_encode_long;
    } else if (option == E_BYTE_ARRAY || option == E_BYTE) {
        c->encode = cram_external_encode_char;
    } else {
        // Library code must not abort() the caller's process; fail the
        // initialisation instead so the caller can handle it.
        hts_log_error("Unsupported data type for EXTERNAL encoder");
        free(c);
        return NULL;
    }
    c->store = cram_external_encode_store;
    c->flush = NULL;

    c->u.e_external.content_id = (size_t)dat;

    return c;
}
597
598
/*
599
 * ---------------------------------------------------------------------------
600
 * BETA
601
 */
602
0
/*
 * Decodes *out_size BETA-coded values from 'in' into 'out' as int64_t.
 * Each value is c->u.beta.nbits bits wide with c->u.beta.offset
 * subtracted; when nbits is zero no input is consumed and every value is
 * -offset.
 *
 * The total bit count (nbits * n) is range-checked before it is computed
 * so that an oversized count cannot overflow 'int' ahead of the
 * bit-availability test.
 *
 * Returns 0 on success;
 *        -1 if the input does not hold enough bits or the requested count
 *           is too large to represent.
 */
int cram_beta_decode_long(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
    int64_t *out_i = (int64_t *)out;
    int i, n = *out_size;
    int nbits = c->u.beta.nbits;

    if (nbits) {
        // Guard the multiplication below against signed overflow.
        if (nbits > 0 && (n > INT_MAX / nbits || n < INT_MIN / nbits))
            return -1;

        if (cram_not_enough_bits(in, nbits * n))
            return -1;

        for (i = 0; i < n; i++)
            out_i[i] = get_bits_MSB(in, nbits) - c->u.beta.offset;
    } else {
        for (i = 0; i < n; i++)
            out_i[i] = -c->u.beta.offset;
    }

    return 0;
}
619
620
0
/*
 * Decodes *out_size BETA-coded values from 'in' into 'out' as int32_t.
 * Each value is c->u.beta.nbits bits wide with c->u.beta.offset
 * subtracted; when nbits is zero no input is consumed and every value is
 * -offset.
 *
 * The total bit count (nbits * n) is range-checked before it is computed
 * so that an oversized count cannot overflow 'int' ahead of the
 * bit-availability test.
 *
 * Returns 0 on success;
 *        -1 if the input does not hold enough bits or the requested count
 *           is too large to represent.
 */
int cram_beta_decode_int(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
    int32_t *out_i = (int32_t *)out;
    int i, n = *out_size;
    int nbits = c->u.beta.nbits;

    if (nbits) {
        // Guard the multiplication below against signed overflow.
        if (nbits > 0 && (n > INT_MAX / nbits || n < INT_MIN / nbits))
            return -1;

        if (cram_not_enough_bits(in, nbits * n))
            return -1;

        for (i = 0; i < n; i++)
            out_i[i] = get_bits_MSB(in, nbits) - c->u.beta.offset;
    } else {
        for (i = 0; i < n; i++)
            out_i[i] = -c->u.beta.offset;
    }

    return 0;
}
637
638
0
/*
 * Decodes *out_size BETA-coded values from 'in' into 'out' as chars.
 * Each value is c->u.beta.nbits bits wide with c->u.beta.offset
 * subtracted; when nbits is zero no input is consumed and every value is
 * -offset.  A NULL 'out' consumes the input without storing anything.
 *
 * The total bit count (nbits * n) is range-checked before it is computed
 * so that an oversized count cannot overflow 'int' ahead of the
 * bit-availability test.
 *
 * Returns 0 on success;
 *        -1 if the input does not hold enough bits or the requested count
 *           is too large to represent.
 */
int cram_beta_decode_char(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
    int i, n = *out_size;
    int nbits = c->u.beta.nbits;

    if (nbits) {
        // Guard the multiplication below against signed overflow.
        if (nbits > 0 && (n > INT_MAX / nbits || n < INT_MIN / nbits))
            return -1;

        if (cram_not_enough_bits(in, nbits * n))
            return -1;

        if (out)
            for (i = 0; i < n; i++)
                out[i] = get_bits_MSB(in, nbits) - c->u.beta.offset;
        else
            for (i = 0; i < n; i++)
                get_bits_MSB(in, nbits);
    } else {
        if (out)
            for (i = 0; i < n; i++)
                out[i] = -c->u.beta.offset;
    }

    return 0;
}
660
661
162
/* Releases a BETA decoder; a NULL codec is accepted. */
void cram_beta_decode_free(cram_codec *c) {
    free(c); /* free(NULL) is a no-op, so no guard is required */
}
665
666
0
/*
 * Appends a textual description of this BETA codec to 'ks'.
 *
 * Returns 0 on success, -1 if ksprintf() reports failure.
 */
int cram_beta_describe(cram_codec *c, kstring_t *ks) {
    if (ksprintf(ks, "BETA(offset=%d, nbits=%d)",
                 c->u.beta.offset, c->u.beta.nbits) < 0)
        return -1;

    return 0;
}
671
672
/*
 * Builds a BETA decoder from the serialised codec parameters in
 * data[0..size-1], which must consist of exactly two varints: the offset
 * followed by the number of bits per value.
 *
 * 'option' selects the output width (E_INT, E_LONG, or E_BYTE /
 * E_BYTE_ARRAY for chars); other types are rejected.  The bit count must
 * lie between 0 and the number of bits in an int.
 *
 * Returns a newly allocated codec on success;
 *         NULL on allocation failure, unsupported type or malformed
 *         parameters (the latter two are also logged).
 */
cram_codec *cram_beta_decode_init(cram_block_compression_hdr *hdr,
                                  char *data, int size,
                                  enum cram_encoding codec,
                                  enum cram_external_type option,
                                  int version, varint_vec *vv) {
    char *cp = data;
    cram_codec *c = malloc(sizeof(*c));

    if (!c)
        return NULL;

    c->codec = E_BETA;

    switch (option) {
    case E_INT:
        c->decode = cram_beta_decode_int;
        break;
    case E_LONG:
        c->decode = cram_beta_decode_long;
        break;
    case E_BYTE_ARRAY:
    case E_BYTE:
        c->decode = cram_beta_decode_char;
        break;
    default:
        hts_log_error("BYTE_ARRAYs not supported by this codec");
        free(c);
        return NULL;
    }

    c->free     = cram_beta_decode_free;
    c->describe = cram_beta_describe;

    /* nbits stays at -1 (and so fails validation) if the data runs out
     * before the second varint. */
    c->u.beta.nbits  = -1;
    c->u.beta.offset = vv->varint_get32(&cp, data + size, NULL);
    if (cp < data + size)
        c->u.beta.nbits = vv->varint_get32(&cp, data + size, NULL);

    /* Both varints together must use every byte of the parameter data. */
    if (cp - data != size
        || c->u.beta.nbits < 0 || c->u.beta.nbits > 8 * sizeof(int)) {
        hts_log_error("Malformed beta header stream");
        free(c);
        return NULL;
    }

    return c;
}
712
713
/*
 * Serialises this BETA codec's description into block 'b':
 * the optional 'prefix' string, then the codec id, the byte length of
 * the parameters, and the two parameters themselves (offset, nbits).
 *
 * Returns the number of bytes written on success;
 *        -1 on failure.
 */
int cram_beta_encode_store(cram_codec *c, cram_block *b,
                           char *prefix, int version) {
    int total = 0, status = 0, n;

    if (prefix) {
        size_t plen = strlen(prefix);
        BLOCK_APPEND(b, prefix, plen);
        total += plen;
    }

    n = c->vv->varint_put32_blk(b, c->codec);
    total += n;
    status |= n;

    // codec length
    n = c->vv->varint_put32_blk(b, c->vv->varint_size(c->u.e_beta.offset)
                                 + c->vv->varint_size(c->u.e_beta.nbits));
    total += n;
    status |= n;

    n = c->vv->varint_put32_blk(b, c->u.e_beta.offset);
    total += n;
    status |= n;

    n = c->vv->varint_put32_blk(b, c->u.e_beta.nbits);
    total += n;
    status |= n;

    /* A negative return from any writer leaves status negative. */
    if (status > 0)
        return total;

 block_err:
    return -1;
}
736
737
/*
 * BETA-encodes 'in_size' int64_t values from 'in' into the codec's output
 * block: each value has c->u.e_beta.offset added and is then written as
 * c->u.e_beta.nbits bits.
 *
 * Returns the bitwise OR of the store_bits_MSB() results: 0 if every
 * store succeeded, non-zero otherwise.
 */
int cram_beta_encode_long(cram_slice *slice, cram_codec *c,
                          char *in, int in_size) {
    int64_t *values = (int64_t *)in;
    int status = 0;

    for (int idx = 0; idx < in_size; idx++) {
        status |= store_bits_MSB(c->out, values[idx] + c->u.e_beta.offset,
                                 c->u.e_beta.nbits);
    }

    return status;
}
748
749
/*
 * BETA-encodes 'in_size' int values from 'in' into the codec's output
 * block: each value has c->u.e_beta.offset added and is then written as
 * c->u.e_beta.nbits bits.
 *
 * Returns the bitwise OR of the store_bits_MSB() results: 0 if every
 * store succeeded, non-zero otherwise.
 */
int cram_beta_encode_int(cram_slice *slice, cram_codec *c,
                         char *in, int in_size) {
    int *values = (int *)in;
    int status = 0;

    for (int idx = 0; idx < in_size; idx++) {
        status |= store_bits_MSB(c->out, values[idx] + c->u.e_beta.offset,
                                 c->u.e_beta.nbits);
    }

    return status;
}
760
761
/*
 * BETA-encodes 'in_size' bytes from 'in' (read as unsigned char) into the
 * codec's output block: each value has c->u.e_beta.offset added and is
 * then written as c->u.e_beta.nbits bits.
 *
 * Returns the bitwise OR of the store_bits_MSB() results: 0 if every
 * store succeeded, non-zero otherwise.
 */
int cram_beta_encode_char(cram_slice *slice, cram_codec *c,
                          char *in, int in_size) {
    unsigned char *values = (unsigned char *)in;
    int status = 0;

    for (int idx = 0; idx < in_size; idx++) {
        status |= store_bits_MSB(c->out, values[idx] + c->u.e_beta.offset,
                                 c->u.e_beta.nbits);
    }

    return status;
}
772
773
35
/* Releases a BETA encoder; a NULL codec is accepted. */
void cram_beta_encode_free(cram_codec *c) {
    free(c); /* free(NULL) is a no-op, so no guard is required */
}
776
777
/*
 * Builds a BETA encoder.
 *
 * The value range comes from 'dat' when it is non-NULL (read as two
 * hts_pos_t values: minimum then maximum); otherwise it is derived from
 * the symbols recorded in 'st' (the freqs[] array plus the optional hash
 * table).  The stored offset is -minimum and nbits is the number of bits
 * needed to represent (maximum - minimum).
 *
 * Returns a newly allocated codec on success;
 *         NULL on allocation failure, if the range is empty, or if for
 *         E_INT the maximum or the range exceeds UINT_MAX.
 */
cram_codec *cram_beta_encode_init(cram_stats *st,
                                  enum cram_encoding codec,
                                  enum cram_external_type option,
                                  void *dat,
                                  int version, varint_vec *vv) {
    hts_pos_t lo, hi;
    int64_t span;
    int nbits;
    cram_codec *c = malloc(sizeof(*c));

    if (!c)
        return NULL;

    c->codec = E_BETA;
    c->free  = cram_beta_encode_free;
    c->store = cram_beta_encode_store;
    c->flush = NULL;

    switch (option) {
    case E_INT:
        c->encode = cram_beta_encode_int;
        break;
    case E_LONG:
        c->encode = cram_beta_encode_long;
        break;
    default:
        c->encode = cram_beta_encode_char;
        break;
    }

    if (dat) {
        /* Caller supplied the bounds directly. */
        hts_pos_t *bounds = (hts_pos_t *)dat;
        lo = bounds[0];
        hi = bounds[1];
    } else {
        /* Scan the statistics for the smallest and largest symbol. */
        int sym;

        lo = INT_MAX;
        hi = INT_MIN;

        for (sym = 0; sym < MAX_STAT_VAL; sym++) {
            if (st->freqs[sym]) {
                if (lo > sym)
                    lo = sym;
                hi = sym; /* ascending scan: the last hit is the largest */
            }
        }

        if (st->h) {
            khint_t k;

            for (k = kh_begin(st->h); k != kh_end(st->h); k++) {
                if (kh_exist(st->h, k)) {
                    sym = kh_key(st->h, k);
                    if (lo > sym)
                        lo = sym;
                    if (hi < sym)
                        hi = sym;
                }
            }
        }
    }

    /* Nothing was observed, or the supplied bounds are inverted. */
    if (hi < lo)
        goto err;

    span = (int64_t) hi - lo;
    if (option == E_INT && (hi > UINT_MAX || span > UINT_MAX))
        goto err;

    c->u.e_beta.offset = -lo;

    /* Count the bits needed to hold the largest offset-adjusted value. */
    for (nbits = 0; span; span >>= 1)
        nbits++;
    c->u.e_beta.nbits = nbits;

    return c;

 err:
    free(c);
    return NULL;
}
858
859
/*
860
 * ---------------------------------------------------------------------------
861
 * SUBEXP
862
 */
863
0
/*
 * Decodes *out_size SUBEXP-coded values from 'in' into 'out' as int32_t.
 *
 * Each value starts with a unary run of u one-bits.  With k taken from
 * c->u.subexp.k:
 *   u > 0:  value = 2^(k+u-1) + the next k+u-1 bits
 *   u = 0:  value = the next k bits
 * c->u.subexp.offset is then subtracted.
 *
 * Returns 0 on success;
 *        -1 if the input runs out of bits or a non-zero u exceeds 31-k.
 */
int cram_subexp_decode(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
    int32_t *dest = (int32_t *)out;
    int k = c->u.subexp.k;
    int total = *out_size;
    int idx;

    for (idx = 0; idx < total; idx++) {
        int val = 0;
        int tail;

        /* Length of the unary prefix. */
        int ones = get_one_bits_MSB(in);
        if (ones < 0 || cram_not_enough_bits(in, ones > 0 ? ones + k - 1 : k))
            return -1;

        /* Reject prefixes that would shift beyond the width of an int. */
        if (ones && ones > 31-k)
            return -1;

        /* Read the binary suffix. */
        for (tail = ones ? ones + k-1 : k; tail; tail--)
            GET_BIT_MSB(in, val);

        if (ones)
            val += 1 << (ones + k-1);

        dest[idx] = val - c->u.subexp.offset;
    }

    return 0;
}
905
906
171
/* Releases a SUBEXP decoder; a NULL codec is accepted. */
void cram_subexp_decode_free(cram_codec *c) {
    free(c); /* free(NULL) is a no-op, so no guard is required */
}
910
911
0
/*
 * Appends a textual description of this SUBEXP codec to 'ks'.
 *
 * Returns 0 on success, -1 if ksprintf() reports failure.
 */
int cram_subexp_describe(cram_codec *c, kstring_t *ks) {
    if (ksprintf(ks, "SUBEXP(offset=%d,k=%d)",
                 c->u.subexp.offset, c->u.subexp.k) < 0)
        return -1;

    return 0;
}
917
918
/*
 * Builds a SUBEXP decoder from the serialised codec parameters in
 * data[0..size-1], which must consist of exactly two varints: the offset
 * followed by k.  Only E_INT data is supported and k must lie in 0..31.
 *
 * Returns a newly allocated codec on success;
 *         NULL on allocation failure, unsupported type or malformed
 *         parameters (the latter two are also logged).
 */
cram_codec *cram_subexp_decode_init(cram_block_compression_hdr *hdr,
                                    char *data, int size,
                                    enum cram_encoding codec,
                                    enum cram_external_type option,
                                    int version, varint_vec *vv) {
    char *cp = data;
    cram_codec *c;

    if (option != E_INT) {
        hts_log_error("This codec only supports INT encodings");
        return NULL;
    }

    c = malloc(sizeof(*c));
    if (!c)
        return NULL;

    c->codec    = E_SUBEXP;
    c->decode   = cram_subexp_decode;
    c->free     = cram_subexp_decode_free;
    c->describe = cram_subexp_describe;
    c->u.subexp.k = -1;

    c->u.subexp.offset = vv->varint_get32(&cp, data + size, NULL);
    c->u.subexp.k      = vv->varint_get32(&cp, data + size, NULL);

    /* Accept only if both varints used every byte and k is in range. */
    if (cp - data == size && c->u.subexp.k >= 0 && c->u.subexp.k <= 31)
        return c;

    hts_log_error("Malformed subexp header stream");
    free(c);
    return NULL;
}
951
952
/*
953
 * ---------------------------------------------------------------------------
954
 * GAMMA
955
 */
956
0
/*
 * Decodes *out_size gamma coded integers from the bit stream "in".
 *
 * Each value is a run of zero bits, whose length gives the number of
 * payload bits that follow an implicit leading 1.  The codec offset is
 * subtracted before the value is stored in out, viewed as int32_t[].
 *
 * Returns 0 on success;
 *        -1 if cram_not_enough_bits() rejects the payload length.
 */
int cram_gamma_decode(cram_slice *slice, cram_codec *c, cram_block *in, char *out, int *out_size) {
    int32_t *dest = (int32_t *)out;
    const int count = *out_size;
    int pos;

    for (pos = 0; pos < count; pos++) {
        int payload_bits = get_zero_bits_MSB(in);
        int value = 1;

        if (cram_not_enough_bits(in, payload_bits))
            return -1;

        for (; payload_bits > 0; payload_bits--) {
            GET_BIT_MSB(in, value);
        }

        dest[pos] = value - c->u.gamma.offset;
    }

    return 0;
}
979
980
474
/* Releases a GAMMA decoder object; a NULL pointer is accepted. */
void cram_gamma_decode_free(cram_codec *c) {
    /* free(NULL) is defined to do nothing, so no guard is needed. */
    free(c);
}
984
985
0
/*
 * Appends a textual description of a GAMMA codec to ks.
 *
 * Returns 0 on success;
 *        -1 if ksprintf reports a failure.
 */
int cram_gamma_describe(cram_codec *c, kstring_t *ks) {
    /*
     * Read the gamma member of the union, which is the one that
     * cram_gamma_decode_init fills in, rather than the subexp member.
     */
    return ksprintf(ks, "GAMMA(offset=%d)", c->u.gamma.offset)
        < 0 ? -1 : 0;
}
989
990
/*
 * Initialises a GAMMA decoder from its serialised parameters.
 *
 * The header in data[0..size) must hold exactly one variable-length
 * integer, the offset.  Only E_INT output is supported.
 *
 * Returns a newly allocated codec on success (release via c->free);
 *         NULL on failure.
 */
cram_codec *cram_gamma_decode_init(cram_block_compression_hdr *hdr,
                                   char *data, int size,
                                   enum cram_encoding codec,
                                   enum cram_external_type option,
                                   int version, varint_vec *vv) {
    cram_codec *gamma = NULL;
    char *cursor = data;

    if (option != E_INT) {
        hts_log_error("This codec only supports INT encodings");
        return NULL;
    }

    if (size >= 1) {
        gamma = malloc(sizeof(*gamma));
        if (!gamma)
            return NULL;

        gamma->codec    = E_GAMMA;
        gamma->decode   = cram_gamma_decode;
        gamma->free     = cram_gamma_decode_free;
        gamma->describe = cram_gamma_describe;

        gamma->u.gamma.offset = vv->varint_get32(&cursor, data+size, NULL);

        /* The offset must account for every byte of the header. */
        if (cursor - data == size)
            return gamma;
    }

    /* Either an empty header or one whose length does not match. */
    hts_log_error("Malformed gamma header stream");
    free(gamma);
    return NULL;
}
1026
1027
/*
1028
 * ---------------------------------------------------------------------------
1029
 * HUFFMAN
1030
 */
1031
1032
2.26k
static int code_sort(const void *vp1, const void *vp2) {
1033
2.26k
    const cram_huffman_code *c1 = (const cram_huffman_code *)vp1;
1034
2.26k
    const cram_huffman_code *c2 = (const cram_huffman_code *)vp2;
1035
1036
2.26k
    if (c1->len != c2->len)
1037
633
        return c1->len - c2->len;
1038
1.62k
    else
1039
1.62k
        return c1->symbol < c2->symbol ? -1 : (c1->symbol > c2->symbol ? 1 : 0);
1040
2.26k
}
1041
1042
654
/* Releases a huffman decoder and its code table; NULL is accepted. */
void cram_huffman_decode_free(cram_codec *c) {
    if (c) {
        /* The table may be NULL; free() tolerates that. */
        free(c->u.huffman.codes);
        free(c);
    }
}
1050
1051
int cram_huffman_decode_null(cram_slice *slice, cram_codec *c,
1052
0
                             cram_block *in, char *out, int *out_size) {
1053
0
    return -1;
1054
0
}
1055
1056
int cram_huffman_decode_char0(cram_slice *slice, cram_codec *c,
1057
0
                              cram_block *in, char *out, int *out_size) {
1058
0
    int i, n;
1059
1060
0
    if (!out)
1061
0
        return 0;
1062
1063
    /* Special case of 0 length codes */
1064
0
    for (i = 0, n = *out_size; i < n; i++) {
1065
0
        out[i] = c->u.huffman.codes[0].symbol;
1066
0
    }
1067
0
    return 0;
1068
0
}
1069
1070
int cram_huffman_decode_char(cram_slice *slice, cram_codec *c,
1071
0
                             cram_block *in, char *out, int *out_size) {
1072
0
    int i, n, ncodes = c->u.huffman.ncodes;
1073
0
    const cram_huffman_code * const codes = c->u.huffman.codes;
1074
1075
0
    for (i = 0, n = *out_size; i < n; i++) {
1076
0
        int idx = 0;
1077
0
        int val = 0, len = 0, last_len = 0;
1078
1079
0
        for (;;) {
1080
0
            int dlen = codes[idx].len - last_len;
1081
0
            if (cram_not_enough_bits(in, dlen))
1082
0
                return -1;
1083
1084
            //val <<= dlen;
1085
            //val  |= get_bits_MSB(in, dlen);
1086
            //last_len = (len += dlen);
1087
1088
0
            last_len = (len += dlen);
1089
0
            for (; dlen; dlen--) GET_BIT_MSB(in, val);
1090
1091
0
            idx = val - codes[idx].p;
1092
0
            if (idx >= ncodes || idx < 0)
1093
0
                return -1;
1094
1095
0
            if (codes[idx].code == val && codes[idx].len == len) {
1096
0
                if (out) out[i] = codes[idx].symbol;
1097
0
                break;
1098
0
            }
1099
0
        }
1100
0
    }
1101
1102
0
    return 0;
1103
0
}
1104
1105
int cram_huffman_decode_int0(cram_slice *slice, cram_codec *c,
1106
0
                             cram_block *in, char *out, int *out_size) {
1107
0
    int32_t *out_i = (int32_t *)out;
1108
0
    int i, n;
1109
0
    const cram_huffman_code * const codes = c->u.huffman.codes;
1110
1111
    /* Special case of 0 length codes */
1112
0
    for (i = 0, n = *out_size; i < n; i++) {
1113
0
        out_i[i] = codes[0].symbol;
1114
0
    }
1115
0
    return 0;
1116
0
}
1117
1118
int cram_huffman_decode_int(cram_slice *slice, cram_codec *c,
1119
0
                            cram_block *in, char *out, int *out_size) {
1120
0
    int32_t *out_i = (int32_t *)out;
1121
0
    int i, n, ncodes = c->u.huffman.ncodes;
1122
0
    const cram_huffman_code * const codes = c->u.huffman.codes;
1123
1124
0
    for (i = 0, n = *out_size; i < n; i++) {
1125
0
        int idx = 0;
1126
0
        int val = 0, len = 0, last_len = 0;
1127
1128
        // Now one bit at a time for remaining checks
1129
0
        for (;;) {
1130
0
            int dlen = codes[idx].len - last_len;
1131
0
            if (cram_not_enough_bits(in, dlen))
1132
0
                return -1;
1133
1134
            //val <<= dlen;
1135
            //val  |= get_bits_MSB(in, dlen);
1136
            //last_len = (len += dlen);
1137
1138
0
            last_len = (len += dlen);
1139
0
            for (; dlen; dlen--) GET_BIT_MSB(in, val);
1140
1141
0
            idx = val - codes[idx].p;
1142
0
            if (idx >= ncodes || idx < 0)
1143
0
                return -1;
1144
1145
0
            if (codes[idx].code == val && codes[idx].len == len) {
1146
0
                out_i[i] = codes[idx].symbol;
1147
0
                break;
1148
0
            }
1149
0
        }
1150
0
    }
1151
1152
0
    return 0;
1153
0
}
1154
1155
int cram_huffman_decode_long0(cram_slice *slice, cram_codec *c,
1156
0
                              cram_block *in, char *out, int *out_size) {
1157
0
    int64_t *out_i = (int64_t *)out;
1158
0
    int i, n;
1159
0
    const cram_huffman_code * const codes = c->u.huffman.codes;
1160
1161
    /* Special case of 0 length codes */
1162
0
    for (i = 0, n = *out_size; i < n; i++) {
1163
0
        out_i[i] = codes[0].symbol;
1164
0
    }
1165
0
    return 0;
1166
0
}
1167
1168
int cram_huffman_decode_long(cram_slice *slice, cram_codec *c,
1169
0
                             cram_block *in, char *out, int *out_size) {
1170
0
    int64_t *out_i = (int64_t *)out;
1171
0
    int i, n, ncodes = c->u.huffman.ncodes;
1172
0
    const cram_huffman_code * const codes = c->u.huffman.codes;
1173
1174
0
    for (i = 0, n = *out_size; i < n; i++) {
1175
0
        int idx = 0;
1176
0
        int val = 0, len = 0, last_len = 0;
1177
1178
        // Now one bit at a time for remaining checks
1179
0
        for (;;) {
1180
0
            int dlen = codes[idx].len - last_len;
1181
0
            if (cram_not_enough_bits(in, dlen))
1182
0
                return -1;
1183
1184
            //val <<= dlen;
1185
            //val  |= get_bits_MSB(in, dlen);
1186
            //last_len = (len += dlen);
1187
1188
0
            last_len = (len += dlen);
1189
0
            for (; dlen; dlen--) GET_BIT_MSB(in, val);
1190
1191
0
            idx = val - codes[idx].p;
1192
0
            if (idx >= ncodes || idx < 0)
1193
0
                return -1;
1194
1195
0
            if (codes[idx].code == val && codes[idx].len == len) {
1196
0
                out_i[i] = codes[idx].symbol;
1197
0
                break;
1198
0
            }
1199
0
        }
1200
0
    }
1201
1202
0
    return 0;
1203
0
}
1204
1205
0
/*
 * Appends a textual description of a huffman codec to ks, listing the
 * symbols and then their code lengths in table order.
 *
 * Returns 0 on success;
 *        -1 if any ksprintf call reported a failure.
 */
int cram_huffman_describe(cram_codec *c, kstring_t *ks) {
    int r = 0, n;

    r |= ksprintf(ks, "HUFFMAN(codes={") < 0;
    for (n = 0; n < c->u.huffman.ncodes; n++) {
        /*
         * Only the sign of ksprintf's result signals failure, so test it
         * here as everywhere else; OR-ing the raw return value in would
         * turn every successful write into a reported error.
         */
        r |= ksprintf(ks, "%s%"PRId64, n?",":"",
                      c->u.huffman.codes[n].symbol) < 0;
    }
    r |= ksprintf(ks, "},lengths={") < 0;
    for (n = 0; n < c->u.huffman.ncodes; n++) {
        r |= ksprintf(ks, "%s%d", n?",":"",
                      c->u.huffman.codes[n].len) < 0;
    }
    r |= ksprintf(ks, "})") < 0;

    /* Same 0 / -1 convention as the other describe functions here. */
    return r ? -1 : 0;
}
1220
1221
/*
1222
 * Initialises a huffman decoder from an encoding data stream.
1223
 */
1224
cram_codec *cram_huffman_decode_init(cram_block_compression_hdr *hdr,
1225
                                     char *data, int size,
1226
                                     enum cram_encoding codec,
1227
                                     enum cram_external_type option,
1228
771
                                     int version, varint_vec *vv) {
1229
771
    int32_t ncodes = 0, i, j;
1230
771
    char *cp = data, *data_end = &data[size];
1231
771
    cram_codec *h;
1232
771
    cram_huffman_code *codes = NULL;
1233
771
    int32_t val, last_len, max_len = 0;
1234
771
    uint32_t max_val; // needs one more bit than val
1235
771
    const int max_code_bits = sizeof(val) * 8 - 1;
1236
771
    int err = 0;
1237
1238
771
    if (option == E_BYTE_ARRAY_BLOCK) {
1239
3
        hts_log_error("BYTE_ARRAYs not supported by this codec");
1240
3
        return NULL;
1241
3
    }
1242
1243
768
    ncodes = vv->varint_get32(&cp, data_end, &err);
1244
768
    if (ncodes < 0) {
1245
6
        hts_log_error("Invalid number of symbols in huffman stream");
1246
6
        return NULL;
1247
6
    }
1248
762
    if (ncodes >= SIZE_MAX / sizeof(*codes)) {
1249
0
        errno = ENOMEM;
1250
0
        return NULL;
1251
0
    }
1252
762
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
1253
762
    if (ncodes > FUZZ_ALLOC_LIMIT / sizeof(*codes)) {
1254
9
        errno = ENOMEM;
1255
9
        return NULL;
1256
9
    }
1257
753
#endif
1258
753
    h = calloc(1, sizeof(*h));
1259
753
    if (!h)
1260
0
        return NULL;
1261
1262
753
    h->codec  = E_HUFFMAN;
1263
753
    h->free   = cram_huffman_decode_free;
1264
1265
753
    h->u.huffman.ncodes = ncodes;
1266
753
    h->u.huffman.option = option;
1267
753
    if (ncodes) {
1268
726
        codes = h->u.huffman.codes = malloc(ncodes * sizeof(*codes));
1269
726
        if (!codes) {
1270
0
            free(h);
1271
0
            return NULL;
1272
0
        }
1273
726
    } else {
1274
27
        codes = h->u.huffman.codes = NULL;
1275
27
    }
1276
1277
    /* Read symbols and bit-lengths */
1278
753
    if (option == E_LONG) {
1279
0
        for (i = 0; i < ncodes; i++)
1280
0
            codes[i].symbol = vv->varint_get64(&cp, data_end, &err);
1281
753
    } else if (option == E_INT || option == E_BYTE) {
1282
1.27M
        for (i = 0; i < ncodes; i++)
1283
1.27M
            codes[i].symbol = vv->varint_get32(&cp, data_end, &err);
1284
747
    } else {
1285
6
        goto malformed;
1286
6
    }
1287
1288
747
    if (err)
1289
12
        goto malformed;
1290
1291
735
    i = vv->varint_get32(&cp, data_end, &err);
1292
735
    if (i != ncodes)
1293
24
        goto malformed;
1294
1295
711
    if (ncodes == 0) {
1296
        /* NULL huffman stream.  Ensure it returns an error if
1297
           anything tries to use it. */
1298
21
        h->decode = cram_huffman_decode_null;
1299
21
        return h;
1300
21
    }
1301
1302
2.83k
    for (i = 0; i < ncodes; i++) {
1303
2.16k
        codes[i].len = vv->varint_get32(&cp, data_end, &err);
1304
2.16k
        if (err)
1305
12
            break;
1306
2.15k
        if (codes[i].len < 0) {
1307
12
            hts_log_error("Huffman code length (%d) is negative", codes[i].len);
1308
12
            goto malformed;
1309
12
        }
1310
2.14k
        if (max_len < codes[i].len)
1311
513
            max_len = codes[i].len;
1312
2.14k
    }
1313
678
    if (err || cp - data != size || max_len >= ncodes)
1314
21
        goto malformed;
1315
1316
    /* 31 is max. bits available in val */
1317
657
    if (max_len > max_code_bits) {
1318
3
        hts_log_error("Huffman code length (%d) is greater "
1319
3
                      "than maximum supported (%d)", max_len, max_code_bits);
1320
3
        goto malformed;
1321
3
    }
1322
1323
    /* Sort by bit length and then by symbol value */
1324
654
    qsort(codes, ncodes, sizeof(*codes), code_sort);
1325
1326
    /* Assign canonical codes */
1327
654
    val = -1, last_len = 0, max_val = 0;
1328
1.90k
    for (i = 0; i < ncodes; i++) {
1329
1.27k
        val++;
1330
1.27k
        if (val > max_val)
1331
21
            goto malformed;
1332
1333
1.25k
        if (codes[i].len > last_len) {
1334
405
            val <<= (codes[i].len - last_len);
1335
405
            last_len = codes[i].len;
1336
405
            max_val = (1U << codes[i].len) - 1;
1337
405
        }
1338
1.25k
        codes[i].code = val;
1339
1.25k
    }
1340
1341
    /*
1342
     * Compute the next starting point, offset by the i'th value.
1343
     * For example if codes 10, 11, 12, 13 are 30, 31, 32, 33 then
1344
     * codes[10..13].p = 30 - 10.
1345
     */
1346
633
    last_len = 0;
1347
1.86k
    for (i = j = 0; i < ncodes; i++) {
1348
1.23k
        if (codes[i].len > last_len) {
1349
405
            j = codes[i].code - i;
1350
405
            last_len = codes[i].len;
1351
405
        }
1352
1.23k
        codes[i].p = j;
1353
1.23k
    }
1354
1355
    // puts("==HUFF LEN==");
1356
    // for (i = 0; i <= last_len+1; i++) {
1357
    //     printf("len %d=%d prefix %d\n", i, h->u.huffman.lengths[i], h->u.huffman.prefix[i]);
1358
    // }
1359
    // puts("===HUFFMAN CODES===");
1360
    // for (i = 0; i < ncodes; i++) {
1361
    //     int j;
1362
    //     printf("%d: %d %d %d ", i, codes[i].symbol, codes[i].len, codes[i].code);
1363
    //     j = codes[i].len;
1364
    //     while (j) {
1365
    //         putchar(codes[i].code & (1 << --j) ? '1' : '0');
1366
    //     }
1367
    //     printf(" %d\n", codes[i].code);
1368
    // }
1369
1370
633
    if (option == E_BYTE || option == E_BYTE_ARRAY) {
1371
297
        if (h->u.huffman.codes[0].len == 0)
1372
144
            h->decode = cram_huffman_decode_char0;
1373
153
        else
1374
153
            h->decode = cram_huffman_decode_char;
1375
336
    } else if (option == E_LONG) {
1376
0
        if (h->u.huffman.codes[0].len == 0)
1377
0
            h->decode = cram_huffman_decode_long0;
1378
0
        else
1379
0
            h->decode = cram_huffman_decode_long;
1380
336
    } else if (option == E_INT || option == E_BYTE) {
1381
336
        if (h->u.huffman.codes[0].len == 0)
1382
114
            h->decode = cram_huffman_decode_int0;
1383
222
        else
1384
222
            h->decode = cram_huffman_decode_int;
1385
336
    } else {
1386
0
        return NULL;
1387
0
    }
1388
633
    h->describe = cram_huffman_describe;
1389
1390
633
    return (cram_codec *)h;
1391
1392
99
 malformed:
1393
99
    hts_log_error("Malformed huffman header stream");
1394
99
    free(codes);
1395
99
    free(h);
1396
99
    return NULL;
1397
633
}
1398
1399
/*
 * Byte encoder used when the first code has length 0: no bits are
 * emitted, so there is nothing to do.
 *
 * Returns 0 always.
 */
int cram_huffman_encode_char0(cram_slice *slice, cram_codec *c,
                              char *in, int in_size) {
    const int ok = 0;

    return ok;
}
1403
1404
/*
 * Huffman encodes in_size bytes from "in", appending the code bits to
 * c->out.
 *
 * Symbols in the range -1 .. MAX_HUFF-1 are located through the val2code
 * lookup table; anything else falls back to a linear scan of the codes.
 *
 * Returns the OR of the store_bits_MSB results (0 when all succeed);
 *        -1 if a symbol has no code.
 */
int cram_huffman_encode_char(cram_slice *slice, cram_codec *c,
                             char *in, int in_size) {
    unsigned char *cursor = (unsigned char *)in;
    int status = 0;

    for (; in_size; in_size--) {
        int sym = *cursor++;
        int slot;

        if (sym >= -1 && sym < MAX_HUFF) {
            slot = c->u.e_huffman.val2code[sym+1];
            assert(c->u.e_huffman.codes[slot].symbol == sym);
        } else {
            /* Slow path: linear search of the code table. */
            slot = 0;
            while (slot < c->u.e_huffman.nvals
                   && c->u.e_huffman.codes[slot].symbol != sym)
                slot++;

            if (slot == c->u.e_huffman.nvals)
                return -1;
        }

        status |= store_bits_MSB(c->out,
                                 c->u.e_huffman.codes[slot].code,
                                 c->u.e_huffman.codes[slot].len);
    }

    return status;
}
1434
1435
/*
 * Integer encoder used when the first code has length 0: no bits are
 * emitted, so there is nothing to do.
 *
 * Returns 0 always.
 */
int cram_huffman_encode_int0(cram_slice *slice, cram_codec *c,
                             char *in, int in_size) {
    const int ok = 0;

    return ok;
}
1439
1440
/*
 * Huffman encodes in_size values from "in" (viewed as int[]), appending
 * the code bits to c->out.
 *
 * Symbols in the range -1 .. MAX_HUFF-1 are located through the val2code
 * lookup table; anything else falls back to a linear scan of the codes.
 *
 * Returns the OR of the store_bits_MSB results (0 when all succeed);
 *        -1 if a symbol has no code.
 */
int cram_huffman_encode_int(cram_slice *slice, cram_codec *c,
                            char *in, int in_size) {
    int *cursor = (int *)in;
    int status = 0;

    for (; in_size; in_size--) {
        int sym = *cursor++;
        int slot;

        if (sym >= -1 && sym < MAX_HUFF) {
            slot = c->u.e_huffman.val2code[sym+1];
            assert(c->u.e_huffman.codes[slot].symbol == sym);
        } else {
            /* Slow path: linear search of the code table. */
            slot = 0;
            while (slot < c->u.e_huffman.nvals
                   && c->u.e_huffman.codes[slot].symbol != sym)
                slot++;

            if (slot == c->u.e_huffman.nvals)
                return -1;
        }

        status |= store_bits_MSB(c->out,
                                 c->u.e_huffman.codes[slot].code,
                                 c->u.e_huffman.codes[slot].len);
    }

    return status;
}
1471
1472
/*
 * 64-bit integer encoder used when the first code has length 0: no bits
 * are emitted, so there is nothing to do.
 *
 * Returns 0 always.
 */
int cram_huffman_encode_long0(cram_slice *slice, cram_codec *c,
                              char *in, int in_size) {
    const int ok = 0;

    return ok;
}
1476
1477
/*
 * Huffman encodes in_size values from "in" (viewed as int64_t[]),
 * appending the code bits to c->out.
 *
 * Symbols in the range -1 .. MAX_HUFF-1 are located through the val2code
 * lookup table; anything else falls back to a linear scan of the codes.
 *
 * Returns the OR of the store_bits_MSB results (0 when all succeed);
 *        -1 if a symbol has no code.
 */
int cram_huffman_encode_long(cram_slice *slice, cram_codec *c,
                             char *in, int in_size) {
    int i, code, len, r = 0;
    int64_t *syms = (int64_t *)in;

    while (in_size--) {
        /*
         * Keep the full 64-bit value.  Narrowing to int here would let a
         * large input alias a small symbol and be encoded as if it were
         * that symbol instead of being reported as missing.
         */
        int64_t sym = *syms++;

        if (sym >= -1 && sym < MAX_HUFF) {
            i = c->u.e_huffman.val2code[sym+1];
            assert(c->u.e_huffman.codes[i].symbol == sym);
            code = c->u.e_huffman.codes[i].code;
            len  = c->u.e_huffman.codes[i].len;
        } else {
            /* Slow path: linear search of the code table. */
            for (i = 0; i < c->u.e_huffman.nvals; i++) {
                if (c->u.e_huffman.codes[i].symbol == sym)
                    break;
            }
            if (i == c->u.e_huffman.nvals)
                return -1;

            code = c->u.e_huffman.codes[i].code;
            len  = c->u.e_huffman.codes[i].len;
        }

        r |= store_bits_MSB(c->out, code, len);
    }

    return r;
}
1508
1509
33.8k
/* Releases a huffman encoder and its code table; NULL is accepted. */
void cram_huffman_encode_free(cram_codec *c) {
    if (c) {
        /* The table may be NULL; free() tolerates that. */
        free(c->u.e_huffman.codes);
        free(c);
    }
}
1517
1518
/*
1519
 * Encodes a huffman tree.
1520
 * Returns number of bytes written.
1521
 */
1522
int cram_huffman_encode_store(cram_codec *c, cram_block *b, char *prefix,
1523
33.2k
                              int version) {
1524
33.2k
    int i, len = 0, r = 0, n;
1525
33.2k
    cram_huffman_code *codes = c->u.e_huffman.codes;
1526
    /*
1527
     * Up to code length 127 means 2.5e+26 bytes of data required (worst
1528
     * case huffman tree needs symbols with freqs matching the Fibonacci
1529
     * series). So guaranteed 1 byte per code.
1530
     *
1531
     * Symbols themselves could be 5 bytes (eg -1 is 5 bytes in itf8).
1532
     *
1533
     * Therefore 6*ncodes + 5 + 5 + 1 + 5 is max memory
1534
     */
1535
33.2k
    char *tmp = hts_malloc_pse(6, c->u.e_huffman.nvals, 0, 16);
1536
33.2k
    char *tp = tmp, *tpend = tmp+6*c->u.e_huffman.nvals+16;
1537
1538
33.2k
    if (!tmp)
1539
0
        return -1;
1540
1541
33.2k
    if (prefix) {
1542
32.4k
        size_t l = strlen(prefix);
1543
32.4k
        BLOCK_APPEND(b, prefix, l);
1544
32.4k
        len += l;
1545
32.4k
    }
1546
1547
33.2k
    tp += c->vv->varint_put32(tp, tpend, c->u.e_huffman.nvals);
1548
33.2k
    if (c->u.e_huffman.option == E_LONG) {
1549
0
        for (i = 0; i < c->u.e_huffman.nvals; i++) {
1550
0
            tp += c->vv->varint_put64(tp, tpend, codes[i].symbol);
1551
0
        }
1552
33.2k
    } else if (c->u.e_huffman.option == E_INT || c->u.e_huffman.option == E_BYTE) {
1553
66.4k
        for (i = 0; i < c->u.e_huffman.nvals; i++) {
1554
33.2k
            tp += c->vv->varint_put32(tp, tpend, codes[i].symbol);
1555
33.2k
        }
1556
33.2k
    } else {
1557
0
        return -1;
1558
0
    }
1559
1560
33.2k
    tp += c->vv->varint_put32(tp, tpend, c->u.e_huffman.nvals);
1561
66.4k
    for (i = 0; i < c->u.e_huffman.nvals; i++)
1562
33.2k
        tp += c->vv->varint_put32(tp, tpend, codes[i].len);
1563
1564
33.2k
    len += (n = c->vv->varint_put32_blk(b, c->codec)); r |= n;
1565
33.2k
    len += (n = c->vv->varint_put32_blk(b, tp-tmp));   r |= n;
1566
33.2k
    BLOCK_APPEND(b, tmp, tp-tmp);
1567
33.2k
    len += tp-tmp;
1568
1569
33.2k
    free(tmp);
1570
1571
33.2k
    if (r > 0)
1572
33.2k
        return len;
1573
1574
0
 block_err:
1575
0
    return -1;
1576
33.2k
}
1577
1578
/*
 * Builds a huffman encoder from collected symbol statistics.
 *
 * Symbols and their frequencies are gathered from st->freqs[] and, when
 * present, the st->h hash table.  Code lengths come from repeatedly
 * merging the two least frequent nodes, after which canonical codes are
 * assigned in (length, symbol) order.
 *
 * Returns a newly allocated codec on success (release via c->free);
 *         NULL on failure (out of memory or unsupported option).
 */
cram_codec *cram_huffman_encode_init(cram_stats *st,
                                     enum cram_encoding codec,
                                     enum cram_external_type option,
                                     void *dat,
                                     int version, varint_vec *vv) {
    int *vals = NULL, *freqs = NULL, *lens = NULL, code, len;
    int *new_vals, *new_freqs;
    int i, k;
    size_t nvals, vals_alloc = 0;
    cram_codec *c;
    cram_huffman_code *codes;

    c = malloc(sizeof(*c));
    if (!c)
        return NULL;
    c->codec = E_HUFFMAN;

    /* Count number of unique symbols */
    for (nvals = i = 0; i < MAX_STAT_VAL; i++) {
        if (!st->freqs[i])
            continue;
        if (nvals >= vals_alloc) {
            vals_alloc = vals_alloc ? vals_alloc*2 : 1024;
            new_vals  = hts_realloc_p(vals, sizeof(*vals), vals_alloc);
            if (!new_vals) goto nomem;
            vals = new_vals;
            new_freqs = hts_realloc_p(freqs, sizeof(*freqs), vals_alloc);
            if (!new_freqs) goto nomem;
            freqs = new_freqs;
        }
        vals[nvals] = i;
        freqs[nvals] = st->freqs[i];
        assert(st->freqs[i] > 0);
        nvals++;
    }
    if (st->h) {
        khint_t hk;

        for (hk = kh_begin(st->h); hk != kh_end(st->h); hk++) {
            if (!kh_exist(st->h, hk))
                continue;
            if (nvals >= vals_alloc) {
                vals_alloc = vals_alloc ? vals_alloc*2 : 1024;
                new_vals  = hts_realloc_p(vals, sizeof(*vals), vals_alloc);
                if (!new_vals) goto nomem;
                vals = new_vals;
                new_freqs = hts_realloc_p(freqs, sizeof(*freqs), vals_alloc);
                if (!new_freqs) goto nomem;
                freqs = new_freqs;
            }
            vals[nvals]= kh_key(st->h, hk);
            freqs[nvals] = kh_val(st->h, hk);
            assert(freqs[nvals] > 0);
            nvals++;
        }
    }

    assert(nvals > 0);

    /* Internal tree nodes are appended after the leaves: double the room. */
    new_freqs = hts_realloc_p(freqs, 2 * sizeof(*freqs), nvals);
    if (!new_freqs) goto nomem;
    freqs = new_freqs;
    lens = calloc(nvals, 2 * sizeof(*lens));
    if (!lens) goto nomem;

    /* Inefficient, use pointers to form chain so we can insert and maintain
     * a sorted list? This is currently O(nvals^2) complexity.
     */
    for (;;) {
        int low1 = INT_MAX, low2 = INT_MAX;
        int ind1 = 0, ind2 = 0;
        for (i = 0; i < nvals; i++) {
            /* Negative frequency marks a node already merged. */
            if (freqs[i] < 0)
                continue;
            if (low1 > freqs[i])
                low2 = low1, ind2 = ind1, low1 = freqs[i], ind1 = i;
            else if (low2 > freqs[i])
                low2 = freqs[i], ind2 = i;
        }
        if (low2 == INT_MAX)
            break;

        /* Create the parent node; lens[] temporarily holds parent links. */
        freqs[nvals] = low1 + low2;
        lens[ind1] = nvals;
        lens[ind2] = nvals;
        freqs[ind1] *= -1;
        freqs[ind2] *= -1;
        nvals++;
    }
    /* Back from total node count to the number of leaves. */
    nvals = nvals/2+1;

    /* Assign lengths */
    for (i = 0; i < nvals; i++) {
        int code_len = 0;
        /* Depth of the leaf = number of parent links up to the root. */
        for (k = lens[i]; k; k = lens[k])
            code_len++;
        lens[i] = code_len;
        freqs[i] *= -1;
    }


    /* Sort, need in a struct */
    if (!(codes = hts_malloc_p(sizeof(*codes), nvals)))
        goto nomem;
    for (i = 0; i < nvals; i++) {
        codes[i].symbol = vals[i];
        codes[i].len = lens[i];
    }
    qsort(codes, nvals, sizeof(*codes), code_sort);

    /*
     * Generate canonical codes from lengths.
     * Sort by length.
     * Start with 0.
     * Every new code of same length is +1.
     * Every new code of new length is +1 then <<1 per extra length.
     *
     * For example, lengths 1,3,3,3,4,4 give codes
     *   0, 4 ((0+1)<<2), 5, 6, 14 ((6+1)<<1), 15.
     */
    code = 0; len = codes[0].len;
    for (i = 0; i < nvals; i++) {
        while (len != codes[i].len) {
            code<<=1;
            len++;
        }
        codes[i].code = code++;

        if (codes[i].symbol >= -1 && codes[i].symbol < MAX_HUFF)
            c->u.e_huffman.val2code[codes[i].symbol+1] = i;
    }

    free(lens);
    free(vals);
    free(freqs);

    c->u.e_huffman.codes = codes;
    c->u.e_huffman.nvals = nvals;
    c->u.e_huffman.option = option;

    c->free = cram_huffman_encode_free;
    if (option == E_BYTE || option == E_BYTE_ARRAY) {
        if (c->u.e_huffman.codes[0].len == 0)
            c->encode = cram_huffman_encode_char0;
        else
            c->encode = cram_huffman_encode_char;
    } else if (option == E_INT) {
        if (c->u.e_huffman.codes[0].len == 0)
            c->encode = cram_huffman_encode_int0;
        else
            c->encode = cram_huffman_encode_int;
    } else if (option == E_LONG) {
        if (c->u.e_huffman.codes[0].len == 0)
            c->encode = cram_huffman_encode_long0;
        else
            c->encode = cram_huffman_encode_long;
    } else {
        /* Unsupported value type: release the table and the codec. */
        free(codes);
        free(c);
        return NULL;
    }
    c->store = cram_huffman_encode_store;
    c->flush = NULL;

    return c;

 nomem:
    hts_log_error("Out of memory");
    free(vals);
    free(freqs);
    free(lens);
    free(c);
    return NULL;
}
1768
1769
/*
1770
 * ---------------------------------------------------------------------------
1771
 * BYTE_ARRAY_LEN
1772
 */
1773
int cram_byte_array_len_decode(cram_slice *slice, cram_codec *c,
1774
                               cram_block *in, char *out,
1775
0
                               int *out_size) {
1776
    /* Fetch length */
1777
0
    int32_t len = 0, one = 1;
1778
0
    int r;
1779
1780
0
    cram_codec *len_codec = c->u.byte_array_len.len_codec;
1781
0
    cram_codec *val_codec = c->u.byte_array_len.val_codec;
1782
1783
0
    r = len_codec->decode(slice, len_codec, in, (char *)&len, &one);
1784
0
    if (len < 0 || (len > *out_size &&
1785
0
                    !(val_codec->codec == E_EXTERNAL &&
1786
0
                      val_codec->u.external.type == E_BYTE_ARRAY_BLOCK))) {
1787
0
        fprintf(stderr, "Error: overflow in cram_byte_array_len_decode.\n");
1788
0
        return -1;
1789
0
    }
1790
1791
0
    if (!r && val_codec) {
1792
0
        r = val_codec->decode(slice, val_codec, in, out, &len);
1793
0
    } else {
1794
0
        return -1;
1795
0
    }
1796
1797
0
    *out_size = len;
1798
1799
0
    return r;
1800
0
}
1801
1802
363
/*
 * Releases a BYTE_ARRAY_LEN decoder together with whichever of its
 * length and value sub-codecs have been created.  NULL is accepted.
 */
void cram_byte_array_len_decode_free(cram_codec *c) {
    cram_codec *sub;

    if (c == NULL)
        return;

    /* Each sub-codec is released through its own free callback. */
    sub = c->u.byte_array_len.len_codec;
    if (sub != NULL)
        sub->free(sub);

    sub = c->u.byte_array_len.val_codec;
    if (sub != NULL)
        sub->free(sub);

    free(c);
}
1813
1814
0
/*
 * Appends a textual description of a BYTE_ARRAY_LEN decoder to 'ks',
 * including the descriptions of both sub-codecs ("?" for a sub-codec that
 * has no describe method).
 *
 * Returns 0 on success, non-zero if any step reported a failure.
 */
int cram_byte_array_len_describe(cram_codec *c, kstring_t *ks) {
    int status = ksprintf(ks, "BYTE_ARRAY_LEN(len_codec={") < 0;
    cram_byte_array_len_decoder *bal = &c->u.byte_array_len;

    // Length sub-codec
    if (bal->len_codec->describe)
        status |= bal->len_codec->describe(bal->len_codec, ks);
    else
        status |= (ksprintf(ks, "?") < 0);

    status |= ksprintf(ks, "},val_codec={") < 0;

    // Value sub-codec
    if (bal->val_codec->describe)
        status |= bal->val_codec->describe(bal->val_codec, ks);
    else
        status |= (ksprintf(ks, "?") < 0);

    status |= ksprintf(ks, "}") < 0;

    return status;
}
1829
1830
/*
 * Builds a BYTE_ARRAY_LEN decoder from its serialised parameters.
 *
 * 'data' holds two consecutive records, each an encoding id, a payload size
 * and the payload itself: first the length codec (always decoded as E_INT),
 * then the value codec (decoded with the caller's 'option').  The two
 * records must account for exactly 'size' bytes.
 *
 * Returns the new codec on success;
 *         NULL on failure (out of memory, malformed header, or a sub-codec
 *         that could not be initialised).
 */
cram_codec *cram_byte_array_len_decode_init(cram_block_compression_hdr *hdr,
                                            char *data, int size,
                                            enum cram_encoding codec,
                                            enum cram_external_type option,
                                            int version, varint_vec *vv) {
    char *pos = data;
    char *const limit = data + size;
    int sub_enc, sub_len;

    cram_codec *c = malloc(sizeof(*c));
    if (c == NULL)
        return NULL;

    c->codec    = E_BYTE_ARRAY_LEN;
    c->decode   = cram_byte_array_len_decode;
    c->free     = cram_byte_array_len_decode_free;
    c->describe = cram_byte_array_len_describe;
    // Start with no sub-codecs so the free function is safe on any exit path.
    c->u.byte_array_len.len_codec = NULL;
    c->u.byte_array_len.val_codec = NULL;

    /* Length codec */
    sub_enc = vv->varint_get32(&pos, limit, NULL);
    sub_len = vv->varint_get32(&pos, limit, NULL);
    if (sub_len < 0 || limit - pos < sub_len)
        goto malformed;

    c->u.byte_array_len.len_codec =
        cram_decoder_init(hdr, sub_enc, pos, sub_len, E_INT, version, vv);
    if (!c->u.byte_array_len.len_codec)
        goto no_codec;
    pos += sub_len;

    /* Value codec */
    sub_enc = vv->varint_get32(&pos, limit, NULL);
    sub_len = vv->varint_get32(&pos, limit, NULL);
    if (sub_len < 0 || limit - pos < sub_len)
        goto malformed;

    c->u.byte_array_len.val_codec =
        cram_decoder_init(hdr, sub_enc, pos, sub_len, option, version, vv);
    if (!c->u.byte_array_len.val_codec)
        goto no_codec;
    pos += sub_len;

    // Trailing or missing bytes mean the header is not what we expected.
    if (pos - data != size)
        goto malformed;

    return c;

 malformed:
    hts_log_error("Malformed byte_array_len header stream");
 no_codec:
    cram_byte_array_len_decode_free(c);
    return NULL;
}
1880
1881
/*
 * Encodes a byte array as its length (through the length sub-codec)
 * followed by its contents (through the value sub-codec).
 *
 * Returns the bitwise OR of the two sub-codec results.
 */
int cram_byte_array_len_encode(cram_slice *slice, cram_codec *c,
                               char *in, int in_size) {
    cram_codec *len_sub = c->u.e_byte_array_len.len_codec;
    cram_codec *val_sub = c->u.e_byte_array_len.val_codec;
    int32_t nbytes = in_size;

    // The length is written as a single 32-bit integer item.
    int status = len_sub->encode(slice, len_sub, (char *)&nbytes, 1);
    status |= val_sub->encode(slice, val_sub, in, in_size);

    return status;
}
1894
1895
4.59k
/*
 * Releases a BYTE_ARRAY_LEN encoder together with whichever of its two
 * sub-codecs are present.  A NULL codec is ignored.
 */
void cram_byte_array_len_encode_free(cram_codec *c) {
    if (c == NULL)
        return;

    cram_codec *len_sub = c->u.e_byte_array_len.len_codec;
    if (len_sub != NULL)
        len_sub->free(len_sub);

    cram_codec *val_sub = c->u.e_byte_array_len.val_codec;
    if (val_sub != NULL)
        val_sub->free(val_sub);

    free(c);
}
1907
1908
/*
 * Serialises a BYTE_ARRAY_LEN encoder description into block 'b'.
 *
 * The output is the optional 'prefix' string, the codec id, the combined
 * size of the two sub-codec descriptions, and then those two descriptions
 * (length codec first, value codec second).  Each sub-codec is stored into
 * a temporary block first so that the combined size is known up front.
 *
 * Returns the number of bytes accounted for on success;
 *         -1 on failure.
 */
int cram_byte_array_len_encode_store(cram_codec *c, cram_block *b,
                                     char *prefix, int version) {
    int len = 0, len2, len3, r = 0, n;
    cram_codec *tc;
    cram_block *b_len = NULL, *b_val = NULL;

    if (prefix) {
        size_t l = strlen(prefix);
        BLOCK_APPEND(b, prefix, l);
        len += l;
    }

    /* Length sub-codec description */
    tc = c->u.e_byte_array_len.len_codec;
    b_len = cram_new_block(0, 0);
    if (!b_len) goto block_err;
    len2 = tc->store(tc, b_len, NULL, version);
    if (len2 < 0) goto block_err;

    /* Value sub-codec description */
    tc = c->u.e_byte_array_len.val_codec;
    b_val = cram_new_block(0, 0);
    if (!b_val) goto block_err;
    len3 = tc->store(tc, b_val, NULL, version);
    if (len3 < 0) goto block_err;

    len += (n = c->vv->varint_put32_blk(b, c->codec));  r |= n;
    len += (n = c->vv->varint_put32_blk(b, len2+len3)); r |= n;
    BLOCK_APPEND(b, BLOCK_DATA(b_len), BLOCK_SIZE(b_len));
    BLOCK_APPEND(b, BLOCK_DATA(b_val), BLOCK_SIZE(b_val));

    cram_free_block(b_len);
    cram_free_block(b_val);

    // Both temporary blocks are already released here, so a varint failure
    // must return directly rather than fall into block_err, which would
    // free them a second time.
    return r > 0 ? len + len2 + len3 : -1;

 block_err:
    if (b_len) cram_free_block(b_len);
    if (b_val) cram_free_block(b_val);
    return -1;
}
1948
1949
/*
 * Creates a BYTE_ARRAY_LEN encoder.
 *
 * 'dat' points to a cram_byte_array_len_encoder naming the encodings (and
 * their parameters) to use for the length and value sub-codecs.  The length
 * sub-codec is created with the supplied stats as E_INT; the value sub-codec
 * is created without stats as E_BYTE_ARRAY.
 *
 * Returns the new codec on success;
 *         NULL on failure (out of memory or either sub-codec missing).
 */
cram_codec *cram_byte_array_len_encode_init(cram_stats *st,
                                            enum cram_encoding codec,
                                            enum cram_external_type option,
                                            void *dat,
                                            int version, varint_vec *vv) {
    cram_byte_array_len_encoder *params = dat;
    cram_codec *c = malloc(sizeof(*c));

    if (c == NULL)
        return NULL;

    c->codec  = E_BYTE_ARRAY_LEN;
    c->free   = cram_byte_array_len_encode_free;
    c->encode = cram_byte_array_len_encode;
    c->store  = cram_byte_array_len_encode_store;
    c->flush  = NULL;

    // Both sub-codecs are always attempted before either result is checked,
    // so the free function sees two fully assigned pointers.
    cram_codec *len_sub = cram_encoder_init(params->len_encoding, st, E_INT,
                                            params->len_dat, version, vv);
    c->u.e_byte_array_len.len_codec = len_sub;

    cram_codec *val_sub = cram_encoder_init(params->val_encoding, NULL,
                                            E_BYTE_ARRAY, params->val_dat,
                                            version, vv);
    c->u.e_byte_array_len.val_codec = val_sub;

    if (len_sub && val_sub)
        return c;

    cram_byte_array_len_encode_free(c);
    return NULL;
}
1983
1984
/*
1985
 * ---------------------------------------------------------------------------
1986
 * BYTE_ARRAY_STOP
1987
 */
1988
static int cram_byte_array_stop_decode_char(cram_slice *slice, cram_codec *c,
1989
                                            cram_block *in, char *out,
1990
0
                                            int *out_size) {
1991
0
    uint8_t *cp;
1992
0
    cram_block *b = NULL;
1993
1994
0
    b = cram_get_block_by_id(slice, c->u.byte_array_stop.content_id);
1995
0
    if (!b)
1996
0
        return *out_size?-1:0;
1997
1998
0
    if (b->idx >= b->uncomp_size)
1999
0
        return -1;
2000
2001
0
    ssize_t term = b->uncomp_size - b->idx;
2002
0
    cp = b->data + b->idx;
2003
0
    if (out) {
2004
       // memccpy equivalent but without copying the terminating byte
2005
0
        if (term > *out_size)
2006
0
            term = *out_size;
2007
0
        while (--term >= 0 && *cp != c->u.byte_array_stop.stop) {
2008
0
            *out++ = *cp++;
2009
0
        }
2010
2011
0
    } else {
2012
        // Consume input, but produce no output
2013
0
        while (--term >= 0 && *cp != c->u.byte_array_stop.stop) {
2014
0
            cp++;
2015
0
        }
2016
0
    }
2017
2018
    // Attempted overrun on input or output
2019
0
    if (cp >= b->data + b->uncomp_size || *cp != c->u.byte_array_stop.stop)
2020
0
        return -1;
2021
2022
0
    *out_size = cp - (b->data + b->idx);
2023
0
    b->idx = cp - b->data + 1;
2024
2025
0
    return 0;
2026
0
}
2027
2028
int cram_byte_array_stop_decode_block(cram_slice *slice, cram_codec *c,
2029
                                      cram_block *in, char *out_,
2030
0
                                      int *out_size) {
2031
0
    cram_block *b;
2032
0
    cram_block *out = (cram_block *)out_;
2033
0
    unsigned char *cp, *cp_end;
2034
0
    unsigned char stop;
2035
2036
0
    b = cram_get_block_by_id(slice, c->u.byte_array_stop.content_id);
2037
0
    if (!b)
2038
0
        return *out_size?-1:0;
2039
2040
0
    if (b->idx >= b->uncomp_size)
2041
0
        return -1;
2042
0
    cp = b->data + b->idx;
2043
0
    cp_end = b->data + b->uncomp_size;
2044
2045
    // STOP byte is hard-coded as zero by our name tokeniser decoder
2046
    // implementation, so we may ignore what was requested.
2047
0
    stop = b->orig_method == TOK3 ? 0 : c->u.byte_array_stop.stop;
2048
2049
0
    if (cp_end - cp < out->alloc - out->byte) {
2050
0
        unsigned char *out_cp = BLOCK_END(out);
2051
0
        while (cp != cp_end && *cp != stop)
2052
0
            *out_cp++ = *cp++;
2053
0
        BLOCK_SIZE(out) = out_cp - BLOCK_DATA(out);
2054
0
    } else {
2055
0
        unsigned char *cp_start;
2056
0
        for (cp_start = cp; cp != cp_end && *cp != stop; cp++)
2057
0
            ;
2058
0
        BLOCK_APPEND(out, cp_start, cp - cp_start);
2059
0
        BLOCK_GROW(out, cp - cp_start);
2060
0
    }
2061
2062
0
    *out_size = cp - (b->data + b->idx);
2063
0
    b->idx = cp - b->data + 1;
2064
2065
0
    return 0;
2066
2067
0
 block_err:
2068
0
    return -1;
2069
0
}
2070
2071
456
/*
 * Releases a BYTE_ARRAY_STOP decoder.  The codec owns no further
 * allocations, and free() already accepts NULL.
 */
void cram_byte_array_stop_decode_free(cram_codec *c) {
    free(c);
}
2076
2077
0
/*
 * Appends a textual description of a BYTE_ARRAY_STOP decoder (its stop byte
 * and content id) to 'ks'.
 *
 * Returns 0 on success;
 *        -1 on failure.
 */
int cram_byte_array_stop_describe(cram_codec *c, kstring_t *ks) {
    int written = ksprintf(ks, "BYTE_ARRAY_STOP(stop=%d,id=%d)",
                           c->u.byte_array_stop.stop,
                           c->u.byte_array_stop.content_id);
    if (written < 0)
        return -1;

    return 0;
}
2083
2084
/*
 * Builds a BYTE_ARRAY_STOP decoder from its serialised parameters.
 *
 * 'data' holds the stop byte followed by the content id, which is a fixed
 * 4-byte little-endian value for CRAM major version 1 and a varint
 * otherwise.  Exactly 'size' bytes must be consumed.
 *
 * 'option' chooses the output flavour: E_BYTE_ARRAY_BLOCK appends to a
 * cram_block, E_BYTE_ARRAY writes to a char buffer.
 *
 * Returns the new codec on success;
 *         NULL on failure.
 */
cram_codec *cram_byte_array_stop_decode_init(cram_block_compression_hdr *hdr,
                                             char *data, int size,
                                             enum cram_encoding codec,
                                             enum cram_external_type option,
                                             int version, varint_vec *vv) {
    cram_codec *c = NULL;
    unsigned char *cp = (unsigned char *)data;
    int err = 0;
    const int is_v1 = (CRAM_MAJOR_VERS(version) == 1);
    const int min_size = is_v1 ? 5 : 2;

    if (size < min_size)
        goto malformed;

    c = malloc(sizeof(*c));
    if (c == NULL)
        return NULL;

    c->codec = E_BYTE_ARRAY_STOP;

    if (option == E_BYTE_ARRAY_BLOCK) {
        c->decode = cram_byte_array_stop_decode_block;
    } else if (option == E_BYTE_ARRAY) {
        c->decode = cram_byte_array_stop_decode_char;
    } else {
        hts_log_error("The byte_array_stop codec only supports BYTE_ARRAYs");
        free(c);
        return NULL;
    }

    c->free     = cram_byte_array_stop_decode_free;
    c->describe = cram_byte_array_stop_describe;

    c->u.byte_array_stop.stop = *cp++;

    if (is_v1) {
        // Fixed-width little-endian content id
        c->u.byte_array_stop.content_id = cp[0]
            | (cp[1] << 8)
            | (cp[2] << 16)
            | ((unsigned int) cp[3] << 24);
        cp += 4;
    } else {
        c->u.byte_array_stop.content_id =
            vv->varint_get32((char **)&cp, data+size, &err);
    }

    // All of the header must have been used, with no varint error.
    if (err || (char *)cp - data != size)
        goto malformed;

    return c;

 malformed:
    hts_log_error("Malformed byte_array_stop header stream");
    free(c);
    return NULL;
}
2134
2135
/*
 * Encodes a byte array by appending its contents and then the codec's stop
 * byte to the codec's output block.
 *
 * Returns 0 on success;
 *        -1 if the output block could not be extended.
 */
int cram_byte_array_stop_encode(cram_slice *slice, cram_codec *c,
                                char *in, int in_size) {
    cram_block *dst = c->out;

    BLOCK_APPEND(dst, in, in_size);
    BLOCK_APPEND_CHAR(dst, c->u.e_byte_array_stop.stop);

    return 0;

 block_err:
    return -1;
}
2144
2145
8.92k
/*
 * Releases a BYTE_ARRAY_STOP encoder.  The codec owns no further
 * allocations, and free() already accepts NULL.
 */
void cram_byte_array_stop_encode_free(cram_codec *c) {
    free(c);
}
2150
2151
/*
 * Serialises a BYTE_ARRAY_STOP encoder description into block 'b'.
 *
 * The output is the optional 'prefix' string, the codec id, the parameter
 * size, the stop byte and the content id.  For CRAM major version 1 the
 * content id is 4 little-endian bytes (parameter size 5); otherwise it is
 * a varint.
 *
 * Returns the number of bytes written on success;
 *         -1 on failure.
 */
int cram_byte_array_stop_encode_store(cram_codec *c, cram_block *b,
                                      char *prefix, int version) {
    char buf[20];
    char *cp = buf;
    char *const buf_end = buf + sizeof(buf);
    int total = 0;

    if (prefix) {
        size_t plen = strlen(prefix);
        BLOCK_APPEND(b, prefix, plen);
        total += plen;
    }

    cp += c->vv->varint_put32(cp, buf_end, c->codec);

    if (CRAM_MAJOR_VERS(version) == 1) {
        int shift;

        cp += c->vv->varint_put32(cp, buf_end, 5);
        *cp++ = c->u.e_byte_array_stop.stop;
        // Content id, least significant byte first
        for (shift = 0; shift < 32; shift += 8)
            *cp++ = (c->u.e_byte_array_stop.content_id >> shift) & 0xff;
    } else {
        // Parameter size: one stop byte plus the varint-encoded content id
        int id_size = c->vv->varint_size(c->u.e_byte_array_stop.content_id);

        cp += c->vv->varint_put32(cp, buf_end, 1 + id_size);
        *cp++ = c->u.e_byte_array_stop.stop;
        cp += c->vv->varint_put32(cp, buf_end,
                                  c->u.e_byte_array_stop.content_id);
    }

    BLOCK_APPEND(b, buf, cp-buf);
    total += cp-buf;

    return total;

 block_err:
    return -1;
}
2186
2187
/*
 * Creates a BYTE_ARRAY_STOP encoder.
 *
 * 'dat' is read as an array of two ints: the stop byte value followed by
 * the content id to write to.
 *
 * Returns the new codec on success;
 *         NULL if memory could not be allocated.
 */
cram_codec *cram_byte_array_stop_encode_init(cram_stats *st,
                                             enum cram_encoding codec,
                                             enum cram_external_type option,
                                             void *dat,
                                             int version, varint_vec *vv) {
    const int *params = (int *)dat;
    cram_codec *c = malloc(sizeof(*c));

    if (c == NULL)
        return NULL;

    c->codec  = E_BYTE_ARRAY_STOP;
    c->free   = cram_byte_array_stop_encode_free;
    c->encode = cram_byte_array_stop_encode;
    c->store  = cram_byte_array_stop_encode_store;
    c->flush  = NULL;

    c->u.e_byte_array_stop.stop       = params[0];
    c->u.e_byte_array_stop.content_id = params[1];

    return c;
}
2208
2209
/*
2210
 * ---------------------------------------------------------------------------
2211
 */
2212
2213
552
const char *cram_encoding2str(enum cram_encoding t) {
2214
552
    switch (t) {
2215
9
    case E_NULL:            return "NULL";
2216
0
    case E_EXTERNAL:        return "EXTERNAL";
2217
3
    case E_GOLOMB:          return "GOLOMB";
2218
0
    case E_HUFFMAN:         return "HUFFMAN";
2219
0
    case E_BYTE_ARRAY_LEN:  return "BYTE_ARRAY_LEN";
2220
0
    case E_BYTE_ARRAY_STOP: return "BYTE_ARRAY_STOP";
2221
6
    case E_BETA:            return "BETA";
2222
0
    case E_SUBEXP:          return "SUBEXP";
2223
0
    case E_GOLOMB_RICE:     return "GOLOMB_RICE";
2224
0
    case E_GAMMA:           return "GAMMA";
2225
2226
0
    case E_NUM_CODECS:
2227
534
    default:                return "?";
2228
552
    }
2229
552
}
2230
2231
/*
 * Decoder constructors, indexed by enum cram_encoding.  A NULL entry marks
 * an encoding with no decoder here.
 */
static cram_codec *(*decode_init[E_NUM_CODECS])(cram_block_compression_hdr *hdr,
                                                char *data,
                                                int size,
                                                enum cram_encoding codec,
                                                enum cram_external_type option,
                                                int version, varint_vec *vv) = {
    // CRAM 3.0 valid codecs
    NULL, // null codec
    cram_external_decode_init,
    NULL, // golomb
    cram_huffman_decode_init,
    cram_byte_array_len_decode_init,
    cram_byte_array_stop_decode_init,
    cram_beta_decode_init,
    cram_subexp_decode_init,
    NULL, // golomb rice
    cram_gamma_decode_init,
};

/*
 * Creates a decoder for 'codec' from the 'size' bytes of parameters at
 * 'data', dispatching through the decode_init table.
 *
 * On success the codec records 'vv' and takes the next codec id from
 * hdr->ncodecs, which is incremented.
 *
 * Returns the new codec on success;
 *         NULL if the encoding is out of range, has no decoder, or its
 *         constructor failed.
 */
cram_codec *cram_decoder_init(cram_block_compression_hdr *hdr,
                              enum cram_encoding codec,
                              char *data, int size,
                              enum cram_external_type option,
                              int version, varint_vec *vv) {
    // Reject anything we cannot dispatch before touching the table entry.
    if (codec < E_NULL || codec >= E_NUM_CODECS || !decode_init[codec]) {
        hts_log_error("Unimplemented codec of type %s", cram_encoding2str(codec));
        return NULL;
    }

    cram_codec *dec = decode_init[codec](hdr, data, size, codec,
                                         option, version, vv);
    if (dec == NULL)
        return NULL;

    dec->vv = vv;
    dec->codec_id = hdr->ncodecs++;

    return dec;
}
2268
2269
/*
 * Encoder constructors, indexed by enum cram_encoding.  A NULL entry marks
 * an encoding with no encoder here.
 */
static cram_codec *(*encode_init[E_NUM_CODECS])(cram_stats *stx,
                                                enum cram_encoding codec,
                                                enum cram_external_type option,
                                                void *opt,
                                                int version, varint_vec *vv) = {
    // CRAM 3.0 valid codecs
    NULL, // null codec
    cram_external_encode_init, // int/bytes in cram 3, byte only in cram 4
    NULL, // golomb
    cram_huffman_encode_init,
    cram_byte_array_len_encode_init,
    cram_byte_array_stop_encode_init,
    cram_beta_encode_init,
    NULL, // subexponential (we support decode only)
    NULL, // golomb rice
    NULL, // gamma (we support decode only)
};

/*
 * Creates an encoder for 'codec', dispatching through the encode_init
 * table.  On success the codec's output block is cleared to NULL and 'vv'
 * is recorded in it.
 *
 * Returns the new codec on success;
 *         NULL if 'st' is supplied but holds no values, or if the
 *         codec-specific constructor failed.
 *
 * Aborts if 'codec' has no encoder.  Out-of-range values take the same
 * path instead of indexing beyond the end of the table.
 */
cram_codec *cram_encoder_init(enum cram_encoding codec,
                              cram_stats *st,
                              enum cram_external_type option,
                              void *dat,
                              int version, varint_vec *vv) {
    if (st && !st->nvals)
        return NULL;

    // Range check mirrors cram_decoder_init; encode_init[] has exactly
    // E_NUM_CODECS entries.
    if (codec < E_NULL || codec >= E_NUM_CODECS || !encode_init[codec]) {
        hts_log_error("Unimplemented codec of type %s", cram_encoding2str(codec));
        abort();
    }

    cram_codec *r = encode_init[codec](st, codec, option, dat, version, vv);
    if (!r) {
        hts_log_error("Unable to initialise codec of type %s", cram_encoding2str(codec));
        return NULL;
    }

    r->out = NULL;
    r->vv = vv;
    return r;
}
2310
2311
/*
2312
 * Returns the content_id used by this codec, also in id2 if byte_array_len.
2313
 * Returns -1 for the CORE block and -2 for unneeded.
2314
 * id2 is only filled out for BYTE_ARRAY_LEN which uses 2 codecs.
2315
 */
2316
0
int cram_codec_to_id(cram_codec *c, int *id2) {
    // Defaults: primary id is the CORE block (-1), secondary is unneeded (-2).
    int primary = -1;
    int secondary = -2;

    switch (c->codec) {
    case E_GOLOMB:
    case E_BETA:
    case E_SUBEXP:
    case E_GOLOMB_RICE:
    case E_GAMMA:
        // CORE block
        break;

    case E_NULL:
        primary = -2;
        break;

    case E_HUFFMAN:
        // A single-code table yields -2 (unneeded) rather than CORE.
        if (c->u.huffman.ncodes == 1)
            primary = -2;
        break;

    case E_EXTERNAL:
        primary = c->u.external.content_id;
        break;

    case E_BYTE_ARRAY_STOP:
        primary = c->u.byte_array_stop.content_id;
        break;

    case E_BYTE_ARRAY_LEN:
        // Two sub-codecs, hence two ids.
        primary   = cram_codec_to_id(c->u.byte_array_len.len_codec, NULL);
        secondary = cram_codec_to_id(c->u.byte_array_len.val_codec, NULL);
        break;

    default:
        hts_log_error("Unknown codec type %d", c->codec);
        break;
    }

    if (id2 != NULL)
        *id2 = secondary;

    return primary;
}
2359
2360
2361
/*
2362
 * cram_codec structures are specialised for decoding or encoding.
2363
 * Unfortunately this makes turning a decoder into an encoder (such as
2364
 * when transcoding files) problematic.
2365
 *
2366
 * This function converts a cram decoder codec into an encoder version
2367
 * in-place (ie it modifies the codec itself).
2368
 *
2369
 * Returns 0 on success;
2370
 *        -1 on failure.
2371
 */
2372
0
int cram_codec_decoder2encoder(cram_fd *fd, cram_codec *c) {
    int j;

    switch (c->codec) {
    case E_EXTERNAL:
        // shares struct with decode
        c->free = cram_external_encode_free;
        c->store = cram_external_encode_store;
        if (c->decode == cram_external_decode_int)
            c->encode = cram_external_encode_int;
        else if (c->decode == cram_external_decode_long)
            c->encode = cram_external_encode_long;
        else if (c->decode == cram_external_decode_char)
            c->encode = cram_external_encode_char;
        else if (c->decode == cram_external_decode_block)
            c->encode = cram_external_encode_char;
        else
            return -1;
        break;

    case E_HUFFMAN: {
        // New structure, so switch.
        // FIXME: if the huffman and e_huffman structs were amended, we
        // could unify this.
        cram_codec *t = malloc(sizeof(*t));
        if (!t) return -1;
        t->vv     = c->vv;
        t->codec = E_HUFFMAN;
        t->free = cram_huffman_encode_free;
        t->store = cram_huffman_encode_store;
        // The code table is shared with (not copied from) the decoder.
        t->u.e_huffman.codes = c->u.huffman.codes;
        t->u.e_huffman.nvals = c->u.huffman.ncodes;
        t->u.e_huffman.option = c->u.huffman.option;
        // Build the symbol -> code index lookup; val2code is offset by one
        // so that symbol -1 lands in slot 0.
        for (j = 0; j < t->u.e_huffman.nvals; j++) {
            int32_t sym = t->u.e_huffman.codes[j].symbol;
            if (sym >= -1 && sym < MAX_HUFF)
                t->u.e_huffman.val2code[sym+1] = j;
        }

        if (c->decode == cram_huffman_decode_char0)
            t->encode = cram_huffman_encode_char0;
        else if (c->decode == cram_huffman_decode_char)
            t->encode = cram_huffman_encode_char;
        else if (c->decode == cram_huffman_decode_int0)
            t->encode = cram_huffman_encode_int0;
        else if (c->decode == cram_huffman_decode_int)
            t->encode = cram_huffman_encode_int;
        else if (c->decode == cram_huffman_decode_long0)
            t->encode = cram_huffman_encode_long0;
        else if (c->decode == cram_huffman_decode_long)
            t->encode = cram_huffman_encode_long;
        else {
            // Only the scratch struct is released; 'c' is left untouched.
            free(t);
            return -1;
        }
        *c = *t;
        free(t);
        break;
    }

    case E_BETA:
        // shares struct with decode
        c->free = cram_beta_encode_free;
        c->store = cram_beta_encode_store;
        if (c->decode == cram_beta_decode_int)
            c->encode = cram_beta_encode_int;
        else if (c->decode == cram_beta_decode_long)
            c->encode = cram_beta_encode_long;
        else if (c->decode == cram_beta_decode_char)
            c->encode = cram_beta_encode_char;
        else
            return -1;
        break;

    case E_BYTE_ARRAY_LEN: {
        cram_codec *t = malloc(sizeof(*t));
        if (!t) return -1;
        t->vv     = c->vv;
        t->codec  = E_BYTE_ARRAY_LEN;
        t->free   = cram_byte_array_len_encode_free;
        t->store  = cram_byte_array_len_encode_store;
        t->encode = cram_byte_array_len_encode;
        t->u.e_byte_array_len.len_codec = c->u.byte_array_len.len_codec;
        t->u.e_byte_array_len.val_codec = c->u.byte_array_len.val_codec;
        if (cram_codec_decoder2encoder(fd, t->u.e_byte_array_len.len_codec) == -1 ||
            cram_codec_decoder2encoder(fd, t->u.e_byte_array_len.val_codec) == -1) {
            // 't' only borrows the sub-codec pointers, which 'c' still
            // holds.  Release the scratch struct alone; calling t->free(t)
            // would also free the sub-codecs and leave 'c' pointing at
            // freed memory.
            free(t);
            return -1;
        }

        // {len,val}_{encoding,dat} are undefined, but unused.
        // Leaving them unset here means we can test that assertion.
        *c = *t;
        free(t);
        break;
    }

    case E_BYTE_ARRAY_STOP:
        // shares struct with decode
        c->free   = cram_byte_array_stop_encode_free;
        c->store  = cram_byte_array_stop_encode_store;
        c->encode = cram_byte_array_stop_encode;
        break;

    default:
        return -1;
    }

    return 0;
}
2482
2483
0
/*
 * Appends a human readable description of codec 'c' to 'ks'.
 *
 * Uses the codec's own describe method when it has one; a NULL codec, or
 * one without a describe method, is written as "?".
 *
 * Returns the describe method's result when one is called; for the "?"
 * fallback, 0 on success and -1 on failure.
 */
int cram_codec_describe(cram_codec *c, kstring_t *ks) {
    if (c && c->describe)
        return c->describe(c, ks);

    // ksprintf reports the number of characters written; fold that into a
    // success/failure status so a successful "?" is not seen as non-zero.
    return ksprintf(ks, "?") < 0 ? -1 : 0;
}