Coverage Report

Created: 2025-07-12 06:16

/src/htslib/htscodecs/htscodecs/rANS_static.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2014-2022 Genome Research Ltd.
3
 * Author(s): James Bonfield
4
 *
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions are met:
7
 *
8
 *    1. Redistributions of source code must retain the above copyright notice,
9
 *       this list of conditions and the following disclaimer.
10
 *
11
 *    2. Redistributions in binary form must reproduce the above
12
 *       copyright notice, this list of conditions and the following
13
 *       disclaimer in the documentation and/or other materials provided
14
 *       with the distribution.
15
 *
16
 *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
17
 *       Institute nor the names of its contributors may be used to endorse
18
 *       or promote products derived from this software without specific
19
 *       prior written permission.
20
 *
21
 * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
22
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
25
 * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
 */
33
34
#include "config.h"
35
36
// Use 11 for order-1?
37
8.34M
#define TF_SHIFT 12
38
10.3k
#define TOTFREQ (1<<TF_SHIFT)
39
40
#include "rANS_byte.h"
41
#include "utils.h"
42
43
/*-------------------------------------------------------------------------- */
44
/*
45
 * Example wrapper to use the rans_byte.h functions included above.
46
 *
47
 * This demonstrates how to use, and unroll, an order-0 and order-1 frequency
48
 * model.
49
 */
50
51
#include <stdint.h>
52
#include <stdlib.h>
53
#include <stdio.h>
54
#include <unistd.h>
55
#include <assert.h>
56
#include <string.h>
57
#include <limits.h>
58
#include <sys/time.h>
59
#ifndef NO_THREADS
60
#include <pthread.h>
61
#endif
62
63
#include "rANS_static.h"
64
65
#define ABS(a) ((a)>0?(a):-(a))
66
67
/*-----------------------------------------------------------------------------
68
 * Memory to memory compression functions.
69
 *
70
 * These are original versions without any manual loop unrolling. They
71
 * are easier to understand, but can be up to 2x slower.
72
 */
73
74
static
75
unsigned char *rans_compress_O0(unsigned char *in, unsigned int in_size,
76
0
                                unsigned int *out_size) {
77
0
    unsigned char *out_buf = malloc(1.05*in_size + 257*257*3 + 9);
78
0
    unsigned char *cp, *out_end;
79
0
    RansEncSymbol syms[256];
80
0
    RansState rans0;
81
0
    RansState rans2;
82
0
    RansState rans1;
83
0
    RansState rans3;
84
0
    uint8_t* ptr;
85
0
    int F[256+MAGIC] = {0}, i, j, tab_size, rle, x, fsum = 0;
86
0
    int m = 0, M = 0;
87
0
    uint64_t tr;
88
89
0
    if (!out_buf)
90
0
        return NULL;
91
92
0
    ptr = out_end = out_buf + (uint32_t)(1.05*in_size) + 257*257*3 + 9;
93
94
    // Compute statistics
95
0
    if (hist8(in, in_size, (uint32_t *)F) < 0) {
96
0
        free(out_buf);
97
0
        return NULL;
98
0
    }
99
0
    tr = in_size ? ((uint64_t)TOTFREQ<<31)/in_size + (1<<30)/in_size : 0;
100
101
0
 normalise_harder:
102
    // Normalise so T[i] == TOTFREQ
103
0
    for (fsum = m = M = j = 0; j < 256; j++) {
104
0
        if (!F[j])
105
0
            continue;
106
107
0
        if (m < F[j])
108
0
            m = F[j], M = j;
109
110
0
        if ((F[j] = (F[j]*tr)>>31) == 0)
111
0
            F[j] = 1;
112
0
        fsum += F[j];
113
0
    }
114
115
0
    fsum++;
116
0
    if (fsum < TOTFREQ) {
117
0
        F[M] += TOTFREQ-fsum;
118
0
    } else if (fsum-TOTFREQ > F[M]/2) {
119
        // Corner case to avoid excessive frequency reduction
120
0
        tr = 2104533975; goto normalise_harder; // equiv to *0.98.
121
0
    } else {
122
0
        F[M] -= fsum-TOTFREQ;
123
0
    }
124
125
    //printf("F[%d]=%d\n", M, F[M]);
126
0
    assert(F[M]>0);
127
128
    // Encode statistics.
129
0
    cp = out_buf+9;
130
131
0
    for (x = rle = j = 0; j < 256; j++) {
132
0
        if (F[j]) {
133
            // j
134
0
            if (rle) {
135
0
                rle--;
136
0
            } else {
137
0
                *cp++ = j;
138
0
                if (!rle && j && F[j-1])  {
139
0
                    for(rle=j+1; rle<256 && F[rle]; rle++)
140
0
                        ;
141
0
                    rle -= j+1;
142
0
                    *cp++ = rle;
143
0
                }
144
                //fprintf(stderr, "%d: %d %d\n", j, rle, N[j]);
145
0
            }
146
            
147
            // F[j]
148
0
            if (F[j]<128) {
149
0
                *cp++ = F[j];
150
0
            } else {
151
0
                *cp++ = 128 | (F[j]>>8);
152
0
                *cp++ = F[j]&0xff;
153
0
            }
154
0
            RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT);
155
0
            x += F[j];
156
0
        }
157
0
    }
158
0
    *cp++ = 0;
159
160
    //write(2, out_buf+4, cp-(out_buf+4));
161
0
    tab_size = cp-out_buf;
162
163
0
    RansEncInit(&rans0);
164
0
    RansEncInit(&rans1);
165
0
    RansEncInit(&rans2);
166
0
    RansEncInit(&rans3);
167
168
0
    switch (i=(in_size&3)) {
169
0
    case 3: RansEncPutSymbol(&rans2, &ptr, &syms[in[in_size-(i-2)]]);
170
        // fall-through
171
0
    case 2: RansEncPutSymbol(&rans1, &ptr, &syms[in[in_size-(i-1)]]);
172
        // fall-through
173
0
    case 1: RansEncPutSymbol(&rans0, &ptr, &syms[in[in_size-(i-0)]]);
174
        // fall-through
175
0
    case 0:
176
0
        break;
177
0
    }
178
0
    for (i=(in_size &~3); likely(i>0); i-=4) {
179
0
        RansEncSymbol *s3 = &syms[in[i-1]];
180
0
        RansEncSymbol *s2 = &syms[in[i-2]];
181
0
        RansEncSymbol *s1 = &syms[in[i-3]];
182
0
        RansEncSymbol *s0 = &syms[in[i-4]];
183
184
0
        RansEncPutSymbol(&rans3, &ptr, s3);
185
0
        RansEncPutSymbol(&rans2, &ptr, s2);
186
0
        RansEncPutSymbol(&rans1, &ptr, s1);
187
0
        RansEncPutSymbol(&rans0, &ptr, s0);
188
0
    }
189
190
0
    RansEncFlush(&rans3, &ptr);
191
0
    RansEncFlush(&rans2, &ptr);
192
0
    RansEncFlush(&rans1, &ptr);
193
0
    RansEncFlush(&rans0, &ptr);
194
195
    // Finalise block size and return it
196
0
    *out_size = (out_end - ptr) + tab_size;
197
198
0
    cp = out_buf;
199
200
0
    *cp++ = 0; // order
201
0
    *cp++ = ((*out_size-9)>> 0) & 0xff;
202
0
    *cp++ = ((*out_size-9)>> 8) & 0xff;
203
0
    *cp++ = ((*out_size-9)>>16) & 0xff;
204
0
    *cp++ = ((*out_size-9)>>24) & 0xff;
205
206
0
    *cp++ = (in_size>> 0) & 0xff;
207
0
    *cp++ = (in_size>> 8) & 0xff;
208
0
    *cp++ = (in_size>>16) & 0xff;
209
0
    *cp++ = (in_size>>24) & 0xff;
210
211
0
    memmove(out_buf + tab_size, ptr, out_end-ptr);
212
213
0
    return out_buf;
214
0
}
215
216
typedef struct {
217
    unsigned char R[TOTFREQ];
218
} ari_decoder;
219
220
static
221
unsigned char *rans_uncompress_O0(unsigned char *in, unsigned int in_size,
222
202
                                  unsigned int *out_size) {
223
    /* Load in the static tables */
224
202
    unsigned char *cp = in + 9;
225
202
    unsigned char *cp_end = in + in_size;
226
202
    const uint32_t mask = (1u << TF_SHIFT)-1;
227
202
    int i, j, rle;
228
202
    unsigned int x, y;
229
202
    unsigned int out_sz, in_sz;
230
202
    char *out_buf;
231
202
    RansState R[4];
232
202
    RansState m[4];
233
202
    uint16_t sfreq[TOTFREQ+32];
234
202
    uint16_t ssym [TOTFREQ+32]; // faster, but only needs uint8_t
235
202
    uint32_t sbase[TOTFREQ+16]; // faster, but only needs uint16_t
236
237
202
    if (in_size < 26) // Need at least this many bytes just to start
238
4
        return NULL;
239
240
198
    if (*in++ != 0) // Order-0 check
241
0
        return NULL;
242
    
243
198
    in_sz  = ((in[0])<<0) | ((in[1])<<8) | ((in[2])<<16) | (((uint32_t)in[3])<<24);
244
198
    out_sz = ((in[4])<<0) | ((in[5])<<8) | ((in[6])<<16) | (((uint32_t)in[7])<<24);
245
198
    if (in_sz != in_size-9)
246
8
        return NULL;
247
248
190
    if (out_sz >= INT_MAX)
249
4
        return NULL; // protect against some overflow cases
250
251
    // For speeding up the fuzzer only.
252
    // Small input can lead to large uncompressed data.
253
    // We reject this as it just slows things up instead of testing more code
254
    // paths (once we've verified a few times for large data).
255
186
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
256
186
    if (out_sz > 100000)
257
8
        return NULL;
258
178
#endif
259
260
178
    out_buf = malloc(out_sz);
261
178
    if (!out_buf)
262
0
        return NULL;
263
264
    //fprintf(stderr, "out_sz=%d\n", out_sz);
265
266
    // Precompute reverse lookup of frequency.
267
178
    rle = x = y = 0;
268
178
    j = *cp++;
269
4.25k
    do {
270
4.25k
        int F, C;
271
4.25k
        if (cp > cp_end - 16) goto cleanup; // Not enough input bytes left
272
4.24k
        if ((F = *cp++) >= 128) {
273
320
            F &= ~128;
274
320
            F = ((F & 127) << 8) | *cp++;
275
320
        }
276
4.24k
        C = x;
277
278
4.24k
        if (x + F > TOTFREQ)
279
12
            goto cleanup;
280
281
655k
        for (y = 0; y < F; y++) {
282
651k
            ssym [y + C] = j;
283
651k
            sfreq[y + C] = F;
284
651k
            sbase[y + C] = y;
285
651k
        }
286
4.23k
        x += F;
287
288
4.23k
        if (!rle && j+1 == *cp) {
289
197
            j = *cp++;
290
197
            rle = *cp++;
291
4.03k
        } else if (rle) {
292
2.26k
            rle--;
293
2.26k
            j++;
294
2.26k
            if (j > 255)
295
1
                goto cleanup;
296
2.26k
        } else {
297
1.76k
            j = *cp++;
298
1.76k
        }
299
4.23k
    } while(j);
300
301
157
    if (x < TOTFREQ-1 || x > TOTFREQ)
302
15
        goto cleanup;
303
142
    if (x != TOTFREQ) {
304
        // Protection against accessing uninitialised memory in the case
305
        // where SUM(freqs) == 4095 and not 4096.
306
46
        ssym [x] = ssym [x-1];
307
46
        sfreq[x] = sfreq[x-1];
308
46
        sbase[x] = sbase[x-1]+1;
309
46
    }
310
311
    // 16 bytes of cp here. Also why cp - 16 in above loop.
312
142
    if (cp > cp_end - 16) goto cleanup; // Not enough input bytes left
313
314
141
    RansDecInit(&R[0], &cp); if (R[0] < RANS_BYTE_L) goto cleanup;
315
133
    RansDecInit(&R[1], &cp); if (R[1] < RANS_BYTE_L) goto cleanup;
316
132
    RansDecInit(&R[2], &cp); if (R[2] < RANS_BYTE_L) goto cleanup;
317
127
    RansDecInit(&R[3], &cp); if (R[3] < RANS_BYTE_L) goto cleanup;
318
319
125
    int out_end = (out_sz&~3);
320
125
    cp_end -= 8; // within 8 for simplicity of loop below
321
    // 2 x likely() here harms gcc 7.5 by about 8% rate drop, but only in O2
322
951k
    for (i=0; likely(i < out_end); i+=4) {
323
        //                              /curr code
324
        // gcc7  O2 513/497   562/556++ 556/547 ok
325
        // gcc7  O3 566/552   569/553   581/563+
326
        // gcc10 O2 544/538   563/547   541/537-?
327
        // gcc10 O3 531/519   546/530   575/546+
328
        // gcc11 O2 512/490   588/540   540/535 mid
329
        // gcc11 O3 482/471   553/541   549/535
330
        // gcc12 O2 533/526   544/534   539/535
331
        // gcc12 O3 548/533   502/497-- 553/527 ok
332
        // clang10  555/542   564/549   560/541
333
        // clang13  560/553   572/559   556/559
334
950k
        m[0] = R[0] & mask;
335
950k
        R[0] = sfreq[m[0]] * (R[0] >> TF_SHIFT) + sbase[m[0]];
336
337
950k
        m[1] = R[1] & mask;
338
950k
        R[1] = sfreq[m[1]] * (R[1] >> TF_SHIFT) + sbase[m[1]];
339
340
950k
        m[2] = R[2] & mask;
341
950k
        R[2] = sfreq[m[2]] * (R[2] >> TF_SHIFT) + sbase[m[2]];
342
343
950k
        m[3] = R[3] & mask;
344
950k
        R[3] = sfreq[m[3]] * (R[3] >> TF_SHIFT) + sbase[m[3]];
345
346
        // likely() here harms gcc12 -O3
347
950k
        if (cp<cp_end) {
348
33.5k
            RansDecRenorm2(&R[0], &R[1], &cp);
349
33.5k
            RansDecRenorm2(&R[2], &R[3], &cp);
350
917k
        } else {
351
917k
            RansDecRenormSafe(&R[0], &cp, cp_end+8);
352
917k
            RansDecRenormSafe(&R[1], &cp, cp_end+8);
353
917k
            RansDecRenormSafe(&R[2], &cp, cp_end+8);
354
917k
            RansDecRenormSafe(&R[3], &cp, cp_end+8);
355
917k
        }
356
357
950k
        out_buf[i+0] = ssym[m[0]];
358
950k
        out_buf[i+1] = ssym[m[1]];
359
950k
        out_buf[i+2] = ssym[m[2]];
360
950k
        out_buf[i+3] = ssym[m[3]];
361
950k
    }
362
363
364
125
    switch(out_sz&3) {
365
43
    case 3:
366
43
        out_buf[out_end + 2] = ssym[R[2] & mask];
367
        // fall-through
368
93
    case 2:
369
93
        out_buf[out_end + 1] = ssym[R[1] & mask];
370
        // fall-through
371
104
    case 1:
372
104
        out_buf[out_end] = ssym[R[0] & mask];
373
        // fall-through
374
125
    default:
375
125
        break;
376
125
    }
377
    
378
125
    *out_size = out_sz;
379
125
    return (unsigned char *)out_buf;
380
381
53
 cleanup:
382
53
    free(out_buf);
383
53
    return NULL;
384
125
}
385
386
static
387
unsigned char *rans_compress_O1(unsigned char *in, unsigned int in_size,
388
0
                                unsigned int *out_size) {
389
0
    unsigned char *out_buf = NULL, *out_end, *cp;
390
0
    unsigned int tab_size, rle_i, rle_j;
391
392
393
0
    if (in_size < 4)
394
0
        return rans_compress_O0(in, in_size, out_size);
395
396
0
    int (*F)[256];
397
0
    RansEncSymbol (*syms)[256];
398
399
0
    uint8_t *mem = htscodecs_tls_alloc(256 * (sizeof(*syms) + sizeof(*F)));
400
0
    if (!mem)
401
0
        return NULL;
402
0
    syms = (RansEncSymbol (*)[256])mem;
403
0
    F = (int (*)[256])(mem + 256*sizeof(*syms));
404
0
    memset(F, 0, 256*sizeof(*F));
405
406
0
    if (!syms) goto cleanup;
407
0
    int T[256+MAGIC] = {0};
408
0
    int i, j;
409
410
0
    out_buf = malloc(1.05*in_size + 257*257*3 + 9);
411
0
    if (!out_buf) goto cleanup;
412
413
0
    out_end = out_buf + (uint32_t)(1.05*in_size) + 257*257*3 + 9;
414
0
    cp = out_buf+9;
415
416
0
    if (hist1_4(in, in_size, (uint32_t (*)[256])F, (uint32_t *)T) < 0) {
417
0
        free(out_buf);
418
0
        out_buf = NULL;
419
0
        goto cleanup;
420
0
    }
421
422
0
    F[0][in[1*(in_size>>2)]]++;
423
0
    F[0][in[2*(in_size>>2)]]++;
424
0
    F[0][in[3*(in_size>>2)]]++;
425
0
    T[0]+=3;
426
427
    
428
    // Normalise so T[i] == TOTFREQ
429
0
    for (rle_i = i = 0; i < 256; i++) {
430
0
        int t2, m, M;
431
0
        unsigned int x;
432
433
0
        if (T[i] == 0)
434
0
            continue;
435
436
        //uint64_t p = (TOTFREQ * TOTFREQ) / t;
437
0
        double p = ((double)TOTFREQ)/T[i];
438
0
    normalise_harder:
439
0
        for (t2 = m = M = j = 0; j < 256; j++) {
440
0
            if (!F[i][j])
441
0
                continue;
442
443
0
            if (m < F[i][j])
444
0
                m = F[i][j], M = j;
445
446
            //if ((F[i][j] = (F[i][j] * p) / TOTFREQ) == 0)
447
0
            if ((F[i][j] *= p) == 0)
448
0
                F[i][j] = 1;
449
0
            t2 += F[i][j];
450
0
        }
451
452
0
        t2++;
453
0
        if (t2 < TOTFREQ) {
454
0
            F[i][M] += TOTFREQ-t2;
455
0
        } else if (t2-TOTFREQ >= F[i][M]/2) {
456
            // Corner case to avoid excessive frequency reduction
457
0
            p = .98; goto normalise_harder;
458
0
        } else {
459
0
            F[i][M] -= t2-TOTFREQ;
460
0
        }
461
462
        // Store frequency table
463
        // i
464
0
        if (rle_i) {
465
0
            rle_i--;
466
0
        } else {
467
0
            *cp++ = i;
468
            // FIXME: could use order-0 statistics to observe which alphabet
469
            // symbols are present and base RLE on that ordering instead.
470
0
            if (i && T[i-1]) {
471
0
                for(rle_i=i+1; rle_i<256 && T[rle_i]; rle_i++)
472
0
                    ;
473
0
                rle_i -= i+1;
474
0
                *cp++ = rle_i;
475
0
            }
476
0
        }
477
478
0
        int *F_i_ = F[i];
479
0
        x = 0;
480
0
        rle_j = 0;
481
0
        for (j = 0; j < 256; j++) {
482
0
            if (F_i_[j]) {
483
                //fprintf(stderr, "F[%d][%d]=%d, x=%d\n", i, j, F_i_[j], x);
484
485
                // j
486
0
                if (rle_j) {
487
0
                    rle_j--;
488
0
                } else {
489
0
                    *cp++ = j;
490
0
                    if (!rle_j && j && F_i_[j-1]) {
491
0
                        for(rle_j=j+1; rle_j<256 && F_i_[rle_j]; rle_j++)
492
0
                            ;
493
0
                        rle_j -= j+1;
494
0
                        *cp++ = rle_j;
495
0
                    }
496
0
                }
497
498
                // F_i_[j]
499
0
                if (F_i_[j]<128) {
500
0
                    *cp++ = F_i_[j];
501
0
                } else {
502
0
                    *cp++ = 128 | (F_i_[j]>>8);
503
0
                    *cp++ = F_i_[j]&0xff;
504
0
                }
505
506
0
                RansEncSymbolInit(&syms[i][j], x, F_i_[j], TF_SHIFT);
507
0
                x += F_i_[j];
508
0
            }
509
0
        }
510
0
        *cp++ = 0;
511
0
    }
512
0
    *cp++ = 0;
513
514
    //write(2, out_buf+4, cp-(out_buf+4));
515
0
    tab_size = cp - out_buf;
516
0
    assert(tab_size < 257*257*3);
517
    
518
0
    RansState rans0, rans1, rans2, rans3;
519
0
    RansEncInit(&rans0);
520
0
    RansEncInit(&rans1);
521
0
    RansEncInit(&rans2);
522
0
    RansEncInit(&rans3);
523
524
0
    uint8_t* ptr = out_end;
525
526
0
    int isz4 = in_size>>2;
527
0
    int i0 = 1*isz4-2;
528
0
    int i1 = 2*isz4-2;
529
0
    int i2 = 3*isz4-2;
530
0
    int i3 = 4*isz4-2;
531
532
0
    unsigned char l0 = in[i0+1];
533
0
    unsigned char l1 = in[i1+1];
534
0
    unsigned char l2 = in[i2+1];
535
0
    unsigned char l3 = in[i3+1];
536
537
    // Deal with the remainder
538
0
    l3 = in[in_size-1];
539
0
    for (i3 = in_size-2; i3 > 4*isz4-2; i3--) {
540
0
        unsigned char c3 = in[i3];
541
0
        RansEncPutSymbol(&rans3, &ptr, &syms[c3][l3]);
542
0
        l3 = c3;
543
0
    }
544
545
0
    for (; likely(i0 >= 0); i0--, i1--, i2--, i3--) {
546
0
        unsigned char c3 = in[i3];
547
0
        unsigned char c2 = in[i2];
548
0
        unsigned char c1 = in[i1];
549
0
        unsigned char c0 = in[i0];
550
551
0
        RansEncSymbol *s3 = &syms[c3][l3];
552
0
        RansEncSymbol *s2 = &syms[c2][l2];
553
0
        RansEncSymbol *s1 = &syms[c1][l1];
554
0
        RansEncSymbol *s0 = &syms[c0][l0];
555
556
0
        RansEncPutSymbol4(&rans3, &rans2, &rans1, &rans0, &ptr,
557
0
                          s3, s2, s1, s0);
558
559
0
        l3 = c3;
560
0
        l2 = c2;
561
0
        l1 = c1;
562
0
        l0 = c0;
563
0
    }
564
565
0
    RansEncPutSymbol(&rans3, &ptr, &syms[0][l3]);
566
0
    RansEncPutSymbol(&rans2, &ptr, &syms[0][l2]);
567
0
    RansEncPutSymbol(&rans1, &ptr, &syms[0][l1]);
568
0
    RansEncPutSymbol(&rans0, &ptr, &syms[0][l0]);
569
570
0
    RansEncFlush(&rans3, &ptr);
571
0
    RansEncFlush(&rans2, &ptr);
572
0
    RansEncFlush(&rans1, &ptr);
573
0
    RansEncFlush(&rans0, &ptr);
574
575
0
    *out_size = (out_end - ptr) + tab_size;
576
577
0
    cp = out_buf;
578
0
    *cp++ = 1; // order
579
580
0
    *cp++ = ((*out_size-9)>> 0) & 0xff;
581
0
    *cp++ = ((*out_size-9)>> 8) & 0xff;
582
0
    *cp++ = ((*out_size-9)>>16) & 0xff;
583
0
    *cp++ = ((*out_size-9)>>24) & 0xff;
584
585
0
    *cp++ = (in_size>> 0) & 0xff;
586
0
    *cp++ = (in_size>> 8) & 0xff;
587
0
    *cp++ = (in_size>>16) & 0xff;
588
0
    *cp++ = (in_size>>24) & 0xff;
589
590
0
    memmove(out_buf + tab_size, ptr, out_end-ptr);
591
592
0
 cleanup:
593
0
    htscodecs_tls_free(syms);
594
595
0
    return out_buf;
596
0
}
597
598
static
599
unsigned char *rans_uncompress_O1(unsigned char *in, unsigned int in_size,
600
224
                                  unsigned int *out_size) {
601
    /* Load in the static tables */
602
224
    unsigned char *cp = in + 9;
603
224
    unsigned char *ptr_end = in + in_size;
604
224
    int i, j = -999, rle_i, rle_j;
605
224
    unsigned int x;
606
224
    unsigned int out_sz, in_sz;
607
224
    char *out_buf = NULL;
608
609
    // Sanity checking
610
224
    if (in_size < 27) // Need at least this many bytes to start
611
3
        return NULL;
612
613
221
    if (*in++ != 1) // Order-1 check
614
4
        return NULL;
615
616
217
    in_sz  = ((in[0])<<0) | ((in[1])<<8) | ((in[2])<<16) | (((uint32_t)in[3])<<24);
617
217
    out_sz = ((in[4])<<0) | ((in[5])<<8) | ((in[6])<<16) | (((uint32_t)in[7])<<24);
618
217
    if (in_sz != in_size-9)
619
17
        return NULL;
620
621
200
    if (out_sz >= INT_MAX)
622
4
        return NULL; // protect against some overflow cases
623
624
    // For speeding up the fuzzer only.
625
    // Small input can lead to large uncompressed data.
626
    // We reject this as it just slows things up instead of testing more code
627
    // paths (once we've verified a few times for large data).
628
196
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
629
196
    if (out_sz > 100000)
630
8
        return NULL;
631
188
#endif
632
633
    // Allocate decoding lookup tables
634
188
    RansDecSymbol32 (*syms)[256];
635
188
    uint8_t *mem = htscodecs_tls_calloc(256, sizeof(ari_decoder)
636
188
                                        + sizeof(*syms));
637
188
    if (!mem)
638
0
        return NULL;
639
188
    ari_decoder *const D = (ari_decoder *)mem;
640
188
    syms = (RansDecSymbol32 (*)[256])(mem + 256*sizeof(ari_decoder));
641
188
    int16_t map[256], map_i = 0;
642
    
643
188
    memset(map, -1, 256*sizeof(*map));
644
645
188
    if (!D) goto cleanup;
646
    /* These memsets prevent illegal memory access in syms due to
647
       broken compressed data.  As D is calloc'd, all illegal transitions
648
       will end up in either row or column 0 of syms. */
649
188
    memset(&syms[0], 0, sizeof(syms[0]));
650
48.3k
    for (i = 0; i < 256; i++)
651
48.1k
        memset(&syms[i][0], 0, sizeof(syms[0][0]));
652
653
    //fprintf(stderr, "out_sz=%d\n", out_sz);
654
655
    //i = *cp++;
656
188
    rle_i = 0;
657
188
    i = *cp++;
658
569
    do {
659
        // Map arbitrary a,b,c to 0,1,2 to improve cache locality.
660
569
        if (map[i] == -1)
661
546
            map[i] = map_i++;
662
569
        int m_i = map[i];
663
664
569
        rle_j = x = 0;
665
569
        j = *cp++;
666
3.71k
        do {
667
3.71k
            if (map[j] == -1)
668
1.94k
                map[j] = map_i++;
669
670
3.71k
            int F, C;
671
3.71k
            if (cp > ptr_end - 16) goto cleanup; // Not enough input bytes left
672
3.70k
            if ((F = *cp++) >= 128) {
673
355
                F &= ~128;
674
355
                F = ((F & 127) << 8) | *cp++;
675
355
            }
676
3.70k
            C = x;
677
678
            //fprintf(stderr, "i=%d j=%d F=%d C=%d\n", i, j, F, C);
679
680
3.70k
            if (unlikely(!F))
681
449
                F = TOTFREQ;
682
683
3.70k
            RansDecSymbolInit32(&syms[m_i][j], C, F);
684
685
            /* Build reverse lookup table */
686
            //if (!D[i].R) D[i].R = (unsigned char *)malloc(TOTFREQ);
687
3.70k
            if (x + F > TOTFREQ)
688
34
                goto cleanup;
689
690
3.66k
            memset(&D[m_i].R[x], j, F);
691
3.66k
            x += F;
692
693
3.66k
            if (!rle_j && j+1 == *cp) {
694
126
                j = *cp++;
695
126
                rle_j = *cp++;
696
3.54k
            } else if (rle_j) {
697
789
                rle_j--;
698
789
                j++;
699
789
                if (j > 255)
700
1
                    goto cleanup;
701
2.75k
            } else {
702
2.75k
                j = *cp++;
703
2.75k
            }
704
3.66k
        } while(j);
705
706
524
        if (x < TOTFREQ-1 || x > TOTFREQ)
707
5
            goto cleanup;
708
519
        if (x < TOTFREQ) // historically we fill 4095, not 4096
709
18
            D[i].R[x] = D[i].R[x-1];
710
711
519
        if (!rle_i && i+1 == *cp) {
712
17
            i = *cp++;
713
17
            rle_i = *cp++;
714
502
        } else if (rle_i) {
715
336
            rle_i--;
716
336
            i++;
717
336
            if (i > 255)
718
0
                goto cleanup;
719
336
        } else {
720
166
            i = *cp++;
721
166
        }
722
519
    } while (i);
723
35.4k
    for (i = 0; i < 256; i++)
724
35.3k
        if (map[i] == -1)
725
33.9k
            map[i] = 0;
726
727
138
    RansState rans0, rans1, rans2, rans3;
728
138
    uint8_t *ptr = cp;
729
138
    if (cp > ptr_end - 16) goto cleanup; // Not enough input bytes left
730
137
    RansDecInit(&rans0, &ptr); if (rans0 < RANS_BYTE_L) goto cleanup;
731
133
    RansDecInit(&rans1, &ptr); if (rans1 < RANS_BYTE_L) goto cleanup;
732
126
    RansDecInit(&rans2, &ptr); if (rans2 < RANS_BYTE_L) goto cleanup;
733
125
    RansDecInit(&rans3, &ptr); if (rans3 < RANS_BYTE_L) goto cleanup;
734
735
120
    RansState R[4];
736
120
    R[0] = rans0;
737
120
    R[1] = rans1;
738
120
    R[2] = rans2;
739
120
    R[3] = rans3;
740
741
120
    unsigned int isz4 = out_sz>>2;
742
120
    uint32_t l0 = 0;
743
120
    uint32_t l1 = 0;
744
120
    uint32_t l2 = 0;
745
120
    uint32_t l3 = 0;
746
    
747
120
    unsigned int i4[] = {0*isz4, 1*isz4, 2*isz4, 3*isz4};
748
749
    /* Allocate output buffer */
750
120
    out_buf = malloc(out_sz);
751
120
    if (!out_buf) goto cleanup;
752
753
120
    uint8_t cc0 = D[map[l0]].R[R[0] & ((1u << TF_SHIFT)-1)];
754
120
    uint8_t cc1 = D[map[l1]].R[R[1] & ((1u << TF_SHIFT)-1)];
755
120
    uint8_t cc2 = D[map[l2]].R[R[2] & ((1u << TF_SHIFT)-1)];
756
120
    uint8_t cc3 = D[map[l3]].R[R[3] & ((1u << TF_SHIFT)-1)];
757
758
120
    ptr_end -= 8;
759
565k
    for (; likely(i4[0] < isz4); i4[0]++, i4[1]++, i4[2]++, i4[3]++) {
760
        // seq4-head2: file q40b
761
        //          O3      O2
762
        // gcc7     296/291 290/260
763
        // gcc10    292/292 290/261
764
        // gcc11    293/293 290/265
765
        // gcc12    293/290 291/266
766
        // clang10  293/290 296/272
767
        // clang13  300/290 290/266
768
565k
        out_buf[i4[0]] = cc0;
769
565k
        out_buf[i4[1]] = cc1;
770
565k
        out_buf[i4[2]] = cc2;
771
565k
        out_buf[i4[3]] = cc3;
772
773
565k
        RansDecSymbol32 s[4] = {
774
565k
            syms[l0][cc0],
775
565k
            syms[l1][cc1],
776
565k
            syms[l2][cc2],
777
565k
            syms[l3][cc3],
778
565k
        };
779
565k
        RansDecAdvanceStep(&R[0], s[0].start, s[0].freq, TF_SHIFT);
780
565k
        RansDecAdvanceStep(&R[1], s[1].start, s[1].freq, TF_SHIFT);
781
565k
        RansDecAdvanceStep(&R[2], s[2].start, s[2].freq, TF_SHIFT);
782
565k
        RansDecAdvanceStep(&R[3], s[3].start, s[3].freq, TF_SHIFT);
783
784
        // Likely here helps speed of high-entropy data by 10-11%,
785
        // but harms low entropy-data speed by 3-4%.
786
565k
        if ((ptr < ptr_end)) {
787
24.0k
            RansDecRenorm2(&R[0], &R[1], &ptr);
788
24.0k
            RansDecRenorm2(&R[2], &R[3], &ptr);
789
541k
        } else {
790
541k
            RansDecRenormSafe(&R[0], &ptr, ptr_end+8);
791
541k
            RansDecRenormSafe(&R[1], &ptr, ptr_end+8);
792
541k
            RansDecRenormSafe(&R[2], &ptr, ptr_end+8);
793
541k
            RansDecRenormSafe(&R[3], &ptr, ptr_end+8);
794
541k
        }
795
796
565k
        l0 = map[cc0];
797
565k
        l1 = map[cc1];
798
565k
        l2 = map[cc2];
799
565k
        l3 = map[cc3];
800
801
565k
        cc0 = D[l0].R[R[0] & ((1u << TF_SHIFT)-1)];
802
565k
        cc1 = D[l1].R[R[1] & ((1u << TF_SHIFT)-1)];
803
565k
        cc2 = D[l2].R[R[2] & ((1u << TF_SHIFT)-1)];
804
565k
        cc3 = D[l3].R[R[3] & ((1u << TF_SHIFT)-1)];
805
565k
    }
806
807
    // Remainder
808
248
    for (; i4[3] < out_sz; i4[3]++) {
809
128
        unsigned char c3 = D[l3].R[RansDecGet(&R[3], TF_SHIFT)];
810
128
        out_buf[i4[3]] = c3;
811
812
128
        uint32_t m = R[3] & ((1u << TF_SHIFT)-1);
813
128
        R[3] = syms[l3][c3].freq * (R[3]>>TF_SHIFT) + m - syms[l3][c3].start;
814
128
        RansDecRenormSafe(&R[3], &ptr, ptr_end+8);
815
128
        l3 = map[c3];
816
128
    }
817
    
818
120
    *out_size = out_sz;
819
820
188
 cleanup:
821
188
    htscodecs_tls_free(D);
822
823
188
    return (unsigned char *)out_buf;
824
120
}
825
826
/*-----------------------------------------------------------------------------
827
 * Simple interface to the order-0 vs order-1 encoders and decoders.
828
 */
829
unsigned char *rans_compress(unsigned char *in, unsigned int in_size,
830
0
                             unsigned int *out_size, int order) {
831
0
    if (in_size > INT_MAX) {
832
0
        *out_size = 0;
833
0
        return NULL;
834
0
    }
835
836
0
    return order
837
0
        ? rans_compress_O1(in, in_size, out_size)
838
0
        : rans_compress_O0(in, in_size, out_size);
839
0
}
840
841
unsigned char *rans_uncompress(unsigned char *in, unsigned int in_size,
842
427
                               unsigned int *out_size) {
843
    /* Both rans_uncompress functions need to be able to read at least 9
844
       bytes. */
845
427
    if (in_size < 9)
846
1
        return NULL;
847
426
    return in[0]
848
426
        ? rans_uncompress_O1(in, in_size, out_size)
849
426
        : rans_uncompress_O0(in, in_size, out_size);
850
427
}