Coverage Report

Created: 2023-01-17 06:24

/src/htslib/htscodecs/htscodecs/rANS_static32x16pr.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (c) 2017-2021 Genome Research Ltd.
3
 * Author(s): James Bonfield
4
 *
5
 * Redistribution and use in source and binary forms, with or without
6
 * modification, are permitted provided that the following conditions are met:
7
 *
8
 *    1. Redistributions of source code must retain the above copyright notice,
9
 *       this list of conditions and the following disclaimer.
10
 *
11
 *    2. Redistributions in binary form must reproduce the above
12
 *       copyright notice, this list of conditions and the following
13
 *       disclaimer in the documentation and/or other materials provided
14
 *       with the distribution.
15
 *
16
 *    3. Neither the names Genome Research Ltd and Wellcome Trust Sanger
17
 *       Institute nor the names of its contributors may be used to endorse
18
 *       or promote products derived from this software without specific
19
 *       prior written permission.
20
 *
21
 * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS
22
 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
24
 * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH
25
 * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
26
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
27
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
28
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
29
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
30
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
31
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
32
 */
33
34
#include "config.h"
35
36
#include <stdint.h>
37
#include <stdlib.h>
38
#include <stdio.h>
39
#include <assert.h>
40
#include <string.h>
41
#include <limits.h>
42
43
#include "rANS_word.h"
44
#include "rANS_static4x16.h"
45
#include "rANS_static16_int.h"
46
#include "varint.h"
47
#include "utils.h"
48
49
0
#define TF_SHIFT 12
50
0
#define TOTFREQ (1<<TF_SHIFT)
51
52
53
// 9-11 is considerably faster in the O1 variant due to reduced table size.
54
// We auto-tune between 10 and 12 though.  Anywhere from 9 to 14 are viable.
55
#ifndef TF_SHIFT_O1
56
#define TF_SHIFT_O1 12
57
#endif
58
#ifndef TF_SHIFT_O1_FAST
59
#define TF_SHIFT_O1_FAST 10
60
#endif
61
0
#define TOTFREQ_O1 (1<<TF_SHIFT_O1)
62
0
#define TOTFREQ_O1_FAST (1<<TF_SHIFT_O1_FAST)
63
64
65
0
#define NX 32
66
67
unsigned char *rans_compress_O0_32x16(unsigned char *in,
68
                                      unsigned int in_size,
69
                                      unsigned char *out,
70
0
                                      unsigned int *out_size) {
71
0
    unsigned char *cp, *out_end, *out_free = NULL;
72
0
    RansEncSymbol syms[256];
73
0
    RansState ransN[NX];
74
0
    uint8_t* ptr;
75
0
    uint32_t F[256+MAGIC] = {0};
76
0
    int i, j, tab_size = 0, x, z;
77
    // -20 for order/size/meta
78
0
    unsigned int bound = rans_compress_bound_4x16(in_size,0)-20;
79
80
0
    if (!out) {
81
0
        *out_size = bound;
82
0
        out = out_free = malloc(*out_size);
83
0
    }
84
0
    if (!out || bound > *out_size)
85
0
        return NULL;
86
87
    // If "out" isn't word aligned, tweak out_end/ptr to ensure it is.
88
    // We already added more round in bound to allow for this.
89
0
    if (((size_t)out)&1)
90
0
        bound--;
91
0
    ptr = out_end = out + bound;
92
93
0
    if (in_size == 0)
94
0
        goto empty;
95
96
    // Compute statistics
97
0
    double e = hist8e(in, in_size, F);
98
0
    int low_ent = e < 2;
99
    //hist8(in, in_size, F); int low_ent = 0;
100
101
    // Normalise so frequences sum to power of 2
102
0
    uint32_t fsum = in_size;
103
0
    uint32_t max_val = round2(fsum);
104
0
    if (max_val > TOTFREQ)
105
0
        max_val = TOTFREQ;
106
107
0
    if (normalise_freq(F, fsum, max_val) < 0) {
108
0
        free(out_free);
109
0
        return NULL;
110
0
    }
111
0
    fsum=max_val;
112
113
0
    cp = out;
114
0
    cp += encode_freq(cp, F);
115
0
    tab_size = cp-out;
116
    //write(2, out+4, cp-(out+4));
117
118
0
    if (normalise_freq(F, fsum, TOTFREQ) < 0) {
119
0
        free(out_free);
120
0
        return NULL;
121
0
    }
122
123
    // Encode statistics.
124
0
    for (x = j = 0; j < 256; j++) {
125
0
        if (F[j]) {
126
0
            RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT);
127
0
            x += F[j];
128
0
        }
129
0
    }
130
131
0
    for (z = 0; z < NX; z++)
132
0
      RansEncInit(&ransN[z]);
133
134
0
    z = i = in_size&(NX-1);
135
0
    while (z-- > 0)
136
0
      RansEncPutSymbol(&ransN[z], &ptr, &syms[in[in_size-(i-z)]]);
137
138
0
    if (low_ent) {
139
        // orig
140
        // gcc   446
141
        // clang 427
142
0
        for (i=(in_size &~(NX-1)); likely(i>0); i-=NX) {
143
0
            for (z = NX-1; z >= 0; z-=4) {
144
0
                RansEncSymbol *s0 = &syms[in[i-(NX-z+0)]];
145
0
                RansEncSymbol *s1 = &syms[in[i-(NX-z+1)]];
146
0
                RansEncSymbol *s2 = &syms[in[i-(NX-z+2)]];
147
0
                RansEncSymbol *s3 = &syms[in[i-(NX-z+3)]];
148
0
                RansEncPutSymbol_branched(&ransN[z-0], &ptr, s0);
149
0
                RansEncPutSymbol_branched(&ransN[z-1], &ptr, s1);
150
0
                RansEncPutSymbol_branched(&ransN[z-2], &ptr, s2);
151
0
                RansEncPutSymbol_branched(&ransN[z-3], &ptr, s3);
152
0
                if (NX%8 == 0) {
153
0
                    z -= 4;
154
0
                    RansEncSymbol *s0 = &syms[in[i-(NX-z+0)]];
155
0
                    RansEncSymbol *s1 = &syms[in[i-(NX-z+1)]];
156
0
                    RansEncSymbol *s2 = &syms[in[i-(NX-z+2)]];
157
0
                    RansEncSymbol *s3 = &syms[in[i-(NX-z+3)]];
158
0
                    RansEncPutSymbol_branched(&ransN[z-0], &ptr, s0);
159
0
                    RansEncPutSymbol_branched(&ransN[z-1], &ptr, s1);
160
0
                    RansEncPutSymbol_branched(&ransN[z-2], &ptr, s2);
161
0
                    RansEncPutSymbol_branched(&ransN[z-3], &ptr, s3);
162
0
                }
163
0
            }
164
0
            if (z < -1) abort();
165
0
        }
166
0
    } else {
167
        // Branchless version optimises poorly with gcc unless we have
168
        // AVX2 capability, so have a custom rewrite of it.
169
0
        uint16_t* ptr16 = (uint16_t *)ptr;
170
0
        for (i=(in_size &~(NX-1)); likely(i>0); i-=NX) {
171
            // Unrolled copy of below, because gcc doesn't optimise this
172
            // well in the original form.
173
            //
174
            // Gcc11:   328 MB/s (this) vs 208 MB/s (orig)
175
            // Clang10: 352 MB/s (this) vs 340 MB/s (orig)
176
            //
177
            // for (z = NX-1; z >= 0; z-=4) {
178
            //  RansEncSymbol *s0 = &syms[in[i-(NX-z+0)]];
179
            //  RansEncSymbol *s1 = &syms[in[i-(NX-z+1)]];
180
            //  RansEncSymbol *s2 = &syms[in[i-(NX-z+2)]];
181
            //  RansEncSymbol *s3 = &syms[in[i-(NX-z+3)]];
182
            //  RansEncPutSymbol(&ransN[z-0], &ptr, s0);
183
            //  RansEncPutSymbol(&ransN[z-1], &ptr, s1);
184
            //  RansEncPutSymbol(&ransN[z-2], &ptr, s2);
185
            //  RansEncPutSymbol(&ransN[z-3], &ptr, s3);
186
            // }
187
188
0
            for (z = NX-1; z >= 0; z-=4) {
189
                // RansEncPutSymbol added in-situ
190
0
                RansState *rp = &ransN[z]-3;
191
0
                RansEncSymbol *sy[4];
192
0
                uint8_t *C = &in[i-(NX-z)]-3;
193
194
0
                sy[0] = &syms[C[3]];
195
0
                sy[1] = &syms[C[2]];
196
197
0
                int c0  = rp[3-0] > sy[0]->x_max;
198
0
                int c1  = rp[3-1] > sy[1]->x_max;
199
200
0
#ifdef HTSCODECS_LITTLE_ENDIAN
201
0
                ptr16[-1] = rp[3-0]; ptr16 -= c0;
202
0
                ptr16[-1] = rp[3-1]; ptr16 -= c1;
203
#else
204
                ((uint8_t *)&ptr16[-1])[0] = rp[3-0];
205
                ((uint8_t *)&ptr16[-1])[1] = rp[3-0]>>8;
206
                ptr16 -= c0;
207
                ((uint8_t *)&ptr16[-1])[0] = rp[3-1];
208
                ((uint8_t *)&ptr16[-1])[1] = rp[3-1]>>8;
209
                ptr16 -= c1;
210
#endif
211
212
0
                rp[3-0] = c0 ? rp[3-0]>>16 : rp[3-0];
213
0
                rp[3-1] = c1 ? rp[3-1]>>16 : rp[3-1];
214
215
0
                sy[2] = &syms[C[1]];
216
0
                sy[3] = &syms[C[0]];
217
218
0
                int c2  = rp[3-2] > sy[2]->x_max;
219
0
                int c3  = rp[3-3] > sy[3]->x_max;
220
0
#ifdef HTSCODECS_LITTLE_ENDIAN
221
0
                ptr16[-1] = rp[3-2]; ptr16 -= c2;
222
0
                ptr16[-1] = rp[3-3]; ptr16 -= c3;
223
#else
224
                ((uint8_t *)&ptr16[-1])[0] = rp[3-2];
225
                ((uint8_t *)&ptr16[-1])[1] = rp[3-2]>>8;
226
                ptr16 -= c2;
227
                ((uint8_t *)&ptr16[-1])[0] = rp[3-3];
228
                ((uint8_t *)&ptr16[-1])[1] = rp[3-3]>>8;
229
                ptr16 -= c3;
230
#endif
231
0
                rp[3-2] = c2 ? rp[3-2]>>16 : rp[3-2];
232
0
                rp[3-3] = c3 ? rp[3-3]>>16 : rp[3-3];
233
234
0
                int k;
235
0
                for (k = 0; k < 4; k++) {
236
0
                    uint64_t r64 = (uint64_t)rp[3-k];
237
0
                    uint32_t q = (r64 * sy[k]->rcp_freq) >> sy[k]->rcp_shift;
238
0
                    rp[3-k] += sy[k]->bias + q*sy[k]->cmpl_freq;
239
0
                }
240
0
            }
241
0
            if (z < -1) abort();
242
0
        }
243
0
        ptr = (uint8_t *)ptr16;
244
0
    }
245
0
    for (z = NX-1; z >= 0; z--)
246
0
        RansEncFlush(&ransN[z], &ptr);
247
248
0
 empty:
249
    // Finalise block size and return it
250
0
    *out_size = (out_end - ptr) + tab_size;
251
252
0
    memmove(out + tab_size, ptr, out_end-ptr);
253
254
0
    return out;
255
0
}
256
257
unsigned char *rans_uncompress_O0_32x16(unsigned char *in,
258
                                        unsigned int in_size,
259
                                        unsigned char *out,
260
0
                                        unsigned int out_sz) {
261
0
    if (in_size < 16) // 4-states at least
262
0
        return NULL;
263
264
0
    if (out_sz >= INT_MAX)
265
0
        return NULL; // protect against some overflow cases
266
267
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
268
0
    if (out_sz > 100000)
269
0
        return NULL;
270
0
#endif
271
272
    /* Load in the static tables */
273
0
    unsigned char *cp = in, *out_free = NULL;
274
0
    unsigned char *cp_end = in + in_size;
275
0
    int i;
276
0
    uint32_t s3[TOTFREQ]; // For TF_SHIFT <= 12
277
278
0
    if (!out)
279
0
        out_free = out = malloc(out_sz);
280
0
    if (!out)
281
0
        return NULL;
282
283
    // Precompute reverse lookup of frequency.
284
0
    uint32_t F[256] = {0}, fsum;
285
0
    int fsz = decode_freq(cp, cp_end, F, &fsum);
286
0
    if (!fsz)
287
0
        goto err;
288
0
    cp += fsz;
289
290
0
    normalise_freq_shift(F, fsum, TOTFREQ);
291
292
    // Build symbols; fixme, do as part of decode, see the _d variant
293
0
    if (rans_F_to_s3(F, TF_SHIFT, s3))
294
0
        goto err;
295
296
0
    if (cp_end - cp < NX * 4)
297
0
        goto err;
298
299
0
    int z;
300
0
    RansState R[NX];
301
0
    for (z = 0; z < NX; z++) {
302
0
        RansDecInit(&R[z], &cp);
303
0
        if (R[z] < RANS_BYTE_L)
304
0
            goto err;
305
0
    }
306
307
0
    int out_end = (out_sz&~(NX-1));
308
0
    const uint32_t mask = (1u << TF_SHIFT)-1;
309
0
    cp_end -= NX*2; // worst case for renorm bytes
310
311
    // assume NX is divisible by 4
312
0
    assert(NX%4==0);
313
314
    // Unsafe loop with no ptr overflow checking within loop itself
315
0
    for (i=0; likely(i < out_end && cp < cp_end); i+=NX) {
316
0
        for (z = 0; z < NX; z+=4) {
317
0
            uint32_t S[4];
318
0
            S[0] = s3[R[z+0] & mask];
319
0
            S[1] = s3[R[z+1] & mask];
320
0
            S[2] = s3[R[z+2] & mask];
321
0
            S[3] = s3[R[z+3] & mask];
322
323
0
            R[z+0] = (S[0]>>(TF_SHIFT+8)) * (R[z+0] >> TF_SHIFT)
324
0
                + ((S[0]>>8) & mask);
325
0
            R[z+1] = (S[1]>>(TF_SHIFT+8)) * (R[z+1] >> TF_SHIFT)
326
0
                + ((S[1]>>8) & mask);
327
0
            R[z+2] = (S[2]>>(TF_SHIFT+8)) * (R[z+2] >> TF_SHIFT)
328
0
                + ((S[2]>>8) & mask);
329
0
            R[z+3] = (S[3]>>(TF_SHIFT+8)) * (R[z+3] >> TF_SHIFT)
330
0
                + ((S[3]>>8) & mask);
331
332
0
            out[i+z+0] = S[0];
333
0
            out[i+z+1] = S[1];
334
0
            out[i+z+2] = S[2];
335
0
            out[i+z+3] = S[3];
336
337
0
            RansDecRenorm(&R[z+0], &cp);
338
0
            RansDecRenorm(&R[z+1], &cp);
339
0
            RansDecRenorm(&R[z+2], &cp);
340
0
            RansDecRenorm(&R[z+3], &cp);
341
342
0
            if (NX%8==0) {
343
0
                z += 4;
344
0
                S[0] = s3[R[z+0] & mask];
345
0
                S[1] = s3[R[z+1] & mask];
346
0
                S[2] = s3[R[z+2] & mask];
347
0
                S[3] = s3[R[z+3] & mask];
348
349
0
                R[z+0] = (S[0]>>(TF_SHIFT+8)) * (R[z+0] >> TF_SHIFT)
350
0
                    + ((S[0]>>8) & mask);
351
0
                R[z+1] = (S[1]>>(TF_SHIFT+8)) * (R[z+1] >> TF_SHIFT)
352
0
                    + ((S[1]>>8) & mask);
353
0
                R[z+2] = (S[2]>>(TF_SHIFT+8)) * (R[z+2] >> TF_SHIFT)
354
0
                    + ((S[2]>>8) & mask);
355
0
                R[z+3] = (S[3]>>(TF_SHIFT+8)) * (R[z+3] >> TF_SHIFT)
356
0
                    + ((S[3]>>8) & mask);
357
358
0
                out[i+z+0] = S[0];
359
0
                out[i+z+1] = S[1];
360
0
                out[i+z+2] = S[2];
361
0
                out[i+z+3] = S[3];
362
363
0
                RansDecRenorm(&R[z+0], &cp);
364
0
                RansDecRenorm(&R[z+1], &cp);
365
0
                RansDecRenorm(&R[z+2], &cp);
366
0
                RansDecRenorm(&R[z+3], &cp);
367
0
            }
368
0
        }
369
0
    }
370
371
    // Safe loop
372
0
    for (; i < out_end; i+=NX) {
373
0
        for (z = 0; z < NX; z+=4) {
374
0
            uint32_t S[4];
375
0
            S[0] = s3[R[z+0] & mask];
376
0
            S[1] = s3[R[z+1] & mask];
377
0
            S[2] = s3[R[z+2] & mask];
378
0
            S[3] = s3[R[z+3] & mask];
379
380
0
            R[z+0] = (S[0]>>(TF_SHIFT+8)) * (R[z+0] >> TF_SHIFT)
381
0
                + ((S[0]>>8) & mask);
382
0
            R[z+1] = (S[1]>>(TF_SHIFT+8)) * (R[z+1] >> TF_SHIFT)
383
0
                + ((S[1]>>8) & mask);
384
0
            R[z+2] = (S[2]>>(TF_SHIFT+8)) * (R[z+2] >> TF_SHIFT)
385
0
                + ((S[2]>>8) & mask);
386
0
            R[z+3] = (S[3]>>(TF_SHIFT+8)) * (R[z+3] >> TF_SHIFT)
387
0
                + ((S[3]>>8) & mask);
388
389
0
            out[i+z+0] = S[0];
390
0
            out[i+z+1] = S[1];
391
0
            out[i+z+2] = S[2];
392
0
            out[i+z+3] = S[3];
393
394
0
            RansDecRenormSafe(&R[z+0], &cp, cp_end+NX*2);
395
0
            RansDecRenormSafe(&R[z+1], &cp, cp_end+NX*2);
396
0
            RansDecRenormSafe(&R[z+2], &cp, cp_end+NX*2);
397
0
            RansDecRenormSafe(&R[z+3], &cp, cp_end+NX*2);
398
0
        }
399
0
    }
400
401
0
    for (z = out_sz & (NX-1); z-- > 0; )
402
0
        out[out_end + z] = s3[R[z] & mask];
403
404
    //fprintf(stderr, "    0 Decoded %d bytes\n", (int)(cp-in)); //c-size
405
406
0
    return out;
407
408
0
 err:
409
0
    free(out_free);
410
0
    return NULL;
411
0
}
412
413
414
//-----------------------------------------------------------------------------
415
unsigned char *rans_compress_O1_32x16(unsigned char *in,
416
                                      unsigned int in_size,
417
                                      unsigned char *out,
418
0
                                      unsigned int *out_size) {
419
0
    unsigned char *cp, *out_end, *out_free = NULL;
420
0
    unsigned int tab_size;
421
0
    int bound = rans_compress_bound_4x16(in_size,1)-20, z;
422
0
    RansState ransN[NX];
423
424
0
    if (in_size < NX) // force O0 instead
425
0
        return NULL;
426
427
0
    if (!out) {
428
0
        *out_size = bound;
429
0
        out_free = out = malloc(*out_size);
430
0
    }
431
0
    if (!out || bound > *out_size)
432
0
        return NULL;
433
434
0
    if (((size_t)out)&1)
435
0
        bound--;
436
0
    out_end = out + bound;
437
438
0
    RansEncSymbol (*syms)[256] = htscodecs_tls_alloc(256 * (sizeof(*syms)));
439
0
    if (!syms) {
440
0
        free(out_free);
441
0
        return NULL;
442
0
    }
443
444
0
    cp = out;
445
0
    int shift = encode_freq1(in, in_size, 32, syms, &cp); 
446
0
    if (shift < 0) {
447
0
        free(out_free);
448
0
        htscodecs_tls_free(syms);
449
0
        return NULL;
450
0
    }
451
0
    tab_size = cp - out;
452
453
0
    for (z = 0; z < NX; z++)
454
0
      RansEncInit(&ransN[z]);
455
456
0
    uint8_t* ptr = out_end;
457
458
0
    int iN[NX], isz4 = in_size/NX, i;
459
0
    for (z = 0; z < NX; z++)
460
0
        iN[z] = (z+1)*isz4-2;
461
462
0
    unsigned char lN[NX];
463
0
    for (z = 0; z < NX; z++)
464
0
        lN[z] = in[iN[z]+1];
465
466
    // Deal with the remainder
467
0
    z = NX-1;
468
0
    lN[z] = in[in_size-1];
469
0
    for (iN[z] = in_size-2; iN[z] > NX*isz4-2; iN[z]--) {
470
0
        unsigned char c = in[iN[z]];
471
0
        RansEncPutSymbol(&ransN[z], &ptr, &syms[c][lN[z]]);
472
0
        lN[z] = c;
473
0
    }
474
475
0
    unsigned char *i32[NX];
476
0
    for (i = 0; i < NX; i++)
477
0
        i32[i] = &in[iN[i]];
478
479
0
    for (; likely(i32[0] >= in); ) {
480
0
        uint16_t *ptr16 = (uint16_t *)ptr;
481
0
        for (z = NX-1; z >= 0; z-=4) {
482
0
            RansEncSymbol *sy[4];
483
0
            int k;
484
485
0
            for (k = 0; k < 4; k++) {
486
0
                sy[k] = &syms[*i32[z-k]][lN[z-k]];
487
0
                lN[z-k] = *i32[z-k]--;
488
0
            }
489
490
            // RansEncPutSymbol added in-situ
491
0
            for (k = 0; k < 4; k++) {
492
0
                int c = ransN[z-k] > sy[k]->x_max;
493
0
#ifdef HTSCODECS_LITTLE_ENDIAN
494
0
                ptr16[-1] = ransN[z-k];
495
#else
496
                ((uint8_t *)&ptr16[-1])[0] = ransN[z-k];
497
                ((uint8_t *)&ptr16[-1])[1] = ransN[z-k]>>8;
498
#endif
499
0
                ptr16 -= c;
500
                //ransN[z-k] >>= c<<4;
501
0
                ransN[z-k] = c ? ransN[z-k]>>16 : ransN[z-k];
502
0
            }
503
504
0
            for (k = 0; k < 4; k++) {
505
0
                uint64_t r64 = ransN[z-k];
506
0
                uint32_t q = (r64 * sy[k]->rcp_freq) >> sy[k]->rcp_shift;
507
0
                ransN[z-k] += sy[k]->bias + q*sy[k]->cmpl_freq;
508
0
            }
509
0
        }
510
0
        ptr = (uint8_t *)ptr16;
511
0
    }
512
513
0
    for (z = NX-1; z>=0; z--)
514
0
        RansEncPutSymbol(&ransN[z], &ptr, &syms[0][lN[z]]);
515
516
0
    for (z = NX-1; z>=0; z--)
517
0
        RansEncFlush(&ransN[z], &ptr);
518
519
0
    *out_size = (out_end - ptr) + tab_size;
520
521
0
    cp = out;
522
0
    memmove(out + tab_size, ptr, out_end-ptr);
523
524
0
    htscodecs_tls_free(syms);
525
0
    return out;
526
0
}
527
528
//#define MAGIC2 111
529
0
#define MAGIC2 179
530
//#define MAGIC2 0
531
532
unsigned char *rans_uncompress_O1_32x16(unsigned char *in,
533
                                        unsigned int in_size,
534
                                        unsigned char *out,
535
0
                                        unsigned int out_sz) {
536
0
    if (in_size < NX*4) // 4-states at least
537
0
        return NULL;
538
539
0
    if (out_sz >= INT_MAX)
540
0
        return NULL; // protect against some overflow cases
541
542
0
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
543
0
    if (out_sz > 100000)
544
0
        return NULL;
545
0
#endif
546
547
    /* Load in the static tables */
548
0
    unsigned char *cp = in, *cp_end = in+in_size, *out_free = NULL;
549
0
    unsigned char *c_freq = NULL;
550
0
    int i;
551
552
    /*
553
     * Somewhat complex memory layout.
554
     * With shift==12 (TF_SHIFT_O1) we fill out use both sfb and fb.
555
     * With shift==10 (...O1_FAST)  we fill out and use s3 only.
556
     *
557
     * sfb+fb is larger, therefore we allocate this much memory.
558
     */
559
0
    uint8_t *sfb_ = htscodecs_tls_alloc(256*
560
0
                                        ((TOTFREQ_O1+MAGIC2)*sizeof(*sfb_)
561
0
                                         +256 * sizeof(fb_t)));
562
0
    if (!sfb_)
563
0
        return NULL;
564
565
    // sfb and fb are consecutive
566
0
    uint8_t *sfb[257];
567
0
    if ((*cp >> 4) == TF_SHIFT_O1) {
568
0
        for (i = 0; i <= 256; i++)
569
0
            sfb[i]=  sfb_ + i*(TOTFREQ_O1+MAGIC2);
570
0
    } else {
571
0
        for (i = 0; i <= 256; i++)
572
0
            sfb[i]=  sfb_ + i*(TOTFREQ_O1_FAST+MAGIC2);
573
0
    }
574
0
    fb_t (*fb)[256] = (fb_t (*)[256]) sfb[256];
575
576
    // NOTE: s3 overlaps sfb/fb
577
0
    uint32_t (*s3)[TOTFREQ_O1_FAST] = (uint32_t (*)[TOTFREQ_O1_FAST])sfb_;
578
579
0
    if (!out)
580
0
        out_free = out = malloc(out_sz);
581
582
0
    if (!out)
583
0
        goto err;
584
585
    //fprintf(stderr, "out_sz=%d\n", out_sz);
586
587
    // compressed header? If so uncompress it
588
0
    unsigned char *tab_end = NULL;
589
0
    unsigned char *c_freq_end = cp_end;
590
0
    unsigned int shift = *cp >> 4;
591
0
    if (*cp++ & 1) {
592
0
        uint32_t u_freq_sz, c_freq_sz;
593
0
        cp += var_get_u32(cp, cp_end, &u_freq_sz);
594
0
        cp += var_get_u32(cp, cp_end, &c_freq_sz);
595
0
        if (c_freq_sz >= cp_end - cp - 16)
596
0
            goto err;
597
0
        tab_end = cp + c_freq_sz;
598
0
        if (!(c_freq = rans_uncompress_O0_4x16(cp, c_freq_sz, NULL,u_freq_sz)))
599
0
            goto err;
600
0
        cp = c_freq;
601
0
        c_freq_end = c_freq + u_freq_sz;
602
0
    }
603
604
    // Decode order-0 symbol list; avoids needing in order-1 tables
605
0
    cp += decode_freq1(cp, c_freq_end, shift, NULL, s3, sfb, fb);
606
607
0
    if (tab_end)
608
0
        cp = tab_end;
609
0
    free(c_freq);
610
0
    c_freq = NULL;
611
612
0
    if (cp_end - cp < NX * 4)
613
0
        goto err;
614
615
0
    RansState R[NX];
616
0
    uint8_t *ptr = cp, *ptr_end = in + in_size - 2*NX;
617
0
    int z;
618
0
    for (z = 0; z < NX; z++) {
619
0
        RansDecInit(&R[z], &ptr);
620
0
        if (R[z] < RANS_BYTE_L)
621
0
            goto err;
622
0
    }
623
624
0
    int isz4 = out_sz/NX;
625
0
    int i4[NX], l[NX] = {0};
626
0
    for (z = 0; z < NX; z++)
627
0
        i4[z] = z*isz4;
628
629
0
    const int low_ent = in_size < 0.2 * out_sz;
630
631
    // Around 15% faster to specialise for 10/12 than to have one
632
    // loop with shift as a variable.
633
0
    if (shift == TF_SHIFT_O1) {
634
        // TF_SHIFT_O1 = 12
635
0
        const uint32_t mask = ((1u << TF_SHIFT_O1)-1);
636
0
        for (; likely(i4[0] < isz4);) {
637
0
            for (z = 0; z < NX; z+=4) {
638
0
                uint16_t m[4], c[4];
639
640
0
                c[0] = sfb[l[z+0]][m[0] = R[z+0] & mask];
641
0
                c[1] = sfb[l[z+1]][m[1] = R[z+1] & mask];
642
0
                c[2] = sfb[l[z+2]][m[2] = R[z+2] & mask];
643
0
                c[3] = sfb[l[z+3]][m[3] = R[z+3] & mask];
644
645
0
                R[z+0] = fb[l[z+0]][c[0]].f * (R[z+0]>>TF_SHIFT_O1);
646
0
                R[z+0] += m[0] - fb[l[z+0]][c[0]].b;
647
648
0
                R[z+1] = fb[l[z+1]][c[1]].f * (R[z+1]>>TF_SHIFT_O1);
649
0
                R[z+1] += m[1] - fb[l[z+1]][c[1]].b;
650
651
0
                R[z+2] = fb[l[z+2]][c[2]].f * (R[z+2]>>TF_SHIFT_O1);
652
0
                R[z+2] += m[2] - fb[l[z+2]][c[2]].b;
653
654
0
                R[z+3] = fb[l[z+3]][c[3]].f * (R[z+3]>>TF_SHIFT_O1);
655
0
                R[z+3] += m[3] - fb[l[z+3]][c[3]].b;
656
657
0
                out[i4[z+0]++] = l[z+0] = c[0];
658
0
                out[i4[z+1]++] = l[z+1] = c[1];
659
0
                out[i4[z+2]++] = l[z+2] = c[2];
660
0
                out[i4[z+3]++] = l[z+3] = c[3];
661
662
0
                if (!low_ent && likely(ptr < ptr_end)) {
663
0
                    RansDecRenorm(&R[z+0], &ptr);
664
0
                    RansDecRenorm(&R[z+1], &ptr);
665
0
                    RansDecRenorm(&R[z+2], &ptr);
666
0
                    RansDecRenorm(&R[z+3], &ptr);
667
0
                } else {
668
0
                    RansDecRenormSafe(&R[z+0], &ptr, ptr_end+2*NX);
669
0
                    RansDecRenormSafe(&R[z+1], &ptr, ptr_end+2*NX);
670
0
                    RansDecRenormSafe(&R[z+2], &ptr, ptr_end+2*NX);
671
0
                    RansDecRenormSafe(&R[z+3], &ptr, ptr_end+2*NX);
672
0
                }
673
0
            }
674
0
        }
675
676
        // Remainder
677
0
        for (; i4[NX-1] < out_sz; i4[NX-1]++) {
678
0
            uint32_t m = R[NX-1] & ((1u<<TF_SHIFT_O1)-1);
679
0
            unsigned char c = sfb[l[NX-1]][m];
680
0
            out[i4[NX-1]] = c;
681
0
            R[NX-1] = fb[l[NX-1]][c].f * (R[NX-1]>>TF_SHIFT_O1) +
682
0
                m - fb[l[NX-1]][c].b;
683
0
            RansDecRenormSafe(&R[NX-1], &ptr, ptr_end + 2*NX);
684
0
            l[NX-1] = c;
685
0
        }
686
0
    } else {
687
        // TF_SHIFT_O1 = 10
688
0
        const uint32_t mask = ((1u << TF_SHIFT_O1_FAST)-1);
689
0
        for (; likely(i4[0] < isz4);) {
690
0
            for (z = 0; z < NX; z+=4) {
691
                // Merged sfb and fb into single s3 lookup.
692
                // The m[4] array completely vanishes in this method.
693
0
                uint32_t S[4] = {
694
0
                    s3[l[z+0]][R[z+0] & mask],
695
0
                    s3[l[z+1]][R[z+1] & mask],
696
0
                    s3[l[z+2]][R[z+2] & mask],
697
0
                    s3[l[z+3]][R[z+3] & mask],
698
0
                };
699
700
0
                l[z+0] = out[i4[z+0]++] = S[0];
701
0
                l[z+1] = out[i4[z+1]++] = S[1];
702
0
                l[z+2] = out[i4[z+2]++] = S[2];
703
0
                l[z+3] = out[i4[z+3]++] = S[3];
704
705
0
                uint32_t F[4] = {
706
0
                    S[0]>>(TF_SHIFT_O1_FAST+8),
707
0
                    S[1]>>(TF_SHIFT_O1_FAST+8),
708
0
                    S[2]>>(TF_SHIFT_O1_FAST+8),
709
0
                    S[3]>>(TF_SHIFT_O1_FAST+8),
710
0
                };
711
0
                uint32_t B[4] = {
712
0
                    (S[0]>>8) & mask,
713
0
                    (S[1]>>8) & mask,
714
0
                    (S[2]>>8) & mask,
715
0
                    (S[3]>>8) & mask,
716
0
                };
717
718
0
                R[z+0] = F[0] * (R[z+0]>>TF_SHIFT_O1_FAST) + B[0];
719
0
                R[z+1] = F[1] * (R[z+1]>>TF_SHIFT_O1_FAST) + B[1];
720
0
                R[z+2] = F[2] * (R[z+2]>>TF_SHIFT_O1_FAST) + B[2];
721
0
                R[z+3] = F[3] * (R[z+3]>>TF_SHIFT_O1_FAST) + B[3];
722
723
0
                if (!low_ent && (ptr < ptr_end)) {
724
                    // branchless & asm
725
0
                    RansDecRenorm(&R[z+0], &ptr);
726
0
                    RansDecRenorm(&R[z+1], &ptr);
727
0
                    RansDecRenorm(&R[z+2], &ptr);
728
0
                    RansDecRenorm(&R[z+3], &ptr);
729
0
                } else {
730
                    // branched, but better when predictable
731
0
                    RansDecRenormSafe(&R[z+0], &ptr, ptr_end+2*NX);
732
0
                    RansDecRenormSafe(&R[z+1], &ptr, ptr_end+2*NX);
733
0
                    RansDecRenormSafe(&R[z+2], &ptr, ptr_end+2*NX);
734
0
                    RansDecRenormSafe(&R[z+3], &ptr, ptr_end+2*NX);
735
0
                }
736
0
            }
737
0
        }
738
739
        // Remainder
740
0
        for (; i4[NX-1] < out_sz; i4[NX-1]++) {
741
0
            uint32_t S = s3[l[NX-1]][R[NX-1] & ((1u<<TF_SHIFT_O1_FAST)-1)];
742
0
            out[i4[NX-1]] = l[NX-1] = S&0xff;
743
0
            R[NX-1] = (S>>(TF_SHIFT_O1_FAST+8)) * (R[NX-1]>>TF_SHIFT_O1_FAST)
744
0
                + ((S>>8) & ((1u<<TF_SHIFT_O1_FAST)-1));
745
0
            RansDecRenormSafe(&R[NX-1], &ptr, ptr_end + 2*NX);
746
0
        }
747
0
    }
748
    //fprintf(stderr, "    1 Decoded %d bytes\n", (int)(ptr-in)); //c-size
749
750
0
    htscodecs_tls_free(sfb_);
751
0
    return out;
752
753
0
 err:
754
0
    htscodecs_tls_free(sfb_);
755
0
    free(out_free);
756
0
    free(c_freq);
757
758
0
    return NULL;
759
0
}