/src/htslib/htscodecs/htscodecs/rANS_static.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * Copyright (c) 2014-2022 Genome Research Ltd. |
3 | | * Author(s): James Bonfield |
4 | | * |
5 | | * Redistribution and use in source and binary forms, with or without |
6 | | * modification, are permitted provided that the following conditions are met: |
7 | | * |
8 | | * 1. Redistributions of source code must retain the above copyright notice, |
9 | | * this list of conditions and the following disclaimer. |
10 | | * |
11 | | * 2. Redistributions in binary form must reproduce the above |
12 | | * copyright notice, this list of conditions and the following |
13 | | * disclaimer in the documentation and/or other materials provided |
14 | | * with the distribution. |
15 | | * |
16 | | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger |
17 | | * Institute nor the names of its contributors may be used to endorse |
18 | | * or promote products derived from this software without specific |
19 | | * prior written permission. |
20 | | * |
21 | | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS |
22 | | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
23 | | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
24 | | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH |
25 | | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
26 | | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
27 | | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
28 | | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
29 | | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
30 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
31 | | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
32 | | */ |
33 | | |
34 | | #include "config.h" |
35 | | |
36 | | // Use 11 for order-1? |
37 | 8.34M | #define TF_SHIFT 12 |
38 | 10.3k | #define TOTFREQ (1<<TF_SHIFT) |
39 | | |
40 | | #include "rANS_byte.h" |
41 | | #include "utils.h" |
42 | | |
43 | | /*-------------------------------------------------------------------------- */ |
44 | | /* |
45 | | * Example wrapper to use the rans_byte.h functions included above. |
46 | | * |
47 | | * This demonstrates how to use, and unroll, an order-0 and order-1 frequency |
48 | | * model. |
49 | | */ |
50 | | |
51 | | #include <stdint.h> |
52 | | #include <stdlib.h> |
53 | | #include <stdio.h> |
54 | | #include <unistd.h> |
55 | | #include <assert.h> |
56 | | #include <string.h> |
57 | | #include <limits.h> |
58 | | #include <sys/time.h> |
59 | | #ifndef NO_THREADS |
60 | | #include <pthread.h> |
61 | | #endif |
62 | | |
63 | | #include "rANS_static.h" |
64 | | |
65 | | #define ABS(a) ((a)>0?(a):-(a)) |
66 | | |
67 | | /*----------------------------------------------------------------------------- |
68 | | * Memory to memory compression functions. |
69 | | * |
70 | | * These are original versions without any manual loop unrolling. They |
71 | | * are easier to understand, but can be up to 2x slower. |
72 | | */ |
73 | | |
74 | | static |
75 | | unsigned char *rans_compress_O0(unsigned char *in, unsigned int in_size, |
76 | 0 | unsigned int *out_size) { |
77 | 0 | unsigned char *out_buf = malloc(1.05*in_size + 257*257*3 + 9); |
78 | 0 | unsigned char *cp, *out_end; |
79 | 0 | RansEncSymbol syms[256]; |
80 | 0 | RansState rans0; |
81 | 0 | RansState rans2; |
82 | 0 | RansState rans1; |
83 | 0 | RansState rans3; |
84 | 0 | uint8_t* ptr; |
85 | 0 | int F[256+MAGIC] = {0}, i, j, tab_size, rle, x, fsum = 0; |
86 | 0 | int m = 0, M = 0; |
87 | 0 | uint64_t tr; |
88 | |
|
89 | 0 | if (!out_buf) |
90 | 0 | return NULL; |
91 | | |
92 | 0 | ptr = out_end = out_buf + (uint32_t)(1.05*in_size) + 257*257*3 + 9; |
93 | | |
94 | | // Compute statistics |
95 | 0 | if (hist8(in, in_size, (uint32_t *)F) < 0) { |
96 | 0 | free(out_buf); |
97 | 0 | return NULL; |
98 | 0 | } |
99 | 0 | tr = in_size ? ((uint64_t)TOTFREQ<<31)/in_size + (1<<30)/in_size : 0; |
100 | |
|
101 | 0 | normalise_harder: |
102 | | // Normalise so T[i] == TOTFREQ |
103 | 0 | for (fsum = m = M = j = 0; j < 256; j++) { |
104 | 0 | if (!F[j]) |
105 | 0 | continue; |
106 | | |
107 | 0 | if (m < F[j]) |
108 | 0 | m = F[j], M = j; |
109 | |
|
110 | 0 | if ((F[j] = (F[j]*tr)>>31) == 0) |
111 | 0 | F[j] = 1; |
112 | 0 | fsum += F[j]; |
113 | 0 | } |
114 | |
|
115 | 0 | fsum++; |
116 | 0 | if (fsum < TOTFREQ) { |
117 | 0 | F[M] += TOTFREQ-fsum; |
118 | 0 | } else if (fsum-TOTFREQ > F[M]/2) { |
119 | | // Corner case to avoid excessive frequency reduction |
120 | 0 | tr = 2104533975; goto normalise_harder; // equiv to *0.98. |
121 | 0 | } else { |
122 | 0 | F[M] -= fsum-TOTFREQ; |
123 | 0 | } |
124 | | |
125 | | //printf("F[%d]=%d\n", M, F[M]); |
126 | 0 | assert(F[M]>0); |
127 | | |
128 | | // Encode statistics. |
129 | 0 | cp = out_buf+9; |
130 | |
|
131 | 0 | for (x = rle = j = 0; j < 256; j++) { |
132 | 0 | if (F[j]) { |
133 | | // j |
134 | 0 | if (rle) { |
135 | 0 | rle--; |
136 | 0 | } else { |
137 | 0 | *cp++ = j; |
138 | 0 | if (!rle && j && F[j-1]) { |
139 | 0 | for(rle=j+1; rle<256 && F[rle]; rle++) |
140 | 0 | ; |
141 | 0 | rle -= j+1; |
142 | 0 | *cp++ = rle; |
143 | 0 | } |
144 | | //fprintf(stderr, "%d: %d %d\n", j, rle, N[j]); |
145 | 0 | } |
146 | | |
147 | | // F[j] |
148 | 0 | if (F[j]<128) { |
149 | 0 | *cp++ = F[j]; |
150 | 0 | } else { |
151 | 0 | *cp++ = 128 | (F[j]>>8); |
152 | 0 | *cp++ = F[j]&0xff; |
153 | 0 | } |
154 | 0 | RansEncSymbolInit(&syms[j], x, F[j], TF_SHIFT); |
155 | 0 | x += F[j]; |
156 | 0 | } |
157 | 0 | } |
158 | 0 | *cp++ = 0; |
159 | | |
160 | | //write(2, out_buf+4, cp-(out_buf+4)); |
161 | 0 | tab_size = cp-out_buf; |
162 | |
|
163 | 0 | RansEncInit(&rans0); |
164 | 0 | RansEncInit(&rans1); |
165 | 0 | RansEncInit(&rans2); |
166 | 0 | RansEncInit(&rans3); |
167 | |
|
168 | 0 | switch (i=(in_size&3)) { |
169 | 0 | case 3: RansEncPutSymbol(&rans2, &ptr, &syms[in[in_size-(i-2)]]); |
170 | | // fall-through |
171 | 0 | case 2: RansEncPutSymbol(&rans1, &ptr, &syms[in[in_size-(i-1)]]); |
172 | | // fall-through |
173 | 0 | case 1: RansEncPutSymbol(&rans0, &ptr, &syms[in[in_size-(i-0)]]); |
174 | | // fall-through |
175 | 0 | case 0: |
176 | 0 | break; |
177 | 0 | } |
178 | 0 | for (i=(in_size &~3); likely(i>0); i-=4) { |
179 | 0 | RansEncSymbol *s3 = &syms[in[i-1]]; |
180 | 0 | RansEncSymbol *s2 = &syms[in[i-2]]; |
181 | 0 | RansEncSymbol *s1 = &syms[in[i-3]]; |
182 | 0 | RansEncSymbol *s0 = &syms[in[i-4]]; |
183 | |
|
184 | 0 | RansEncPutSymbol(&rans3, &ptr, s3); |
185 | 0 | RansEncPutSymbol(&rans2, &ptr, s2); |
186 | 0 | RansEncPutSymbol(&rans1, &ptr, s1); |
187 | 0 | RansEncPutSymbol(&rans0, &ptr, s0); |
188 | 0 | } |
189 | |
|
190 | 0 | RansEncFlush(&rans3, &ptr); |
191 | 0 | RansEncFlush(&rans2, &ptr); |
192 | 0 | RansEncFlush(&rans1, &ptr); |
193 | 0 | RansEncFlush(&rans0, &ptr); |
194 | | |
195 | | // Finalise block size and return it |
196 | 0 | *out_size = (out_end - ptr) + tab_size; |
197 | |
|
198 | 0 | cp = out_buf; |
199 | |
|
200 | 0 | *cp++ = 0; // order |
201 | 0 | *cp++ = ((*out_size-9)>> 0) & 0xff; |
202 | 0 | *cp++ = ((*out_size-9)>> 8) & 0xff; |
203 | 0 | *cp++ = ((*out_size-9)>>16) & 0xff; |
204 | 0 | *cp++ = ((*out_size-9)>>24) & 0xff; |
205 | |
|
206 | 0 | *cp++ = (in_size>> 0) & 0xff; |
207 | 0 | *cp++ = (in_size>> 8) & 0xff; |
208 | 0 | *cp++ = (in_size>>16) & 0xff; |
209 | 0 | *cp++ = (in_size>>24) & 0xff; |
210 | |
|
211 | 0 | memmove(out_buf + tab_size, ptr, out_end-ptr); |
212 | |
|
213 | 0 | return out_buf; |
214 | 0 | } |
215 | | |
216 | | typedef struct { |
217 | | unsigned char R[TOTFREQ]; |
218 | | } ari_decoder; |
219 | | |
220 | | static |
221 | | unsigned char *rans_uncompress_O0(unsigned char *in, unsigned int in_size, |
222 | 202 | unsigned int *out_size) { |
223 | | /* Load in the static tables */ |
224 | 202 | unsigned char *cp = in + 9; |
225 | 202 | unsigned char *cp_end = in + in_size; |
226 | 202 | const uint32_t mask = (1u << TF_SHIFT)-1; |
227 | 202 | int i, j, rle; |
228 | 202 | unsigned int x, y; |
229 | 202 | unsigned int out_sz, in_sz; |
230 | 202 | char *out_buf; |
231 | 202 | RansState R[4]; |
232 | 202 | RansState m[4]; |
233 | 202 | uint16_t sfreq[TOTFREQ+32]; |
234 | 202 | uint16_t ssym [TOTFREQ+32]; // faster, but only needs uint8_t |
235 | 202 | uint32_t sbase[TOTFREQ+16]; // faster, but only needs uint16_t |
236 | | |
237 | 202 | if (in_size < 26) // Need at least this many bytes just to start |
238 | 4 | return NULL; |
239 | | |
240 | 198 | if (*in++ != 0) // Order-0 check |
241 | 0 | return NULL; |
242 | | |
243 | 198 | in_sz = ((in[0])<<0) | ((in[1])<<8) | ((in[2])<<16) | (((uint32_t)in[3])<<24); |
244 | 198 | out_sz = ((in[4])<<0) | ((in[5])<<8) | ((in[6])<<16) | (((uint32_t)in[7])<<24); |
245 | 198 | if (in_sz != in_size-9) |
246 | 8 | return NULL; |
247 | | |
248 | 190 | if (out_sz >= INT_MAX) |
249 | 4 | return NULL; // protect against some overflow cases |
250 | | |
251 | | // For speeding up the fuzzer only. |
252 | | // Small input can lead to large uncompressed data. |
253 | | // We reject this as it just slows things up instead of testing more code |
254 | | // paths (once we've verified a few times for large data). |
255 | 186 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
256 | 186 | if (out_sz > 100000) |
257 | 8 | return NULL; |
258 | 178 | #endif |
259 | | |
260 | 178 | out_buf = malloc(out_sz); |
261 | 178 | if (!out_buf) |
262 | 0 | return NULL; |
263 | | |
264 | | //fprintf(stderr, "out_sz=%d\n", out_sz); |
265 | | |
266 | | // Precompute reverse lookup of frequency. |
267 | 178 | rle = x = y = 0; |
268 | 178 | j = *cp++; |
269 | 4.25k | do { |
270 | 4.25k | int F, C; |
271 | 4.25k | if (cp > cp_end - 16) goto cleanup; // Not enough input bytes left |
272 | 4.24k | if ((F = *cp++) >= 128) { |
273 | 320 | F &= ~128; |
274 | 320 | F = ((F & 127) << 8) | *cp++; |
275 | 320 | } |
276 | 4.24k | C = x; |
277 | | |
278 | 4.24k | if (x + F > TOTFREQ) |
279 | 12 | goto cleanup; |
280 | | |
281 | 655k | for (y = 0; y < F; y++) { |
282 | 651k | ssym [y + C] = j; |
283 | 651k | sfreq[y + C] = F; |
284 | 651k | sbase[y + C] = y; |
285 | 651k | } |
286 | 4.23k | x += F; |
287 | | |
288 | 4.23k | if (!rle && j+1 == *cp) { |
289 | 197 | j = *cp++; |
290 | 197 | rle = *cp++; |
291 | 4.03k | } else if (rle) { |
292 | 2.26k | rle--; |
293 | 2.26k | j++; |
294 | 2.26k | if (j > 255) |
295 | 1 | goto cleanup; |
296 | 2.26k | } else { |
297 | 1.76k | j = *cp++; |
298 | 1.76k | } |
299 | 4.23k | } while(j); |
300 | | |
301 | 157 | if (x < TOTFREQ-1 || x > TOTFREQ) |
302 | 15 | goto cleanup; |
303 | 142 | if (x != TOTFREQ) { |
304 | | // Protection against accessing uninitialised memory in the case |
305 | | // where SUM(freqs) == 4095 and not 4096. |
306 | 46 | ssym [x] = ssym [x-1]; |
307 | 46 | sfreq[x] = sfreq[x-1]; |
308 | 46 | sbase[x] = sbase[x-1]+1; |
309 | 46 | } |
310 | | |
311 | | // 16 bytes of cp here. Also why cp - 16 in above loop. |
312 | 142 | if (cp > cp_end - 16) goto cleanup; // Not enough input bytes left |
313 | | |
314 | 141 | RansDecInit(&R[0], &cp); if (R[0] < RANS_BYTE_L) goto cleanup; |
315 | 133 | RansDecInit(&R[1], &cp); if (R[1] < RANS_BYTE_L) goto cleanup; |
316 | 132 | RansDecInit(&R[2], &cp); if (R[2] < RANS_BYTE_L) goto cleanup; |
317 | 127 | RansDecInit(&R[3], &cp); if (R[3] < RANS_BYTE_L) goto cleanup; |
318 | | |
319 | 125 | int out_end = (out_sz&~3); |
320 | 125 | cp_end -= 8; // within 8 for simplicity of loop below |
321 | | // 2 x likely() here harms gcc 7.5 by about 8% rate drop, but only in O2 |
322 | 951k | for (i=0; likely(i < out_end); i+=4) { |
323 | | // /curr code |
324 | | // gcc7 O2 513/497 562/556++ 556/547 ok |
325 | | // gcc7 O3 566/552 569/553 581/563+ |
326 | | // gcc10 O2 544/538 563/547 541/537-? |
327 | | // gcc10 O3 531/519 546/530 575/546+ |
328 | | // gcc11 O2 512/490 588/540 540/535 mid |
329 | | // gcc11 O3 482/471 553/541 549/535 |
330 | | // gcc12 O2 533/526 544/534 539/535 |
331 | | // gcc12 O3 548/533 502/497-- 553/527 ok |
332 | | // clang10 555/542 564/549 560/541 |
333 | | // clang13 560/553 572/559 556/559 |
334 | 950k | m[0] = R[0] & mask; |
335 | 950k | R[0] = sfreq[m[0]] * (R[0] >> TF_SHIFT) + sbase[m[0]]; |
336 | | |
337 | 950k | m[1] = R[1] & mask; |
338 | 950k | R[1] = sfreq[m[1]] * (R[1] >> TF_SHIFT) + sbase[m[1]]; |
339 | | |
340 | 950k | m[2] = R[2] & mask; |
341 | 950k | R[2] = sfreq[m[2]] * (R[2] >> TF_SHIFT) + sbase[m[2]]; |
342 | | |
343 | 950k | m[3] = R[3] & mask; |
344 | 950k | R[3] = sfreq[m[3]] * (R[3] >> TF_SHIFT) + sbase[m[3]]; |
345 | | |
346 | | // likely() here harms gcc12 -O3 |
347 | 950k | if (cp<cp_end) { |
348 | 33.5k | RansDecRenorm2(&R[0], &R[1], &cp); |
349 | 33.5k | RansDecRenorm2(&R[2], &R[3], &cp); |
350 | 917k | } else { |
351 | 917k | RansDecRenormSafe(&R[0], &cp, cp_end+8); |
352 | 917k | RansDecRenormSafe(&R[1], &cp, cp_end+8); |
353 | 917k | RansDecRenormSafe(&R[2], &cp, cp_end+8); |
354 | 917k | RansDecRenormSafe(&R[3], &cp, cp_end+8); |
355 | 917k | } |
356 | | |
357 | 950k | out_buf[i+0] = ssym[m[0]]; |
358 | 950k | out_buf[i+1] = ssym[m[1]]; |
359 | 950k | out_buf[i+2] = ssym[m[2]]; |
360 | 950k | out_buf[i+3] = ssym[m[3]]; |
361 | 950k | } |
362 | | |
363 | | |
364 | 125 | switch(out_sz&3) { |
365 | 43 | case 3: |
366 | 43 | out_buf[out_end + 2] = ssym[R[2] & mask]; |
367 | | // fall-through |
368 | 93 | case 2: |
369 | 93 | out_buf[out_end + 1] = ssym[R[1] & mask]; |
370 | | // fall-through |
371 | 104 | case 1: |
372 | 104 | out_buf[out_end] = ssym[R[0] & mask]; |
373 | | // fall-through |
374 | 125 | default: |
375 | 125 | break; |
376 | 125 | } |
377 | | |
378 | 125 | *out_size = out_sz; |
379 | 125 | return (unsigned char *)out_buf; |
380 | | |
381 | 53 | cleanup: |
382 | 53 | free(out_buf); |
383 | 53 | return NULL; |
384 | 125 | } |
385 | | |
386 | | static |
387 | | unsigned char *rans_compress_O1(unsigned char *in, unsigned int in_size, |
388 | 0 | unsigned int *out_size) { |
389 | 0 | unsigned char *out_buf = NULL, *out_end, *cp; |
390 | 0 | unsigned int tab_size, rle_i, rle_j; |
391 | | |
392 | |
|
393 | 0 | if (in_size < 4) |
394 | 0 | return rans_compress_O0(in, in_size, out_size); |
395 | | |
396 | 0 | int (*F)[256]; |
397 | 0 | RansEncSymbol (*syms)[256]; |
398 | |
|
399 | 0 | uint8_t *mem = htscodecs_tls_alloc(256 * (sizeof(*syms) + sizeof(*F))); |
400 | 0 | if (!mem) |
401 | 0 | return NULL; |
402 | 0 | syms = (RansEncSymbol (*)[256])mem; |
403 | 0 | F = (int (*)[256])(mem + 256*sizeof(*syms)); |
404 | 0 | memset(F, 0, 256*sizeof(*F)); |
405 | |
|
406 | 0 | if (!syms) goto cleanup; |
407 | 0 | int T[256+MAGIC] = {0}; |
408 | 0 | int i, j; |
409 | |
|
410 | 0 | out_buf = malloc(1.05*in_size + 257*257*3 + 9); |
411 | 0 | if (!out_buf) goto cleanup; |
412 | | |
413 | 0 | out_end = out_buf + (uint32_t)(1.05*in_size) + 257*257*3 + 9; |
414 | 0 | cp = out_buf+9; |
415 | |
|
416 | 0 | if (hist1_4(in, in_size, (uint32_t (*)[256])F, (uint32_t *)T) < 0) { |
417 | 0 | free(out_buf); |
418 | 0 | out_buf = NULL; |
419 | 0 | goto cleanup; |
420 | 0 | } |
421 | | |
422 | 0 | F[0][in[1*(in_size>>2)]]++; |
423 | 0 | F[0][in[2*(in_size>>2)]]++; |
424 | 0 | F[0][in[3*(in_size>>2)]]++; |
425 | 0 | T[0]+=3; |
426 | | |
427 | | |
428 | | // Normalise so T[i] == TOTFREQ |
429 | 0 | for (rle_i = i = 0; i < 256; i++) { |
430 | 0 | int t2, m, M; |
431 | 0 | unsigned int x; |
432 | |
|
433 | 0 | if (T[i] == 0) |
434 | 0 | continue; |
435 | | |
436 | | //uint64_t p = (TOTFREQ * TOTFREQ) / t; |
437 | 0 | double p = ((double)TOTFREQ)/T[i]; |
438 | 0 | normalise_harder: |
439 | 0 | for (t2 = m = M = j = 0; j < 256; j++) { |
440 | 0 | if (!F[i][j]) |
441 | 0 | continue; |
442 | | |
443 | 0 | if (m < F[i][j]) |
444 | 0 | m = F[i][j], M = j; |
445 | | |
446 | | //if ((F[i][j] = (F[i][j] * p) / TOTFREQ) == 0) |
447 | 0 | if ((F[i][j] *= p) == 0) |
448 | 0 | F[i][j] = 1; |
449 | 0 | t2 += F[i][j]; |
450 | 0 | } |
451 | |
|
452 | 0 | t2++; |
453 | 0 | if (t2 < TOTFREQ) { |
454 | 0 | F[i][M] += TOTFREQ-t2; |
455 | 0 | } else if (t2-TOTFREQ >= F[i][M]/2) { |
456 | | // Corner case to avoid excessive frequency reduction |
457 | 0 | p = .98; goto normalise_harder; |
458 | 0 | } else { |
459 | 0 | F[i][M] -= t2-TOTFREQ; |
460 | 0 | } |
461 | | |
462 | | // Store frequency table |
463 | | // i |
464 | 0 | if (rle_i) { |
465 | 0 | rle_i--; |
466 | 0 | } else { |
467 | 0 | *cp++ = i; |
468 | | // FIXME: could use order-0 statistics to observe which alphabet |
469 | | // symbols are present and base RLE on that ordering instead. |
470 | 0 | if (i && T[i-1]) { |
471 | 0 | for(rle_i=i+1; rle_i<256 && T[rle_i]; rle_i++) |
472 | 0 | ; |
473 | 0 | rle_i -= i+1; |
474 | 0 | *cp++ = rle_i; |
475 | 0 | } |
476 | 0 | } |
477 | |
|
478 | 0 | int *F_i_ = F[i]; |
479 | 0 | x = 0; |
480 | 0 | rle_j = 0; |
481 | 0 | for (j = 0; j < 256; j++) { |
482 | 0 | if (F_i_[j]) { |
483 | | //fprintf(stderr, "F[%d][%d]=%d, x=%d\n", i, j, F_i_[j], x); |
484 | | |
485 | | // j |
486 | 0 | if (rle_j) { |
487 | 0 | rle_j--; |
488 | 0 | } else { |
489 | 0 | *cp++ = j; |
490 | 0 | if (!rle_j && j && F_i_[j-1]) { |
491 | 0 | for(rle_j=j+1; rle_j<256 && F_i_[rle_j]; rle_j++) |
492 | 0 | ; |
493 | 0 | rle_j -= j+1; |
494 | 0 | *cp++ = rle_j; |
495 | 0 | } |
496 | 0 | } |
497 | | |
498 | | // F_i_[j] |
499 | 0 | if (F_i_[j]<128) { |
500 | 0 | *cp++ = F_i_[j]; |
501 | 0 | } else { |
502 | 0 | *cp++ = 128 | (F_i_[j]>>8); |
503 | 0 | *cp++ = F_i_[j]&0xff; |
504 | 0 | } |
505 | |
|
506 | 0 | RansEncSymbolInit(&syms[i][j], x, F_i_[j], TF_SHIFT); |
507 | 0 | x += F_i_[j]; |
508 | 0 | } |
509 | 0 | } |
510 | 0 | *cp++ = 0; |
511 | 0 | } |
512 | 0 | *cp++ = 0; |
513 | | |
514 | | //write(2, out_buf+4, cp-(out_buf+4)); |
515 | 0 | tab_size = cp - out_buf; |
516 | 0 | assert(tab_size < 257*257*3); |
517 | | |
518 | 0 | RansState rans0, rans1, rans2, rans3; |
519 | 0 | RansEncInit(&rans0); |
520 | 0 | RansEncInit(&rans1); |
521 | 0 | RansEncInit(&rans2); |
522 | 0 | RansEncInit(&rans3); |
523 | |
|
524 | 0 | uint8_t* ptr = out_end; |
525 | |
|
526 | 0 | int isz4 = in_size>>2; |
527 | 0 | int i0 = 1*isz4-2; |
528 | 0 | int i1 = 2*isz4-2; |
529 | 0 | int i2 = 3*isz4-2; |
530 | 0 | int i3 = 4*isz4-2; |
531 | |
|
532 | 0 | unsigned char l0 = in[i0+1]; |
533 | 0 | unsigned char l1 = in[i1+1]; |
534 | 0 | unsigned char l2 = in[i2+1]; |
535 | 0 | unsigned char l3 = in[i3+1]; |
536 | | |
537 | | // Deal with the remainder |
538 | 0 | l3 = in[in_size-1]; |
539 | 0 | for (i3 = in_size-2; i3 > 4*isz4-2; i3--) { |
540 | 0 | unsigned char c3 = in[i3]; |
541 | 0 | RansEncPutSymbol(&rans3, &ptr, &syms[c3][l3]); |
542 | 0 | l3 = c3; |
543 | 0 | } |
544 | |
|
545 | 0 | for (; likely(i0 >= 0); i0--, i1--, i2--, i3--) { |
546 | 0 | unsigned char c3 = in[i3]; |
547 | 0 | unsigned char c2 = in[i2]; |
548 | 0 | unsigned char c1 = in[i1]; |
549 | 0 | unsigned char c0 = in[i0]; |
550 | |
|
551 | 0 | RansEncSymbol *s3 = &syms[c3][l3]; |
552 | 0 | RansEncSymbol *s2 = &syms[c2][l2]; |
553 | 0 | RansEncSymbol *s1 = &syms[c1][l1]; |
554 | 0 | RansEncSymbol *s0 = &syms[c0][l0]; |
555 | |
|
556 | 0 | RansEncPutSymbol4(&rans3, &rans2, &rans1, &rans0, &ptr, |
557 | 0 | s3, s2, s1, s0); |
558 | |
|
559 | 0 | l3 = c3; |
560 | 0 | l2 = c2; |
561 | 0 | l1 = c1; |
562 | 0 | l0 = c0; |
563 | 0 | } |
564 | |
|
565 | 0 | RansEncPutSymbol(&rans3, &ptr, &syms[0][l3]); |
566 | 0 | RansEncPutSymbol(&rans2, &ptr, &syms[0][l2]); |
567 | 0 | RansEncPutSymbol(&rans1, &ptr, &syms[0][l1]); |
568 | 0 | RansEncPutSymbol(&rans0, &ptr, &syms[0][l0]); |
569 | |
|
570 | 0 | RansEncFlush(&rans3, &ptr); |
571 | 0 | RansEncFlush(&rans2, &ptr); |
572 | 0 | RansEncFlush(&rans1, &ptr); |
573 | 0 | RansEncFlush(&rans0, &ptr); |
574 | |
|
575 | 0 | *out_size = (out_end - ptr) + tab_size; |
576 | |
|
577 | 0 | cp = out_buf; |
578 | 0 | *cp++ = 1; // order |
579 | |
|
580 | 0 | *cp++ = ((*out_size-9)>> 0) & 0xff; |
581 | 0 | *cp++ = ((*out_size-9)>> 8) & 0xff; |
582 | 0 | *cp++ = ((*out_size-9)>>16) & 0xff; |
583 | 0 | *cp++ = ((*out_size-9)>>24) & 0xff; |
584 | |
|
585 | 0 | *cp++ = (in_size>> 0) & 0xff; |
586 | 0 | *cp++ = (in_size>> 8) & 0xff; |
587 | 0 | *cp++ = (in_size>>16) & 0xff; |
588 | 0 | *cp++ = (in_size>>24) & 0xff; |
589 | |
|
590 | 0 | memmove(out_buf + tab_size, ptr, out_end-ptr); |
591 | |
|
592 | 0 | cleanup: |
593 | 0 | htscodecs_tls_free(syms); |
594 | |
|
595 | 0 | return out_buf; |
596 | 0 | } |
597 | | |
598 | | static |
599 | | unsigned char *rans_uncompress_O1(unsigned char *in, unsigned int in_size, |
600 | 224 | unsigned int *out_size) { |
601 | | /* Load in the static tables */ |
602 | 224 | unsigned char *cp = in + 9; |
603 | 224 | unsigned char *ptr_end = in + in_size; |
604 | 224 | int i, j = -999, rle_i, rle_j; |
605 | 224 | unsigned int x; |
606 | 224 | unsigned int out_sz, in_sz; |
607 | 224 | char *out_buf = NULL; |
608 | | |
609 | | // Sanity checking |
610 | 224 | if (in_size < 27) // Need at least this many bytes to start |
611 | 3 | return NULL; |
612 | | |
613 | 221 | if (*in++ != 1) // Order-1 check |
614 | 4 | return NULL; |
615 | | |
616 | 217 | in_sz = ((in[0])<<0) | ((in[1])<<8) | ((in[2])<<16) | (((uint32_t)in[3])<<24); |
617 | 217 | out_sz = ((in[4])<<0) | ((in[5])<<8) | ((in[6])<<16) | (((uint32_t)in[7])<<24); |
618 | 217 | if (in_sz != in_size-9) |
619 | 17 | return NULL; |
620 | | |
621 | 200 | if (out_sz >= INT_MAX) |
622 | 4 | return NULL; // protect against some overflow cases |
623 | | |
624 | | // For speeding up the fuzzer only. |
625 | | // Small input can lead to large uncompressed data. |
626 | | // We reject this as it just slows things up instead of testing more code |
627 | | // paths (once we've verified a few times for large data). |
628 | 196 | #ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION |
629 | 196 | if (out_sz > 100000) |
630 | 8 | return NULL; |
631 | 188 | #endif |
632 | | |
633 | | // Allocate decoding lookup tables |
634 | 188 | RansDecSymbol32 (*syms)[256]; |
635 | 188 | uint8_t *mem = htscodecs_tls_calloc(256, sizeof(ari_decoder) |
636 | 188 | + sizeof(*syms)); |
637 | 188 | if (!mem) |
638 | 0 | return NULL; |
639 | 188 | ari_decoder *const D = (ari_decoder *)mem; |
640 | 188 | syms = (RansDecSymbol32 (*)[256])(mem + 256*sizeof(ari_decoder)); |
641 | 188 | int16_t map[256], map_i = 0; |
642 | | |
643 | 188 | memset(map, -1, 256*sizeof(*map)); |
644 | | |
645 | 188 | if (!D) goto cleanup; |
646 | | /* These memsets prevent illegal memory access in syms due to |
647 | | broken compressed data. As D is calloc'd, all illegal transitions |
648 | | will end up in either row or column 0 of syms. */ |
649 | 188 | memset(&syms[0], 0, sizeof(syms[0])); |
650 | 48.3k | for (i = 0; i < 256; i++) |
651 | 48.1k | memset(&syms[i][0], 0, sizeof(syms[0][0])); |
652 | | |
653 | | //fprintf(stderr, "out_sz=%d\n", out_sz); |
654 | | |
655 | | //i = *cp++; |
656 | 188 | rle_i = 0; |
657 | 188 | i = *cp++; |
658 | 569 | do { |
659 | | // Map arbitrary a,b,c to 0,1,2 to improve cache locality. |
660 | 569 | if (map[i] == -1) |
661 | 546 | map[i] = map_i++; |
662 | 569 | int m_i = map[i]; |
663 | | |
664 | 569 | rle_j = x = 0; |
665 | 569 | j = *cp++; |
666 | 3.71k | do { |
667 | 3.71k | if (map[j] == -1) |
668 | 1.94k | map[j] = map_i++; |
669 | | |
670 | 3.71k | int F, C; |
671 | 3.71k | if (cp > ptr_end - 16) goto cleanup; // Not enough input bytes left |
672 | 3.70k | if ((F = *cp++) >= 128) { |
673 | 355 | F &= ~128; |
674 | 355 | F = ((F & 127) << 8) | *cp++; |
675 | 355 | } |
676 | 3.70k | C = x; |
677 | | |
678 | | //fprintf(stderr, "i=%d j=%d F=%d C=%d\n", i, j, F, C); |
679 | | |
680 | 3.70k | if (unlikely(!F)) |
681 | 449 | F = TOTFREQ; |
682 | | |
683 | 3.70k | RansDecSymbolInit32(&syms[m_i][j], C, F); |
684 | | |
685 | | /* Build reverse lookup table */ |
686 | | //if (!D[i].R) D[i].R = (unsigned char *)malloc(TOTFREQ); |
687 | 3.70k | if (x + F > TOTFREQ) |
688 | 34 | goto cleanup; |
689 | | |
690 | 3.66k | memset(&D[m_i].R[x], j, F); |
691 | 3.66k | x += F; |
692 | | |
693 | 3.66k | if (!rle_j && j+1 == *cp) { |
694 | 126 | j = *cp++; |
695 | 126 | rle_j = *cp++; |
696 | 3.54k | } else if (rle_j) { |
697 | 789 | rle_j--; |
698 | 789 | j++; |
699 | 789 | if (j > 255) |
700 | 1 | goto cleanup; |
701 | 2.75k | } else { |
702 | 2.75k | j = *cp++; |
703 | 2.75k | } |
704 | 3.66k | } while(j); |
705 | | |
706 | 524 | if (x < TOTFREQ-1 || x > TOTFREQ) |
707 | 5 | goto cleanup; |
708 | 519 | if (x < TOTFREQ) // historically we fill 4095, not 4096 |
709 | 18 | D[i].R[x] = D[i].R[x-1]; |
710 | | |
711 | 519 | if (!rle_i && i+1 == *cp) { |
712 | 17 | i = *cp++; |
713 | 17 | rle_i = *cp++; |
714 | 502 | } else if (rle_i) { |
715 | 336 | rle_i--; |
716 | 336 | i++; |
717 | 336 | if (i > 255) |
718 | 0 | goto cleanup; |
719 | 336 | } else { |
720 | 166 | i = *cp++; |
721 | 166 | } |
722 | 519 | } while (i); |
723 | 35.4k | for (i = 0; i < 256; i++) |
724 | 35.3k | if (map[i] == -1) |
725 | 33.9k | map[i] = 0; |
726 | | |
727 | 138 | RansState rans0, rans1, rans2, rans3; |
728 | 138 | uint8_t *ptr = cp; |
729 | 138 | if (cp > ptr_end - 16) goto cleanup; // Not enough input bytes left |
730 | 137 | RansDecInit(&rans0, &ptr); if (rans0 < RANS_BYTE_L) goto cleanup; |
731 | 133 | RansDecInit(&rans1, &ptr); if (rans1 < RANS_BYTE_L) goto cleanup; |
732 | 126 | RansDecInit(&rans2, &ptr); if (rans2 < RANS_BYTE_L) goto cleanup; |
733 | 125 | RansDecInit(&rans3, &ptr); if (rans3 < RANS_BYTE_L) goto cleanup; |
734 | | |
735 | 120 | RansState R[4]; |
736 | 120 | R[0] = rans0; |
737 | 120 | R[1] = rans1; |
738 | 120 | R[2] = rans2; |
739 | 120 | R[3] = rans3; |
740 | | |
741 | 120 | unsigned int isz4 = out_sz>>2; |
742 | 120 | uint32_t l0 = 0; |
743 | 120 | uint32_t l1 = 0; |
744 | 120 | uint32_t l2 = 0; |
745 | 120 | uint32_t l3 = 0; |
746 | | |
747 | 120 | unsigned int i4[] = {0*isz4, 1*isz4, 2*isz4, 3*isz4}; |
748 | | |
749 | | /* Allocate output buffer */ |
750 | 120 | out_buf = malloc(out_sz); |
751 | 120 | if (!out_buf) goto cleanup; |
752 | | |
753 | 120 | uint8_t cc0 = D[map[l0]].R[R[0] & ((1u << TF_SHIFT)-1)]; |
754 | 120 | uint8_t cc1 = D[map[l1]].R[R[1] & ((1u << TF_SHIFT)-1)]; |
755 | 120 | uint8_t cc2 = D[map[l2]].R[R[2] & ((1u << TF_SHIFT)-1)]; |
756 | 120 | uint8_t cc3 = D[map[l3]].R[R[3] & ((1u << TF_SHIFT)-1)]; |
757 | | |
758 | 120 | ptr_end -= 8; |
759 | 565k | for (; likely(i4[0] < isz4); i4[0]++, i4[1]++, i4[2]++, i4[3]++) { |
760 | | // seq4-head2: file q40b |
761 | | // O3 O2 |
762 | | // gcc7 296/291 290/260 |
763 | | // gcc10 292/292 290/261 |
764 | | // gcc11 293/293 290/265 |
765 | | // gcc12 293/290 291/266 |
766 | | // clang10 293/290 296/272 |
767 | | // clang13 300/290 290/266 |
768 | 565k | out_buf[i4[0]] = cc0; |
769 | 565k | out_buf[i4[1]] = cc1; |
770 | 565k | out_buf[i4[2]] = cc2; |
771 | 565k | out_buf[i4[3]] = cc3; |
772 | | |
773 | 565k | RansDecSymbol32 s[4] = { |
774 | 565k | syms[l0][cc0], |
775 | 565k | syms[l1][cc1], |
776 | 565k | syms[l2][cc2], |
777 | 565k | syms[l3][cc3], |
778 | 565k | }; |
779 | 565k | RansDecAdvanceStep(&R[0], s[0].start, s[0].freq, TF_SHIFT); |
780 | 565k | RansDecAdvanceStep(&R[1], s[1].start, s[1].freq, TF_SHIFT); |
781 | 565k | RansDecAdvanceStep(&R[2], s[2].start, s[2].freq, TF_SHIFT); |
782 | 565k | RansDecAdvanceStep(&R[3], s[3].start, s[3].freq, TF_SHIFT); |
783 | | |
784 | | // Likely here helps speed of high-entropy data by 10-11%, |
785 | | // but harms low entropy-data speed by 3-4%. |
786 | 565k | if ((ptr < ptr_end)) { |
787 | 24.0k | RansDecRenorm2(&R[0], &R[1], &ptr); |
788 | 24.0k | RansDecRenorm2(&R[2], &R[3], &ptr); |
789 | 541k | } else { |
790 | 541k | RansDecRenormSafe(&R[0], &ptr, ptr_end+8); |
791 | 541k | RansDecRenormSafe(&R[1], &ptr, ptr_end+8); |
792 | 541k | RansDecRenormSafe(&R[2], &ptr, ptr_end+8); |
793 | 541k | RansDecRenormSafe(&R[3], &ptr, ptr_end+8); |
794 | 541k | } |
795 | | |
796 | 565k | l0 = map[cc0]; |
797 | 565k | l1 = map[cc1]; |
798 | 565k | l2 = map[cc2]; |
799 | 565k | l3 = map[cc3]; |
800 | | |
801 | 565k | cc0 = D[l0].R[R[0] & ((1u << TF_SHIFT)-1)]; |
802 | 565k | cc1 = D[l1].R[R[1] & ((1u << TF_SHIFT)-1)]; |
803 | 565k | cc2 = D[l2].R[R[2] & ((1u << TF_SHIFT)-1)]; |
804 | 565k | cc3 = D[l3].R[R[3] & ((1u << TF_SHIFT)-1)]; |
805 | 565k | } |
806 | | |
807 | | // Remainder |
808 | 248 | for (; i4[3] < out_sz; i4[3]++) { |
809 | 128 | unsigned char c3 = D[l3].R[RansDecGet(&R[3], TF_SHIFT)]; |
810 | 128 | out_buf[i4[3]] = c3; |
811 | | |
812 | 128 | uint32_t m = R[3] & ((1u << TF_SHIFT)-1); |
813 | 128 | R[3] = syms[l3][c3].freq * (R[3]>>TF_SHIFT) + m - syms[l3][c3].start; |
814 | 128 | RansDecRenormSafe(&R[3], &ptr, ptr_end+8); |
815 | 128 | l3 = map[c3]; |
816 | 128 | } |
817 | | |
818 | 120 | *out_size = out_sz; |
819 | | |
820 | 188 | cleanup: |
821 | 188 | htscodecs_tls_free(D); |
822 | | |
823 | 188 | return (unsigned char *)out_buf; |
824 | 120 | } |
825 | | |
826 | | /*----------------------------------------------------------------------------- |
827 | | * Simple interface to the order-0 vs order-1 encoders and decoders. |
828 | | */ |
829 | | unsigned char *rans_compress(unsigned char *in, unsigned int in_size, |
830 | 0 | unsigned int *out_size, int order) { |
831 | 0 | if (in_size > INT_MAX) { |
832 | 0 | *out_size = 0; |
833 | 0 | return NULL; |
834 | 0 | } |
835 | | |
836 | 0 | return order |
837 | 0 | ? rans_compress_O1(in, in_size, out_size) |
838 | 0 | : rans_compress_O0(in, in_size, out_size); |
839 | 0 | } |
840 | | |
841 | | unsigned char *rans_uncompress(unsigned char *in, unsigned int in_size, |
842 | 427 | unsigned int *out_size) { |
843 | | /* Both rans_uncompress functions need to be able to read at least 9 |
844 | | bytes. */ |
845 | 427 | if (in_size < 9) |
846 | 1 | return NULL; |
847 | 426 | return in[0] |
848 | 426 | ? rans_uncompress_O1(in, in_size, out_size) |
849 | 426 | : rans_uncompress_O0(in, in_size, out_size); |
850 | 427 | } |