/src/htslib/htscodecs/htscodecs/pack.c
Line | Count | Source |
1 | | /* |
2 | | * Copyright (c) 2019-2020, 2022 Genome Research Ltd. |
3 | | * Author(s): James Bonfield |
4 | | * |
5 | | * Redistribution and use in source and binary forms, with or without |
6 | | * modification, are permitted provided that the following conditions are met: |
7 | | * |
8 | | * 1. Redistributions of source code must retain the above copyright notice, |
9 | | * this list of conditions and the following disclaimer. |
10 | | * |
11 | | * 2. Redistributions in binary form must reproduce the above |
12 | | * copyright notice, this list of conditions and the following |
13 | | * disclaimer in the documentation and/or other materials provided |
14 | | * with the distribution. |
15 | | * |
16 | | * 3. Neither the names Genome Research Ltd and Wellcome Trust Sanger |
17 | | * Institute nor the names of its contributors may be used to endorse |
18 | | * or promote products derived from this software without specific |
19 | | * prior written permission. |
20 | | * |
21 | | * THIS SOFTWARE IS PROVIDED BY GENOME RESEARCH LTD AND CONTRIBUTORS "AS |
22 | | * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED |
23 | | * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A |
24 | | * PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL GENOME RESEARCH |
25 | | * LTD OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, |
26 | | * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
27 | | * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
28 | | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
29 | | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
30 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE |
31 | | * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
32 | | */ |
33 | | |
34 | | #include "config.h" |
35 | | |
36 | | #include <stdint.h> |
37 | | #include <string.h> |
38 | | #include <stdlib.h> |
39 | | #include <stdio.h> |
40 | | |
41 | | #include "pack.h" |
42 | | |
43 | | //----------------------------------------------------------------------------- |
44 | | |
45 | | /* |
46 | | * Packs multiple symbols into a single byte if the total alphabet of symbols |
47 | | * used is <= 16. Each new symbol takes up 1, 2, 4 or 8 bits, or 0 if the |
48 | | * alphabet used is 1 (constant). |
49 | | * |
50 | | * If successful, out_meta/out_meta_len are set to hold the mapping table |
51 | | * to be used during decompression. |
52 | | * |
53 | | * Returns the packed buffer on success with new length in out_len, |
54 | | * NULL of failure |
55 | | */ |
56 | | uint8_t *hts_pack(uint8_t *data, int64_t len, |
57 | 104k | uint8_t *out_meta, int *out_meta_len, uint64_t *out_len) { |
58 | 104k | int p[256] = {0}, n; |
59 | 104k | uint64_t i, j; |
60 | | |
61 | | // count syms |
62 | 2.31G | for (i = 0; i < len; i++) |
63 | 2.31G | p[data[i]]=1; |
64 | | |
65 | 26.8M | for (i = n = 0; i < 256; i++) { |
66 | 26.7M | if (p[i]) { |
67 | 351k | p[i] = n++; // p[i] is now the code number |
68 | 351k | out_meta[n] = i; |
69 | 351k | } |
70 | 26.7M | } |
71 | 104k | out_meta[0] = n; // 256 wraps to 0 |
72 | 104k | j = n+1; |
73 | | |
74 | | // 1 value per byte |
75 | 104k | if (n > 16) |
76 | 3.11k | return NULL; |
77 | | |
78 | 101k | uint8_t *out = malloc(len+1); |
79 | 101k | if (!out) |
80 | 0 | return NULL; |
81 | | |
82 | | // Work out how many values per byte to encode. |
83 | 101k | int val_per_byte; |
84 | 101k | if (n > 4) |
85 | 11.0k | val_per_byte = 2; |
86 | 90.4k | else if (n > 2) |
87 | 19.4k | val_per_byte = 4; |
88 | 71.0k | else if (n > 1) |
89 | 23.3k | val_per_byte = 8; |
90 | 47.6k | else |
91 | 47.6k | val_per_byte = 0; // infinite |
92 | | |
93 | 101k | *out_meta_len = j; |
94 | 101k | j = 0; |
95 | | |
96 | 101k | switch (val_per_byte) { |
97 | 11.0k | case 2: |
98 | 193M | for (i = 0; i < (len & ~1); i+=2) |
99 | 193M | out[j++] = (p[data[i]]<<0) | (p[data[i+1]]<<4); |
100 | 11.0k | switch (len-i) { |
101 | 4.46k | case 1: out[j++] = p[data[i]]; |
102 | 11.0k | } |
103 | 11.0k | *out_len = j; |
104 | 11.0k | return out; |
105 | | |
106 | 19.4k | case 4: { |
107 | 15.9M | for (i = 0; i < (len & ~3); i+=4) |
108 | 15.9M | out[j++] = (p[data[i]]<<0) | (p[data[i+1]]<<2) | (p[data[i+2]]<<4) | (p[data[i+3]]<<6); |
109 | 19.4k | out[j] = 0; |
110 | 19.4k | int s = len-i, x = 0; |
111 | 19.4k | switch (s) { |
112 | 5.08k | case 3: out[j] |= p[data[i++]] << x; x+=2; // fall-through |
113 | 9.14k | case 2: out[j] |= p[data[i++]] << x; x+=2; // fall-through |
114 | 12.3k | case 1: out[j] |= p[data[i++]] << x; x+=2; |
115 | 12.3k | j++; |
116 | 19.4k | } |
117 | 19.4k | *out_len = j; |
118 | 19.4k | return out; |
119 | 19.4k | } |
120 | | |
121 | 23.3k | case 8: { |
122 | 8.98M | for (i = 0; i < (len & ~7); i+=8) |
123 | 8.96M | out[j++] = (p[data[i+0]]<<0) | (p[data[i+1]]<<1) | (p[data[i+2]]<<2) | (p[data[i+3]]<<3) |
124 | 8.96M | | (p[data[i+4]]<<4) | (p[data[i+5]]<<5) | (p[data[i+6]]<<6) | (p[data[i+7]]<<7); |
125 | 23.3k | out[j] = 0; |
126 | 23.3k | int s = len-i, x = 0; |
127 | 23.3k | switch (s) { |
128 | 1.84k | case 7: out[j] |= p[data[i++]] << x++; // fall-through |
129 | 3.99k | case 6: out[j] |= p[data[i++]] << x++; // fall-through |
130 | 5.44k | case 5: out[j] |= p[data[i++]] << x++; // fall-through |
131 | 11.4k | case 4: out[j] |= p[data[i++]] << x++; // fall-through |
132 | 13.0k | case 3: out[j] |= p[data[i++]] << x++; // fall-through |
133 | 18.4k | case 2: out[j] |= p[data[i++]] << x++; // fall-through |
134 | 19.5k | case 1: out[j] |= p[data[i++]] << x++; |
135 | 19.5k | j++; |
136 | 23.3k | } |
137 | 23.3k | *out_len = j; |
138 | 23.3k | return out; |
139 | 23.3k | } |
140 | | |
141 | 47.6k | case 0: |
142 | 47.6k | *out_len = j; |
143 | 47.6k | return out; |
144 | 101k | } |
145 | | |
146 | 0 | return NULL; |
147 | 101k | } |
148 | | |
149 | | |
150 | | /* |
151 | | * Unpacks the meta-data portions of the hts_pack algorithm. |
152 | | * This consists of the count of symbols and their values. |
153 | | * |
154 | | * The "map" array is filled out with the used symbols. |
155 | | * "nsym" is set to contain the number of symbols per byte; |
156 | | * 0, 1, 2, 4 or 8. |
157 | | * |
158 | | * Returns number of bytes of data[] consumed on success, |
159 | | * zero on failure. |
160 | | */ |
161 | | uint8_t hts_unpack_meta(uint8_t *data, uint32_t data_len, |
162 | 1.56k | uint64_t udata_len, uint8_t *map, int *nsym) { |
163 | 1.56k | if (data_len == 0) |
164 | 3 | return 0; |
165 | | |
166 | | // Number of symbols used |
167 | 1.56k | unsigned int n = data[0]; |
168 | 1.56k | if (n == 0) |
169 | 239 | n = 256; |
170 | | |
171 | | // Symbols per byte |
172 | 1.56k | if (n <= 1) |
173 | 167 | *nsym = 0; |
174 | 1.39k | else if (n <= 2) |
175 | 189 | *nsym = 8; |
176 | 1.20k | else if (n <= 4) |
177 | 4 | *nsym = 4; |
178 | 1.20k | else if (n <= 16) |
179 | 113 | *nsym = 2; |
180 | 1.08k | else { |
181 | 1.08k | *nsym = 1; // no packing |
182 | 1.08k | return 1; |
183 | 1.08k | } |
184 | | |
185 | 473 | if (data_len <= 1) |
186 | 0 | return 0; |
187 | | |
188 | 473 | int j = 1, c = 0; |
189 | 1.65k | do { |
190 | 1.65k | map[c++] = data[j++]; |
191 | 1.65k | } while (c < n && j < data_len); |
192 | | |
193 | 473 | return c < n ? 0 : j; |
194 | 473 | } |
195 | | |
196 | | /* |
197 | | * Unpacks a packed data steam (given the unpacked meta-data). |
198 | | * |
199 | | * "map" is the pack map, mapping 0->n to the expanded symbols. |
200 | | * The "out" buffer must be preallocated by the caller to be the correct |
201 | | * size. For error checking purposes, out_len is set to the size of |
202 | | * this buffer. |
203 | | * |
204 | | * Returns uncompressed data (out) on success, |
205 | | * NULL on failure. |
206 | | */ |
207 | 1.52k | uint8_t *hts_unpack(uint8_t *data, int64_t len, uint8_t *out, uint64_t out_len, int nsym, uint8_t *p) { |
208 | | //uint8_t *out; |
209 | 1.52k | uint8_t c = 0; |
210 | 1.52k | int64_t i, j = 0, olen; |
211 | | |
212 | 1.52k | if (nsym == 1) { |
213 | | // raw data; FIXME: shortcut the need for malloc & memcpy here |
214 | 1.06k | memcpy(out, data, len); |
215 | 1.06k | return out; |
216 | 1.06k | } |
217 | | |
218 | 462 | switch(nsym) { |
219 | 189 | case 8: { |
220 | 189 | union { |
221 | 189 | uint64_t w; |
222 | 189 | uint8_t c[8]; |
223 | 189 | } map[256]; |
224 | 189 | int x; |
225 | 48.5k | for (x = 0; x < 256; x++) { |
226 | 48.3k | map[x].c[0] = p[x>>0&1]; |
227 | 48.3k | map[x].c[1] = p[x>>1&1]; |
228 | 48.3k | map[x].c[2] = p[x>>2&1]; |
229 | 48.3k | map[x].c[3] = p[x>>3&1]; |
230 | 48.3k | map[x].c[4] = p[x>>4&1]; |
231 | 48.3k | map[x].c[5] = p[x>>5&1]; |
232 | 48.3k | map[x].c[6] = p[x>>6&1]; |
233 | 48.3k | map[x].c[7] = p[x>>7&1]; |
234 | 48.3k | } |
235 | 189 | if ((out_len+7)/8 > len) |
236 | 0 | return NULL; |
237 | 189 | olen = out_len & ~7; |
238 | | |
239 | 303 | for (i = 0; i < olen; i+=8) |
240 | 114 | memcpy(&out[i], &map[data[j++]].w, 8); |
241 | | |
242 | 189 | if (out_len != olen) { |
243 | 12 | c = data[j++]; |
244 | 84 | while (i < out_len) { |
245 | 72 | out[i++] = p[c & 1]; |
246 | 72 | c >>= 1; |
247 | 72 | } |
248 | 12 | } |
249 | 189 | break; |
250 | 189 | } |
251 | | |
252 | 4 | case 4: { |
253 | 4 | union { |
254 | 4 | uint32_t w; |
255 | 4 | uint8_t c[4]; |
256 | 4 | } map[256]; |
257 | | |
258 | 4 | int x, y, z, _, P=0; |
259 | 20 | for (x = 0; x < 4; x++) |
260 | 80 | for (y = 0; y < 4; y++) |
261 | 320 | for (z = 0; z < 4; z++) |
262 | 1.28k | for (_ = 0; _ < 4; _++, P++) { |
263 | 1.02k | map[P].c[0] = p[_]; |
264 | 1.02k | map[P].c[1] = p[z]; |
265 | 1.02k | map[P].c[2] = p[y]; |
266 | 1.02k | map[P].c[3] = p[x]; |
267 | 1.02k | } |
268 | | |
269 | 4 | if ((out_len+3)/4 > len) |
270 | 0 | return NULL; |
271 | 4 | olen = out_len & ~3; |
272 | | |
273 | 4 | for (i = 0; i < olen-12; i+=16) { |
274 | 0 | uint32_t w[] = { |
275 | 0 | map[data[j+0]].w, |
276 | 0 | map[data[j+1]].w, |
277 | 0 | map[data[j+2]].w, |
278 | 0 | map[data[j+3]].w |
279 | 0 | }; |
280 | 0 | j += 4; |
281 | 0 | memcpy(&out[i], &w, 16); |
282 | 0 | } |
283 | | |
284 | 10 | for (; i < olen; i+=4) |
285 | 6 | memcpy(&out[i], &map[data[j++]].w, 4); |
286 | | |
287 | 4 | if (out_len != olen) { |
288 | 4 | c = data[j++]; |
289 | 16 | while (i < out_len) { |
290 | 12 | out[i++] = p[c & 3]; |
291 | 12 | c >>= 2; |
292 | 12 | } |
293 | 4 | } |
294 | 4 | break; |
295 | 4 | } |
296 | | |
297 | 110 | case 2: { |
298 | 110 | union { |
299 | 110 | uint16_t w; |
300 | 110 | uint8_t c[2]; |
301 | 110 | } map[256]; |
302 | | |
303 | 110 | int x, y; |
304 | 1.87k | for (x = 0; x < 16; x++) { |
305 | 29.9k | for (y = 0; y < 16; y++) { |
306 | 28.1k | map[x*16+y].c[0] = p[y]; |
307 | 28.1k | map[x*16+y].c[1] = p[x]; |
308 | 28.1k | } |
309 | 1.76k | } |
310 | | |
311 | 110 | if ((out_len+1)/2 > len) |
312 | 5 | return NULL; |
313 | 105 | olen = out_len & ~1; |
314 | | |
315 | 825 | for (i = j = 0; i+2 < olen; i+=4) { |
316 | 720 | uint16_t w[] = { |
317 | 720 | map[data[j+0]].w, |
318 | 720 | map[data[j+1]].w |
319 | 720 | }; |
320 | 720 | memcpy(&out[i], &w, 4); |
321 | | |
322 | 720 | j += 2; |
323 | 720 | } |
324 | | |
325 | 129 | for (; i < olen; i+=2) |
326 | 24 | memcpy(&out[i], &map[data[j++]].w, 2); |
327 | | |
328 | 105 | if (out_len != olen) { |
329 | 24 | c = data[j++]; |
330 | 24 | out[i+0] = p[c&15]; |
331 | 24 | } |
332 | 105 | break; |
333 | 110 | } |
334 | | |
335 | 159 | case 0: |
336 | 159 | memset(out, p[0], out_len); |
337 | 159 | break; |
338 | | |
339 | 0 | default: |
340 | 0 | return NULL; |
341 | 462 | } |
342 | | |
343 | 457 | return out; |
344 | 462 | } |
345 | | |
346 | | |
347 | 0 | uint8_t *hts_unpack_(uint8_t *data, int64_t len, uint8_t *out, uint64_t out_len, int nsym, uint8_t *p) { |
348 | | //uint8_t *out; |
349 | 0 | uint8_t c = 0; |
350 | 0 | int64_t i, j = 0, olen; |
351 | |
|
352 | 0 | if (nsym == 1) { |
353 | | // raw data; FIXME: shortcut the need for malloc & memcpy here |
354 | 0 | memcpy(out, data, len); |
355 | 0 | return out; |
356 | 0 | } |
357 | | |
358 | 0 | switch(nsym) { |
359 | 0 | case 2: { |
360 | 0 | uint16_t map[256], x, y; |
361 | 0 | for (x = 0; x < 16; x++) |
362 | 0 | for (y = 0; y < 16; y++) |
363 | 0 | map[x*16+y] = p[x]*256+p[y]; |
364 | |
|
365 | 0 | if ((out_len+1)/2 > len) |
366 | 0 | return NULL; |
367 | 0 | olen = out_len & ~1; |
368 | |
|
369 | 0 | uint16_t *o16 = (uint16_t *)out; |
370 | 0 | for (i = 0; i+4 < olen/2; i+=4) { |
371 | 0 | int k; |
372 | 0 | for (k = 0; k < 4; k++) |
373 | 0 | o16[i+k] = map[data[i+k]]; |
374 | 0 | } |
375 | 0 | j = i; i *= 2; |
376 | |
|
377 | 0 | for (; i < olen; i+=2) { |
378 | 0 | uint16_t w1 = map[data[j++]]; |
379 | 0 | *(uint16_t *)&out[i] = w1; |
380 | 0 | } |
381 | |
|
382 | 0 | if (out_len != olen) { |
383 | 0 | c = data[j++]; |
384 | 0 | out[i+0] = p[c&15]; |
385 | 0 | } |
386 | 0 | break; |
387 | 0 | } |
388 | | |
389 | 0 | default: |
390 | 0 | return NULL; |
391 | 0 | } |
392 | | |
393 | 0 | return out; |
394 | 0 | } |