/src/c-blosc2/plugins/codecs/ndlz/ndlz8x8.c
Line | Count | Source |
1 | | /********************************************************************* |
2 | | Blosc - Blocked Shuffling and Compression Library |
3 | | |
4 | | Copyright (c) 2021 Blosc Development Team <blosc@blosc.org> |
5 | | https://blosc.org |
6 | | License: BSD 3-Clause (see LICENSE.txt) |
7 | | |
8 | | See LICENSE.txt for details about copyright and rights to use. |
9 | | **********************************************************************/ |
10 | | |
11 | | /********************************************************************* |
12 | | This codec is meant to leverage multidimensionality for getting |
13 | | better compression ratios. The idea is to look for similarities |
14 | | in places that are closer in a euclidean metric, not the typical |
15 | | linear one. |
16 | | **********************************************************************/ |
17 | | |
18 | | #include "ndlz8x8.h" |
19 | | #include "xxhash.h" |
20 | | #include "b2nd.h" |
21 | | |
22 | | #include <stdlib.h> |
23 | | #include <string.h> |
24 | | |
25 | | /* |
26 | | * Give hints to the compiler for branch prediction optimization. |
27 | | */ |
28 | | #if defined(__GNUC__) && (__GNUC__ > 2) |
29 | | #define NDLZ_EXPECT_CONDITIONAL(c) (__builtin_expect((c), 1)) |
30 | 1.39k | #define NDLZ_UNEXPECT_CONDITIONAL(c) (__builtin_expect((c), 0)) |
31 | | #else |
32 | | #define NDLZ_EXPECT_CONDITIONAL(c) (c) |
33 | | #define NDLZ_UNEXPECT_CONDITIONAL(c) (c) |
34 | | #endif |
35 | | |
36 | | /* |
37 | | * Use inlined functions for supported systems. |
38 | | */ |
39 | | #if defined(_MSC_VER) && !defined(__cplusplus) /* Visual Studio */ |
40 | | #define inline __inline /* Visual C is not C99, but supports some kind of inline */ |
41 | | #endif |
42 | | |
43 | | #define MAX_COPY 32U |
44 | 0 | #define MAX_DISTANCE 65535 |
45 | | |
46 | | |
47 | | #ifdef BLOSC_STRICT_ALIGN |
48 | | #define NDLZ_READU16(p) ((p)[0] | (p)[1]<<8) |
49 | | #define NDLZ_READU32(p) ((p)[0] | (p)[1]<<8 | (p)[2]<<16 | (p)[3]<<24) |
50 | | #else |
51 | | #define NDLZ_READU16(p) *((const uint16_t*)(p)) |
52 | | #define NDLZ_READU32(p) *((const uint32_t*)(p)) |
53 | | #endif |
54 | | |
55 | | #define HASH_LOG (12) |
56 | | |
57 | | |
58 | | int ndlz8_compress(const uint8_t *input, int32_t input_len, uint8_t *output, int32_t output_len, |
59 | 0 | uint8_t meta, blosc2_cparams *cparams) { |
60 | 0 | BLOSC_UNUSED_PARAM(meta); |
61 | 0 | BLOSC_ERROR_NULL(cparams, BLOSC2_ERROR_NULL_POINTER); |
62 | 0 | BLOSC_ERROR_NULL(cparams->schunk, BLOSC2_ERROR_NULL_POINTER); |
63 | 0 | uint8_t *smeta; |
64 | 0 | int32_t smeta_len; |
65 | |
|
66 | 0 | if (blosc2_meta_get(cparams->schunk, "b2nd", &smeta, &smeta_len) < 0) { |
67 | 0 | BLOSC_TRACE_ERROR("b2nd layer not found!"); |
68 | 0 | return BLOSC2_ERROR_FAILURE; |
69 | 0 | } |
70 | | |
71 | 0 | const int cell_shape = 8; |
72 | 0 | const int cell_size = 64; |
73 | 0 | int8_t ndim; |
74 | 0 | int64_t *shape = malloc(8 * sizeof(int64_t)); |
75 | 0 | int32_t *chunkshape = malloc(8 * sizeof(int32_t)); |
76 | 0 | int32_t *blockshape = malloc(8 * sizeof(int32_t)); |
77 | 0 | b2nd_deserialize_meta(smeta, smeta_len, &ndim, shape, chunkshape, blockshape, NULL, NULL); |
78 | 0 | free(smeta); |
79 | |
|
80 | 0 | if (ndim != 2) { |
81 | 0 | BLOSC_TRACE_ERROR("This codec only works for ndim = 2"); |
82 | 0 | return BLOSC2_ERROR_FAILURE; |
83 | 0 | } |
84 | | |
85 | 0 | if (input_len != (blockshape[0] * blockshape[1])) { |
86 | 0 | BLOSC_TRACE_ERROR("Length not equal to blocksize"); |
87 | 0 | return BLOSC2_ERROR_FAILURE; |
88 | 0 | } |
89 | | |
90 | 0 | if (NDLZ_UNEXPECT_CONDITIONAL(output_len < (int) (1 + ndim * sizeof(int32_t)))) { |
91 | 0 | BLOSC_TRACE_ERROR("Output too small"); |
92 | 0 | return BLOSC2_ERROR_FAILURE; |
93 | 0 | } |
94 | | |
95 | 0 | uint8_t *ip = (uint8_t *) input; |
96 | 0 | uint8_t *op = (uint8_t *) output; |
97 | 0 | uint8_t *op_limit; |
98 | 0 | uint32_t hval, hash_cell; |
99 | 0 | uint32_t hash_triple[6] = {0}; |
100 | 0 | uint32_t hash_pair[7] = {0}; |
101 | 0 | uint8_t *bufarea = malloc(cell_size); |
102 | 0 | uint8_t *buf_cell = bufarea; |
103 | 0 | uint8_t *buf_aux; |
104 | 0 | uint32_t tab_cell[1U << 12U] = {0}; |
105 | 0 | uint32_t tab_triple[1U << 12U] = {0}; |
106 | 0 | uint32_t tab_pair[1U << 12U] = {0}; |
107 | 0 | uint32_t update_triple[6] = {0}; |
108 | 0 | uint32_t update_pair[7] = {0}; |
109 | | |
110 | | // Minimum cratios before issuing and _early giveup_ |
111 | | // Remind that ndlz is not meant for cratios <= 2 (too costly to decompress) |
112 | |
|
113 | 0 | op_limit = op + output_len; |
114 | | |
115 | | // Initialize the hash table to distances of 0 |
116 | 0 | for (unsigned i = 0; i < (1U << 12U); i++) { |
117 | 0 | tab_cell[i] = 0; |
118 | 0 | tab_triple[i] = 0; |
119 | 0 | tab_pair[i] = 0; |
120 | 0 | } |
121 | | |
122 | | /* input and output buffer cannot be less than 64 (cells are 8x8) */ |
123 | 0 | int overhead = 17 + (blockshape[0] * blockshape[1] / cell_size - 1) * 2; |
124 | 0 | if (input_len < cell_size || output_len < overhead) { |
125 | 0 | BLOSC_TRACE_ERROR("Incorrect length or maxout"); |
126 | 0 | return 0; |
127 | 0 | } |
128 | | |
129 | 0 | uint8_t *obase = op; |
130 | | |
131 | | /* we start with literal copy */ |
132 | 0 | *op++ = ndim; |
133 | 0 | memcpy(op, &blockshape[0], 4); |
134 | 0 | op += 4; |
135 | 0 | memcpy(op, &blockshape[1], 4); |
136 | 0 | op += 4; |
137 | |
|
138 | 0 | uint32_t i_stop[2]; |
139 | 0 | for (int i = 0; i < 2; ++i) { |
140 | 0 | i_stop[i] = (blockshape[i] + cell_shape - 1) / cell_shape; |
141 | 0 | } |
142 | | |
143 | | |
144 | | /* main loop */ |
145 | 0 | uint32_t padding[2]; |
146 | 0 | uint32_t ii[2]; |
147 | 0 | for (ii[0] = 0; ii[0] < i_stop[0]; ++ii[0]) { |
148 | 0 | for (ii[1] = 0; ii[1] < i_stop[1]; ++ii[1]) { // for each cell |
149 | 0 | for (int h = 0; h < 7; h++) { // new cell -> new possible references |
150 | 0 | update_pair[h] = 0; |
151 | 0 | if (h != 6) { |
152 | 0 | update_triple[h] = 0; |
153 | 0 | } |
154 | 0 | } |
155 | |
|
156 | 0 | if (NDLZ_UNEXPECT_CONDITIONAL(op + cell_size + 1 > op_limit)) { |
157 | 0 | free(shape); |
158 | 0 | free(chunkshape); |
159 | 0 | free(blockshape); |
160 | 0 | free(bufarea); |
161 | 0 | return 0; |
162 | 0 | } |
163 | | |
164 | 0 | uint32_t orig = ii[0] * cell_shape * blockshape[1] + ii[1] * cell_shape; |
165 | 0 | if (((blockshape[0] % cell_shape != 0) && (ii[0] == i_stop[0] - 1)) || |
166 | 0 | ((blockshape[1] % cell_shape != 0) && (ii[1] == i_stop[1] - 1))) { |
167 | 0 | uint8_t token = 0; // padding -> literal copy |
168 | 0 | *op++ = token; |
169 | 0 | if (ii[0] == i_stop[0] - 1) { |
170 | 0 | padding[0] = (blockshape[0] % cell_shape == 0) ? cell_shape : blockshape[0] % cell_shape; |
171 | 0 | } else { |
172 | 0 | padding[0] = cell_shape; |
173 | 0 | } |
174 | 0 | if (ii[1] == i_stop[1] - 1) { |
175 | 0 | padding[1] = (blockshape[1] % cell_shape == 0) ? cell_shape : blockshape[1] % cell_shape; |
176 | 0 | } else { |
177 | 0 | padding[1] = cell_shape; |
178 | 0 | } |
179 | 0 | for (uint32_t i = 0; i < padding[0]; i++) { |
180 | 0 | memcpy(op, &ip[orig + i * blockshape[1]], padding[1]); |
181 | 0 | op += padding[1]; |
182 | 0 | } |
183 | 0 | } else { |
184 | 0 | for (uint64_t i = 0; i < (uint64_t) cell_shape; i++) { // fill cell buffer |
185 | 0 | uint64_t ind = orig + i * blockshape[1]; |
186 | 0 | memcpy(buf_cell, &ip[ind], cell_shape); |
187 | 0 | buf_cell += cell_shape; |
188 | 0 | } |
189 | 0 | buf_cell -= cell_size; |
190 | |
|
191 | 0 | const uint8_t *ref; |
192 | 0 | uint32_t distance; |
193 | 0 | uint8_t *anchor = op; /* comparison starting-point */ |
194 | | |
195 | | /* find potential match */ |
196 | 0 | hash_cell = XXH32(buf_cell, cell_size, 1); // calculate cell hash |
197 | 0 | hash_cell >>= 32U - 12U; |
198 | 0 | ref = obase + tab_cell[hash_cell]; |
199 | | |
200 | | /* calculate distance to the match */ |
201 | 0 | if (tab_cell[hash_cell] == 0) { |
202 | 0 | distance = 0; |
203 | 0 | } else { |
204 | 0 | bool same = true; |
205 | 0 | buf_aux = obase + tab_cell[hash_cell]; |
206 | 0 | for (int i = 0; i < cell_size; i++) { |
207 | 0 | if (buf_cell[i] != buf_aux[i]) { |
208 | 0 | same = false; |
209 | 0 | break; |
210 | 0 | } |
211 | 0 | } |
212 | 0 | if (same) { |
213 | 0 | distance = (int32_t) (anchor - ref); |
214 | 0 | } else { |
215 | 0 | distance = 0; |
216 | 0 | } |
217 | 0 | } |
218 | |
|
219 | 0 | bool alleq = true; |
220 | 0 | for (int i = 1; i < cell_size; i++) { |
221 | 0 | if (buf_cell[i] != buf_cell[0]) { |
222 | 0 | alleq = false; |
223 | 0 | break; |
224 | 0 | } |
225 | 0 | } |
226 | 0 | if (alleq) { // all elements of the cell equal |
227 | 0 | uint8_t token = (uint8_t) (1U << 6U); |
228 | 0 | *op++ = token; |
229 | 0 | *op++ = buf_cell[0]; |
230 | |
|
231 | 0 | } else if (distance == 0 || (distance >= MAX_DISTANCE)) { // no cell match |
232 | 0 | bool literal = true; |
233 | | |
234 | | // rows triples matches |
235 | 0 | for (int i = 0; i < 6; i++) { |
236 | 0 | int triple_start = i * cell_shape; |
237 | 0 | hval = XXH32(&buf_cell[triple_start], 24, 1); // calculate triple hash |
238 | 0 | hval >>= 32U - 12U; |
239 | | /* calculate distance to the match */ |
240 | 0 | bool same = true; |
241 | 0 | uint16_t offset; |
242 | 0 | if (tab_triple[hval] != 0) { |
243 | 0 | buf_aux = obase + tab_triple[hval]; |
244 | 0 | for (int l = 0; l < 24; l++) { |
245 | 0 | if (buf_cell[triple_start + l] != buf_aux[l]) { |
246 | 0 | same = false; |
247 | 0 | break; |
248 | 0 | } |
249 | 0 | } |
250 | 0 | offset = (uint16_t) (anchor - obase - tab_triple[hval]); |
251 | 0 | } else { |
252 | 0 | same = false; |
253 | 0 | update_triple[i] = (uint32_t) (anchor + 1 + triple_start - obase); /* update hash table */ |
254 | 0 | hash_triple[i] = hval; |
255 | 0 | } |
256 | 0 | ref = obase + tab_triple[hval]; |
257 | 0 | if (same) { |
258 | 0 | distance = (int32_t) (anchor + triple_start - ref); |
259 | 0 | } else { |
260 | 0 | distance = 0; |
261 | 0 | } |
262 | 0 | if ((distance != 0) && (distance < MAX_DISTANCE)) { // 3 rows match |
263 | 0 | literal = false; |
264 | 0 | uint8_t token = (uint8_t) ((21 << 3U) | i); |
265 | 0 | *op++ = token; |
266 | 0 | memcpy(op, &offset, 2); |
267 | 0 | op += 2; |
268 | 0 | for (int l = 0; l < 8; l++) { |
269 | 0 | if ((l < i) || (l > i + 2)) { |
270 | 0 | memcpy(op, &buf_cell[l * cell_shape], cell_shape); |
271 | 0 | op += cell_shape; |
272 | 0 | } |
273 | 0 | } |
274 | 0 | goto match; |
275 | 0 | } |
276 | 0 | } |
277 | | |
278 | | // rows pairs matches |
279 | 0 | for (int i = 0; i < 7; i++) { |
280 | 0 | int pair_start = i * cell_shape; |
281 | 0 | hval = XXH32(&buf_cell[pair_start], 16, 1); // calculate rows pair hash |
282 | 0 | hval >>= 32U - 12U; |
283 | 0 | ref = obase + tab_pair[hval]; |
284 | | /* calculate distance to the match */ |
285 | 0 | bool same = true; |
286 | 0 | uint16_t offset; |
287 | 0 | if (tab_pair[hval] != 0) { |
288 | 0 | buf_aux = obase + tab_pair[hval]; |
289 | 0 | for (int k = 0; k < 16; k++) { |
290 | 0 | if (buf_cell[pair_start + k] != buf_aux[k]) { |
291 | 0 | same = false; |
292 | 0 | break; |
293 | 0 | } |
294 | 0 | } |
295 | 0 | offset = (uint16_t) (anchor - obase - tab_pair[hval]); |
296 | 0 | } else { |
297 | 0 | same = false; |
298 | 0 | update_pair[i] = (uint32_t) (anchor + 1 + pair_start - obase); /* update hash table */ |
299 | 0 | hash_pair[i] = hval; |
300 | 0 | } |
301 | 0 | if (same) { |
302 | 0 | distance = (int32_t) (anchor + pair_start - ref); |
303 | 0 | } else { |
304 | 0 | distance = 0; |
305 | 0 | } |
306 | 0 | if ((distance != 0) && (distance < MAX_DISTANCE)) { /* 1 rows pair match */ |
307 | 0 | literal = false; |
308 | 0 | uint8_t token = (uint8_t) ((17 << 3U) | i); |
309 | 0 | *op++ = token; |
310 | 0 | offset = (uint16_t) (anchor - obase - tab_pair[hval]); |
311 | 0 | memcpy(op, &offset, 2); |
312 | 0 | op += 2; |
313 | 0 | for (int l = 0; l < 8; l++) { |
314 | 0 | if ((l < i) || (l > i + 1)) { |
315 | 0 | memcpy(op, &buf_cell[l * cell_shape], cell_shape); |
316 | 0 | op += cell_shape; |
317 | 0 | } |
318 | 0 | } |
319 | 0 | goto match; |
320 | 0 | } |
321 | 0 | } |
322 | | |
323 | 0 | match: |
324 | 0 | if (literal) { |
325 | 0 | tab_cell[hash_cell] = (uint32_t) (anchor + 1 - obase); /* update hash tables */ |
326 | |
|
327 | 0 | if (update_triple[0] != 0) { |
328 | 0 | for (int h = 0; h < 6; h++) { |
329 | 0 | tab_triple[hash_triple[h]] = update_triple[h]; |
330 | 0 | } |
331 | 0 | } |
332 | 0 | if (update_pair[0] != 0) { |
333 | 0 | for (int h = 0; h < 7; h++) { |
334 | 0 | tab_pair[hash_pair[h]] = update_pair[h]; |
335 | 0 | } |
336 | 0 | } |
337 | 0 | uint8_t token = 0; |
338 | 0 | *op++ = token; |
339 | 0 | memcpy(op, buf_cell, cell_size); |
340 | 0 | op += cell_size; |
341 | |
|
342 | 0 | } |
343 | |
|
344 | 0 | } else { // cell match |
345 | 0 | uint8_t token = (uint8_t) ((1U << 7U) | (1U << 6U)); |
346 | 0 | *op++ = token; |
347 | 0 | uint16_t offset = (uint16_t) (anchor - obase - tab_cell[hash_cell]); |
348 | 0 | memcpy(op, &offset, 2); |
349 | 0 | op += 2; |
350 | |
|
351 | 0 | } |
352 | |
|
353 | 0 | } |
354 | 0 | if ((op - obase) > input_len) { |
355 | 0 | free(shape); |
356 | 0 | free(chunkshape); |
357 | 0 | free(blockshape); |
358 | 0 | free(bufarea); |
359 | 0 | BLOSC_TRACE_ERROR("Compressed data is bigger than input!"); |
360 | 0 | return 0; |
361 | 0 | } |
362 | 0 | } |
363 | 0 | } |
364 | | |
365 | 0 | free(shape); |
366 | 0 | free(chunkshape); |
367 | 0 | free(blockshape); |
368 | 0 | free(bufarea); |
369 | |
|
370 | 0 | return (int) (op - obase); |
371 | 0 | } |
372 | | |
373 | | |
374 | | // See https://habr.com/en/company/yandex/blog/457612/ |
375 | | #ifdef __AVX2__ |
376 | | |
377 | | #if defined(_MSC_VER) |
378 | | #define ALIGNED_(x) __declspec(align(x)) |
379 | | #else |
380 | | #if defined(__GNUC__) |
381 | | #define ALIGNED_(x) __attribute__ ((aligned(x))) |
382 | | #endif |
383 | | #endif |
384 | | #define ALIGNED_TYPE_(t, x) t ALIGNED_(x) |
385 | | |
386 | | static unsigned char* copy_match_16(unsigned char *op, const unsigned char *match, int32_t len) |
387 | | { |
388 | | size_t offset = op - match; |
389 | | while (len >= 16) { |
390 | | |
391 | | static const ALIGNED_TYPE_(uint8_t, 16) masks[] = |
392 | | { |
393 | | 0, 1, 2, 1, 4, 1, 4, 2, 8, 7, 6, 5, 4, 3, 2, 1, // offset = 0, not used as mask, but for shift |
394 | | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // offset = 1 |
395 | | 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, |
396 | | 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2, 0, |
397 | | 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, |
398 | | 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 0, |
399 | | 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, |
400 | | 0, 1, 2, 3, 4, 5, 6, 0, 1, 2, 3, 4, 5, 6, 0, 1, |
401 | | 0, 1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, |
402 | | 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 1, 2, 3, 4, 5, 6, |
403 | | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, |
404 | | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 0, 1, 2, 3, 4, |
405 | | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 0, 1, 2, 3, |
406 | | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 2, |
407 | | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 0, 1, |
408 | | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 0, |
409 | | 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, // offset = 16 |
410 | | }; |
411 | | |
412 | | _mm_storeu_si128((__m128i *)(op), |
413 | | _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(match)), |
414 | | _mm_load_si128((const __m128i *)(masks) + offset))); |
415 | | |
416 | | match += masks[offset]; |
417 | | |
418 | | op += 16; |
419 | | len -= 16; |
420 | | } |
421 | | // Deal with remainders |
422 | | for (; len > 0; len--) { |
423 | | *op++ = *match++; |
424 | | } |
425 | | return op; |
426 | | } |
427 | | #endif |
428 | | |
429 | | |
430 | | int ndlz8_decompress(const uint8_t *input, int32_t input_len, uint8_t *output, int32_t output_len, |
431 | 507 | uint8_t meta, blosc2_dparams *dparams) { |
432 | 507 | BLOSC_UNUSED_PARAM(meta); |
433 | 507 | BLOSC_UNUSED_PARAM(dparams); |
434 | 507 | BLOSC_ERROR_NULL(input, BLOSC2_ERROR_NULL_POINTER); |
435 | 507 | BLOSC_ERROR_NULL(output, BLOSC2_ERROR_NULL_POINTER); |
436 | | |
437 | 507 | const int cell_shape = 8; |
438 | 507 | const int cell_size = 64; |
439 | 507 | uint8_t *ip = (uint8_t *) input; |
440 | 507 | uint8_t *ip_limit = ip + input_len; |
441 | 507 | uint8_t *op = (uint8_t *) output; |
442 | 507 | uint8_t ndim; |
443 | 507 | int32_t blockshape[2]; |
444 | 507 | int32_t eshape[2]; |
445 | 507 | uint8_t *buffercpy; |
446 | 507 | uint8_t token; |
447 | 507 | if (NDLZ_UNEXPECT_CONDITIONAL(input_len < 8)) { |
448 | 8 | return 0; |
449 | 8 | } |
450 | | |
451 | | /* we start with literal copy */ |
452 | 499 | ndim = *ip; |
453 | 499 | ip++; |
454 | 499 | if (ndim != 2) { |
455 | 2 | BLOSC_TRACE_ERROR("This codec only works for ndim = 2"); |
456 | 2 | return BLOSC2_ERROR_FAILURE; |
457 | 2 | } |
458 | 497 | memcpy(&blockshape[0], ip, 4); |
459 | 497 | ip += 4; |
460 | 497 | memcpy(&blockshape[1], ip, 4); |
461 | 497 | ip += 4; |
462 | | |
463 | | // Sanity check. See https://www.cve.org/CVERecord?id=CVE-2024-3203 |
464 | 497 | if (output_len < 0 || blockshape[0] < 0 || blockshape[1] < 0) { |
465 | 65 | BLOSC_TRACE_ERROR("Output length or blockshape is negative"); |
466 | 65 | return BLOSC2_ERROR_FAILURE; |
467 | 65 | } |
468 | | |
469 | 432 | eshape[0] = ((blockshape[0] + 7) / cell_shape) * cell_shape; |
470 | 432 | eshape[1] = ((blockshape[1] + 7) / cell_shape) * cell_shape; |
471 | | |
472 | 432 | if (NDLZ_UNEXPECT_CONDITIONAL((int64_t)output_len < (int64_t)blockshape[0] * (int64_t)blockshape[1])) { |
473 | 104 | BLOSC_TRACE_ERROR("The blockshape is bigger than the output buffer"); |
474 | 104 | return 0; |
475 | 104 | } |
476 | 328 | memset(op, 0, blockshape[0] * blockshape[1]); |
477 | | |
478 | 328 | int32_t i_stop[2]; |
479 | 984 | for (int i = 0; i < 2; ++i) { |
480 | 656 | i_stop[i] = eshape[i] / cell_shape; |
481 | 656 | } |
482 | | |
483 | | /* main loop */ |
484 | 328 | int32_t ii[2]; |
485 | 328 | int32_t padding[2] = {0}; |
486 | 328 | int32_t ind = 0; |
487 | 328 | uint8_t *local_buffer = malloc(cell_size); |
488 | 328 | uint8_t *cell_aux = malloc(cell_size); |
489 | 77.6M | for (ii[0] = 0; ii[0] < i_stop[0]; ++ii[0]) { |
490 | 77.6M | for (ii[1] = 0; ii[1] < i_stop[1]; ++ii[1]) { // for each cell |
491 | 458 | if (NDLZ_UNEXPECT_CONDITIONAL(ip > ip_limit)) { |
492 | 15 | free(local_buffer); |
493 | 15 | free(cell_aux); |
494 | 15 | BLOSC_TRACE_ERROR("Exceeding input length"); |
495 | 15 | return BLOSC2_ERROR_FAILURE; |
496 | 15 | } |
497 | 443 | if (ii[0] == i_stop[0] - 1) { |
498 | 392 | padding[0] = (blockshape[0] % cell_shape == 0) ? cell_shape : blockshape[0] % cell_shape; |
499 | 392 | } else { |
500 | 51 | padding[0] = cell_shape; |
501 | 51 | } |
502 | 443 | if (ii[1] == i_stop[1] - 1) { |
503 | 337 | padding[1] = (blockshape[1] % cell_shape == 0) ? cell_shape : blockshape[1] % cell_shape; |
504 | 337 | } else { |
505 | 106 | padding[1] = cell_shape; |
506 | 106 | } |
507 | 443 | token = *ip++; |
508 | 443 | uint8_t match_type = (token >> 3U); |
509 | 443 | if (token == 0) { // no match |
510 | 174 | buffercpy = ip; |
511 | 174 | ip += padding[0] * padding[1]; |
512 | 269 | } else if (token == (uint8_t) ((1U << 7U) | (1U << 6U))) { // cell match |
513 | 82 | uint16_t offset = *((uint16_t *) ip); |
514 | 82 | buffercpy = ip - offset - 1; |
515 | 82 | ip += 2; |
516 | 187 | } else if (token == (uint8_t) (1U << 6U)) { // whole cell of same element |
517 | 124 | buffercpy = cell_aux; |
518 | 124 | memset(buffercpy, *ip, cell_size); |
519 | 124 | ip++; |
520 | 124 | } else if (match_type == 21) { // triple match |
521 | 16 | buffercpy = local_buffer; |
522 | 16 | int row = (int) (token & 7); |
523 | 16 | uint16_t offset = *((uint16_t *) ip); |
524 | 16 | ip += 2; |
525 | 64 | for (int l = 0; l < 3; l++) { |
526 | 48 | memcpy(&buffercpy[(row + l) * cell_shape], |
527 | 48 | ip - sizeof(token) - sizeof(offset) - offset + l * cell_shape, cell_shape); |
528 | 48 | } |
529 | 144 | for (int l = 0; l < cell_shape; l++) { |
530 | 128 | if ((l < row) || (l > row + 2)) { |
531 | 82 | memcpy(&buffercpy[l * cell_shape], ip, cell_shape); |
532 | 82 | ip += cell_shape; |
533 | 82 | } |
534 | 128 | } |
535 | 47 | } else if (match_type == 17) { // pair match |
536 | 20 | buffercpy = local_buffer; |
537 | 20 | int row = (int) (token & 7); |
538 | 20 | uint16_t offset = *((uint16_t *) ip); |
539 | 20 | ip += 2; |
540 | 60 | for (int l = 0; l < 2; l++) { |
541 | 40 | memcpy(&buffercpy[(row + l) * cell_shape], |
542 | 40 | ip - sizeof(token) - sizeof(offset) - offset + l * cell_shape, cell_shape); |
543 | 40 | } |
544 | 180 | for (int l = 0; l < cell_shape; l++) { |
545 | 160 | if ((l < row) || (l > row + 1)) { |
546 | 120 | memcpy(&buffercpy[l * cell_shape], ip, cell_shape); |
547 | 120 | ip += cell_shape; |
548 | 120 | } |
549 | 160 | } |
550 | 27 | } else { |
551 | 27 | free(local_buffer); |
552 | 27 | free(cell_aux); |
553 | 27 | BLOSC_TRACE_ERROR("Invalid token: %u at cell [%d, %d]\n", token, ii[0], ii[1]); |
554 | 27 | return BLOSC2_ERROR_FAILURE; |
555 | 27 | } |
556 | | |
557 | 416 | int32_t orig = ii[0] * cell_shape * blockshape[1] + ii[1] * cell_shape; |
558 | 3.74k | for (int32_t i = 0; i < (int32_t) cell_shape; i++) { |
559 | 3.32k | if (i < padding[0]) { |
560 | 1.90k | ind = orig + i * blockshape[1]; |
561 | 1.90k | memcpy(&op[ind], buffercpy, padding[1]); |
562 | 1.90k | } |
563 | 3.32k | buffercpy += padding[1]; |
564 | 3.32k | } |
565 | 416 | if (ind > output_len) { |
566 | 0 | free(local_buffer); |
567 | 0 | free(cell_aux); |
568 | 0 | BLOSC_TRACE_ERROR("Exceeding output size"); |
569 | 0 | return BLOSC2_ERROR_FAILURE; |
570 | 0 | } |
571 | 416 | } |
572 | 77.6M | } |
573 | 286 | ind += padding[1]; |
574 | | |
575 | 286 | free(cell_aux); |
576 | 286 | free(local_buffer); |
577 | | |
578 | 286 | if (ind != (blockshape[0] * blockshape[1])) { |
579 | 0 | BLOSC_TRACE_ERROR("Output size is not compatible with embedded blockshape"); |
580 | 0 | return BLOSC2_ERROR_FAILURE; |
581 | 0 | } |
582 | 286 | if (ind > output_len) { |
583 | 0 | BLOSC_TRACE_ERROR("Exceeding output size"); |
584 | 0 | return BLOSC2_ERROR_FAILURE; |
585 | 0 | } |
586 | | |
587 | 286 | return (int) ind; |
588 | 286 | } |