Coverage Report

Created: 2026-03-07 07:09

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/c-blosc2/plugins/codecs/ndlz/ndlz4x4.c
Line
Count
Source
1
/*********************************************************************
2
  Blosc - Blocked Shuffling and Compression Library
3
4
  Copyright (c) 2021  Blosc Development Team <blosc@blosc.org>
5
  https://blosc.org
6
  License: BSD 3-Clause (see LICENSE.txt)
7
8
  See LICENSE.txt for details about copyright and rights to use.
9
**********************************************************************/
10
11
/*********************************************************************
12
  This codec is meant to leverage multidimensionality for getting
13
  better compression ratios.  The idea is to look for similarities
14
  in places that are closer in a euclidean metric, not the typical
15
  linear one.
16
**********************************************************************/
17
18
#include "ndlz4x4.h"
19
#include "xxhash.h"
20
#include "b2nd.h"
21
22
#include <stdlib.h>
23
#include <string.h>
24
25
/*
26
 * Give hints to the compiler for branch prediction optimization.
27
 */
28
#if defined(__GNUC__) && (__GNUC__ > 2)
29
#define NDLZ_EXPECT_CONDITIONAL(c)    (__builtin_expect((c), 1))
30
3.79k
#define NDLZ_UNEXPECT_CONDITIONAL(c)  (__builtin_expect((c), 0))
31
#else
32
#define NDLZ_EXPECT_CONDITIONAL(c)    (c)
33
#define NDLZ_UNEXPECT_CONDITIONAL(c)  (c)
34
#endif
35
36
/*
37
 * Use inlined functions for supported systems.
38
 */
39
#if defined(_MSC_VER) && !defined(__cplusplus)   /* Visual Studio */
40
#define inline __inline  /* Visual C is not C99, but supports some kind of inline */
41
#endif
42
43
#define MAX_COPY 32U
44
0
#define MAX_DISTANCE 65535
45
46
47
#ifdef BLOSC_STRICT_ALIGN
48
#define NDLZ_READU16(p) ((p)[0] | (p)[1]<<8)
49
#define NDLZ_READU32(p) ((p)[0] | (p)[1]<<8 | (p)[2]<<16 | (p)[3]<<24)
50
#else
51
#define NDLZ_READU16(p) *((const uint16_t*)(p))
52
#define NDLZ_READU32(p) *((const uint32_t*)(p))
53
#endif
54
55
#define HASH_LOG (12)
56
57
58
int ndlz4_compress(const uint8_t *input, int32_t input_len, uint8_t *output, int32_t output_len,
59
0
                   uint8_t meta, blosc2_cparams *cparams) {
60
0
  BLOSC_UNUSED_PARAM(meta);
61
0
  BLOSC_ERROR_NULL(cparams, BLOSC2_ERROR_NULL_POINTER);
62
0
  BLOSC_ERROR_NULL(cparams->schunk, BLOSC2_ERROR_NULL_POINTER);
63
0
  uint8_t *smeta;
64
0
  int32_t smeta_len;
65
66
0
  if (blosc2_meta_get(cparams->schunk, "b2nd", &smeta, &smeta_len) < 0) {
67
0
    BLOSC_TRACE_ERROR("b2nd layer not found!");
68
0
    return BLOSC2_ERROR_FAILURE;
69
0
  }
70
71
0
  int8_t ndim;
72
0
  int64_t *shape = malloc(8 * sizeof(int64_t));
73
0
  int32_t *chunkshape = malloc(8 * sizeof(int32_t));
74
0
  int32_t *blockshape = malloc(8 * sizeof(int32_t));
75
0
  b2nd_deserialize_meta(smeta, smeta_len, &ndim, shape, chunkshape, blockshape, NULL, NULL);
76
0
  free(smeta);
77
78
0
  if (ndim != 2) {
79
0
    BLOSC_TRACE_ERROR("This codec only works for ndim = 2");
80
0
    return BLOSC2_ERROR_FAILURE;
81
0
  }
82
83
0
  if (input_len != (blockshape[0] * blockshape[1])) {
84
0
    BLOSC_TRACE_ERROR("Length not equal to blocksize");
85
0
    return BLOSC2_ERROR_FAILURE;
86
0
  }
87
88
0
  if (NDLZ_UNEXPECT_CONDITIONAL(output_len < (int) (1 + ndim * sizeof(int32_t)))) {
89
0
    BLOSC_TRACE_ERROR("Output too small");
90
0
    return BLOSC2_ERROR_FAILURE;
91
0
  }
92
93
0
  uint8_t *ip = (uint8_t *) input;
94
0
  uint8_t *op = (uint8_t *) output;
95
0
  uint8_t *op_limit;
96
0
  uint32_t hval, hash_cell;
97
0
  uint32_t hash_triple[2] = {0};
98
0
  uint32_t hash_pair[3] = {0};
99
0
  uint8_t bufarea[16];
100
0
  uint8_t *buf_cell = bufarea;
101
0
  uint8_t buf_triple[12];
102
0
  uint8_t buf_pair[8];
103
0
  uint8_t *buf_aux;
104
0
  uint32_t tab_cell[1U << 12U] = {0};
105
0
  uint32_t tab_triple[1U << 12U] = {0};
106
0
  uint32_t tab_pair[1U << 12U] = {0};
107
0
  uint32_t update_triple[2] = {0};
108
0
  uint32_t update_pair[3] = {0};
109
110
  // Minimum cratios before issuing and _early giveup_
111
  // Remind that ndlz is not meant for cratios <= 2 (too costly to decompress)
112
113
0
  op_limit = op + output_len;
114
115
  // Initialize the hash table to distances of 0
116
0
  for (unsigned i = 0; i < (1U << 12U); i++) {
117
0
    tab_cell[i] = 0;
118
0
  }
119
120
  /* input and output buffer cannot be less than 16 and 66 bytes or we can get into trouble */
121
0
  int overhead = 17 + (blockshape[0] * blockshape[1] / 16 - 1) * 2;
122
0
  if (input_len < 16 || output_len < overhead) {
123
0
    BLOSC_TRACE_ERROR("Incorrect length or maxout");
124
0
    return 0;
125
0
  }
126
127
0
  uint8_t *obase = op;
128
129
  /* we start with literal copy */
130
0
  *op++ = ndim;
131
0
  memcpy(op, &blockshape[0], 4);
132
0
  op += 4;
133
0
  memcpy(op, &blockshape[1], 4);
134
0
  op += 4;
135
136
0
  uint32_t i_stop[2];
137
0
  for (int i = 0; i < 2; ++i) {
138
0
    i_stop[i] = (blockshape[i] + 3) / 4;
139
0
  }
140
141
  /* main loop */
142
0
  uint32_t padding[2];
143
0
  uint32_t ii[2];
144
0
  for (ii[0] = 0; ii[0] < i_stop[0]; ++ii[0]) {
145
0
    for (ii[1] = 0; ii[1] < i_stop[1]; ++ii[1]) {      // for each cell
146
0
      uint8_t token;
147
0
      for (int h = 0; h < 2; h++) {         // new cell -> new possible references
148
0
        update_triple[h] = 0;
149
0
        update_pair[h] = 0;
150
0
      }
151
0
      update_pair[2] = 0;
152
153
0
      if (NDLZ_UNEXPECT_CONDITIONAL(op + 16 + 1 > op_limit)) {
154
0
        free(shape);
155
0
        free(chunkshape);
156
0
        free(blockshape);
157
0
        return 0;
158
0
      }
159
160
0
      uint32_t orig = ii[0] * 4 * blockshape[1] + ii[1] * 4;
161
0
      if (((blockshape[0] % 4 != 0) && (ii[0] == i_stop[0] - 1)) ||
162
0
          ((blockshape[1] % 4 != 0) && (ii[1] == i_stop[1] - 1))) {
163
0
        token = 0;                                   // padding -> literal copy
164
0
        *op++ = token;
165
0
        if (ii[0] == i_stop[0] - 1) {
166
0
          padding[0] = (blockshape[0] % 4 == 0) ? 4 : blockshape[0] % 4;
167
0
        } else {
168
0
          padding[0] = 4;
169
0
        }
170
0
        if (ii[1] == i_stop[1] - 1) {
171
0
          padding[1] = (blockshape[1] % 4 == 0) ? 4 : blockshape[1] % 4;
172
0
        } else {
173
0
          padding[1] = 4;
174
0
        }
175
0
        for (uint32_t i = 0; i < padding[0]; i++) {
176
0
          memcpy(op, &ip[orig + i * blockshape[1]], padding[1]);
177
0
          op += padding[1];
178
0
        }
179
0
      } else {
180
0
        for (uint64_t i = 0; i < 4; i++) {           // fill cell buffer
181
0
          uint64_t ind = orig + i * blockshape[1];
182
0
          memcpy(buf_cell, &ip[ind], 4);
183
0
          buf_cell += 4;
184
0
        }
185
0
        buf_cell -= 16;
186
187
0
        const uint8_t *ref;
188
0
        uint32_t distance;
189
0
        uint8_t *anchor = op;    /* comparison starting-point */
190
191
        /* find potential match */
192
0
        hash_cell = XXH32(buf_cell, 16, 1);        // calculate cell hash
193
0
        hash_cell >>= 32U - 12U;
194
0
        ref = obase + tab_cell[hash_cell];
195
196
        /* calculate distance to the match */
197
0
        if (tab_cell[hash_cell] == 0) {
198
0
          distance = 0;
199
0
        } else {
200
0
          bool same = true;
201
0
          buf_aux = obase + tab_cell[hash_cell];
202
0
          for (int i = 0; i < 16; i++) {
203
0
            if (buf_cell[i] != buf_aux[i]) {
204
0
              same = false;
205
0
              break;
206
0
            }
207
0
          }
208
0
          if (same) {
209
0
            distance = (int32_t) (anchor - ref);
210
0
          } else {
211
0
            distance = 0;
212
0
          }
213
0
        }
214
215
0
        bool alleq = true;
216
0
        for (int i = 1; i < 16; i++) {
217
0
          if (buf_cell[i] != buf_cell[0]) {
218
0
            alleq = false;
219
0
            break;
220
0
          }
221
0
        }
222
0
        if (alleq) {                              // all elements of the cell equal
223
0
          token = (uint8_t) (1U << 6U);
224
0
          *op++ = token;
225
0
          *op++ = buf_cell[0];
226
227
0
        } else if (distance == 0 || (distance >= MAX_DISTANCE)) {   // no cell match
228
0
          bool literal = true;
229
230
          // 2 rows pairs matches
231
0
          for (int j = 1; j < 4; j++) {
232
0
            memcpy(buf_pair, buf_cell, 4);
233
0
            memcpy(&buf_pair[4], &buf_cell[j * 4], 4);
234
0
            hval = XXH32(buf_pair, 8, 1);        // calculate rows pair hash
235
0
            hval >>= 32U - 12U;
236
0
            ref = obase + tab_pair[hval];
237
            /* calculate distance to the match */
238
0
            bool same = true;
239
0
            uint16_t offset;
240
0
            if (tab_pair[hval] != 0) {
241
0
              buf_aux = obase + tab_pair[hval];
242
0
              for (int k = 0; k < 8; k++) {
243
0
                if (buf_pair[k] != buf_aux[k]) {
244
0
                  same = false;
245
0
                  break;
246
0
                }
247
0
              }
248
0
              offset = (uint16_t) (anchor - obase - tab_pair[hval]);
249
0
            } else {
250
0
              same = false;
251
0
            }
252
0
            if (same) {
253
0
              distance = (int32_t) (anchor - ref);
254
0
            } else {
255
0
              distance = 0;
256
0
            }
257
0
            if ((distance != 0) && (distance < MAX_DISTANCE)) {     /* rows pair match */
258
0
              int k, m, l = -1;
259
0
              for (k = 1; k < 4; k++) {
260
0
                if (k != j) {
261
0
                  if (l == -1) {
262
0
                    l = k;
263
0
                  } else {
264
0
                    m = k;
265
0
                  }
266
0
                }
267
0
              }
268
0
              memcpy(buf_pair, &buf_cell[l * 4], 4);
269
0
              memcpy(&buf_pair[4], &buf_cell[m * 4], 4);
270
0
              hval = XXH32(buf_pair, 8, 1);        // calculate rows pair hash
271
0
              hval >>= 32U - 12U;
272
0
              ref = obase + tab_pair[hval];
273
0
              same = true;
274
0
              if (tab_pair[hval] != 0) {
275
0
                buf_aux = obase + tab_pair[hval];
276
0
                for (k = 0; k < 8; k++) {
277
0
                  if (buf_pair[k] != buf_aux[k]) {
278
0
                    same = false;
279
0
                    break;
280
0
                  }
281
0
                }
282
0
              } else {
283
0
                same = false;
284
0
              }
285
0
              if (same) {
286
0
                distance = (int32_t) (anchor + l * 4 - ref);
287
0
              } else {
288
0
                distance = 0;
289
0
              }
290
0
              if ((distance != 0) && (distance < MAX_DISTANCE)) {   /* 2 pair matches */
291
0
                literal = false;
292
0
                token = (uint8_t) ((1U << 5U) | (j << 3U));
293
0
                *op++ = token;
294
0
                uint16_t offset_2 = (uint16_t) (anchor - obase - tab_pair[hval]);
295
0
                *(uint16_t *) op = offset;
296
0
                op += sizeof(offset);
297
0
                *(uint16_t *) op = offset_2;
298
0
                op += sizeof(offset_2);
299
0
                goto match;
300
0
              }
301
0
            }
302
0
          }
303
304
          // rows triples
305
0
          for (int i = 0; i < 2; i++) {
306
0
            memcpy(buf_triple, &buf_cell[i * 4], 4);
307
0
            for (int j = i + 1; j < 3; j++) {
308
0
              memcpy(&buf_triple[4], &buf_cell[j * 4], 4);
309
0
              for (int k = j + 1; k < 4; k++) {
310
0
                memcpy(&buf_triple[8], &buf_cell[k * 4], 4);
311
0
                hval = XXH32(buf_triple, 12, 1);        // calculate triple hash
312
0
                hval >>= 32U - 12U;
313
                /* calculate distance to the match */
314
0
                bool same = true;
315
0
                uint16_t offset;
316
0
                if (tab_triple[hval] != 0) {
317
0
                  buf_aux = obase + tab_triple[hval];
318
0
                  for (int l = 0; l < 12; l++) {
319
0
                    if (buf_triple[l] != buf_aux[l]) {
320
0
                      same = false;
321
0
                      break;
322
0
                    }
323
0
                  }
324
0
                  offset = (uint16_t) (anchor - obase - tab_triple[hval]);
325
0
                } else {
326
0
                  same = false;
327
0
                  if ((j - i == 1) && (k - j == 1)) {
328
0
                    update_triple[i] = (uint32_t) (anchor + 1 + i * 4 - obase);     /* update hash table */
329
0
                    hash_triple[i] = hval;
330
0
                  }
331
0
                }
332
0
                ref = obase + tab_triple[hval];
333
334
0
                if (same) {
335
0
                  distance = (int32_t) (anchor + i * 4 - ref);
336
0
                } else {
337
0
                  distance = 0;
338
0
                }
339
0
                if ((distance != 0) && (distance < MAX_DISTANCE)) {
340
0
                  literal = false;
341
0
                  if (i == 1) {
342
0
                    token = (uint8_t) (7U << 5U);
343
0
                  } else {
344
0
                    token = (uint8_t) ((7U << 5U) | ((j + k - 2) << 3U));
345
0
                  }
346
0
                  *op++ = token;
347
0
                  memcpy(op, &offset, 2);
348
0
                  op += 2;
349
0
                  for (int l = 0; l < 4; l++) {
350
0
                    if ((l != i) && (l != j) && (l != k)) {
351
0
                      memcpy(op, &buf_cell[4 * l], 4);
352
0
                      op += 4;
353
0
                      goto match;
354
0
                    }
355
0
                  }
356
0
                }
357
0
              }
358
0
            }
359
0
          }
360
361
          // rows pairs
362
0
          for (int i = 0; i < 3; i++) {
363
0
            memcpy(buf_pair, &buf_cell[i * 4], 4);
364
0
            for (int j = i + 1; j < 4; j++) {
365
0
              memcpy(&buf_pair[4], &buf_cell[j * 4], 4);
366
0
              hval = XXH32(buf_pair, 8, 1);        // calculate rows pair hash
367
0
              hval >>= 32U - 12U;
368
0
              ref = obase + tab_pair[hval];
369
              /* calculate distance to the match */
370
0
              bool same = true;
371
0
              uint16_t offset;
372
0
              if (tab_pair[hval] != 0) {
373
0
                buf_aux = obase + tab_pair[hval];
374
0
                for (int k = 0; k < 8; k++) {
375
0
                  if (buf_pair[k] != buf_aux[k]) {
376
0
                    same = false;
377
0
                    break;
378
0
                  }
379
0
                }
380
0
                offset = (uint16_t) (anchor - obase - tab_pair[hval]);
381
0
              } else {
382
0
                same = false;
383
0
                if (j - i == 1) {
384
0
                  update_pair[i] = (uint32_t) (anchor + 1 + i * 4 - obase);     /* update hash table */
385
0
                  hash_pair[i] = hval;
386
0
                }
387
0
              }
388
0
              if (same) {
389
0
                distance = (int32_t) (anchor + i * 4 - ref);
390
0
              } else {
391
0
                distance = 0;
392
0
              }
393
0
              if ((distance != 0) && (distance < MAX_DISTANCE)) {     /* rows pair match */
394
0
                literal = false;
395
0
                if (i == 2) {
396
0
                  token = (uint8_t) (1U << 7U);
397
0
                } else {
398
0
                  token = (uint8_t) ((1U << 7U) | (i << 5U) | (j << 3U));
399
0
                }
400
0
                *op++ = token;
401
0
                memcpy(op, &offset, 2);
402
0
                op += 2;
403
0
                for (int k = 0; k < 4; k++) {
404
0
                  if ((k != i) && (k != j)) {
405
0
                    memcpy(op, &buf_cell[4 * k], 4);
406
0
                    op += 4;
407
0
                  }
408
0
                }
409
0
                goto match;
410
0
              }
411
0
            }
412
0
          }
413
414
0
          match:
415
0
          if (literal) {
416
0
            tab_cell[hash_cell] = (uint32_t) (anchor + 1 - obase);     /* update hash tables */
417
0
            if (update_triple[0] != 0) {
418
0
              for (int h = 0; h < 2; h++) {
419
0
                tab_triple[hash_triple[h]] = update_triple[h];
420
0
              }
421
0
            }
422
0
            if (update_pair[0] != 0) {
423
0
              for (int h = 0; h < 3; h++) {
424
0
                tab_pair[hash_pair[h]] = update_pair[h];
425
0
              }
426
0
            }
427
0
            token = 0;
428
0
            *op++ = token;
429
0
            memcpy(op, buf_cell, 16);
430
0
            op += 16;
431
0
          }
432
433
0
        } else {   // cell match
434
0
          token = (uint8_t) ((1U << 7U) | (1U << 6U));
435
0
          *op++ = token;
436
0
          uint16_t offset = (uint16_t) (anchor - obase - tab_cell[hash_cell]);
437
0
          memcpy(op, &offset, 2);
438
0
          op += 2;
439
0
        }
440
441
0
      }
442
0
      if ((op - obase) > input_len) {
443
0
        BLOSC_TRACE_ERROR("Compressed data is bigger than input!");
444
0
        return 0;
445
0
      }
446
0
    }
447
0
  }
448
449
0
  free(shape);
450
0
  free(chunkshape);
451
0
  free(blockshape);
452
453
0
  return (int) (op - obase);
454
0
}
455
456
457
// See https://habr.com/en/company/yandex/blog/457612/
458
#ifdef __AVX2__
459
460
#if defined(_MSC_VER)
461
#define ALIGNED_(x) __declspec(align(x))
462
#else
463
#if defined(__GNUC__)
464
#define ALIGNED_(x) __attribute__ ((aligned(x)))
465
#endif
466
#endif
467
#define ALIGNED_TYPE_(t, x) t ALIGNED_(x)
468
469
static unsigned char* copy_match_16(unsigned char *op, const unsigned char *match, int32_t len)
470
{
471
  size_t offset = op - match;
472
  while (len >= 16) {
473
474
    static const ALIGNED_TYPE_(uint8_t, 16) masks[] =
475
      {
476
                0,  1,  2,  1,  4,  1,  4,  2,  8,  7,  6,  5,  4,  3,  2,  1, // offset = 0, not used as mask, but for shift
477
                0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // offset = 1
478
                0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,
479
                0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,
480
                0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,
481
                0,  1,  2,  3,  4,  0,  1,  2,  3,  4,  0,  1,  2,  3,  4,  0,
482
                0,  1,  2,  3,  4,  5,  0,  1,  2,  3,  4,  5,  0,  1,  2,  3,
483
                0,  1,  2,  3,  4,  5,  6,  0,  1,  2,  3,  4,  5,  6,  0,  1,
484
                0,  1,  2,  3,  4,  5,  6,  7,  0,  1,  2,  3,  4,  5,  6,  7,
485
                0,  1,  2,  3,  4,  5,  6,  7,  8,  0,  1,  2,  3,  4,  5,  6,
486
                0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  1,  2,  3,  4,  5,
487
                0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10,  0,  1,  2,  3,  4,
488
                0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,  0,  1,  2,  3,
489
                0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,  0,  1,  2,
490
                0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,  0,  1,
491
                0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,  0,
492
                0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,  15, // offset = 16
493
      };
494
495
    _mm_storeu_si128((__m128i *)(op),
496
                     _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(match)),
497
                                      _mm_load_si128((const __m128i *)(masks) + offset)));
498
499
    match += masks[offset];
500
501
    op += 16;
502
    len -= 16;
503
  }
504
  // Deal with remainders
505
  for (; len > 0; len--) {
506
    *op++ = *match++;
507
  }
508
  return op;
509
}
510
#endif
511
512
513
int ndlz4_decompress(const uint8_t *input, int32_t input_len, uint8_t *output, int32_t output_len,
514
1.04k
                     uint8_t meta, blosc2_dparams *dparams) {
515
1.04k
  BLOSC_UNUSED_PARAM(meta);
516
1.04k
  BLOSC_UNUSED_PARAM(dparams);
517
1.04k
  BLOSC_ERROR_NULL(input, BLOSC2_ERROR_NULL_POINTER);
518
1.04k
  BLOSC_ERROR_NULL(output, BLOSC2_ERROR_NULL_POINTER);
519
520
1.04k
  uint8_t *ip = (uint8_t *) input;
521
1.04k
  uint8_t *ip_limit = ip + input_len;
522
1.04k
  uint8_t *op = (uint8_t *) output;
523
1.04k
  uint8_t ndim;
524
1.04k
  int32_t blockshape[2];
525
1.04k
  int32_t eshape[2];
526
1.04k
  uint8_t *buffercpy;
527
1.04k
  uint8_t local_buffer[16];
528
1.04k
  uint8_t token;
529
1.04k
  if (NDLZ_UNEXPECT_CONDITIONAL(input_len < 8)) {
530
7
    return 0;
531
7
  }
532
533
  /* we start with literal copy */
534
1.03k
  ndim = *ip;
535
1.03k
  ip++;
536
1.03k
  if (ndim != 2) {
537
2
    BLOSC_TRACE_ERROR("This codec only works for ndim = 2");
538
2
    return BLOSC2_ERROR_FAILURE;
539
2
  }
540
1.03k
  memcpy(&blockshape[0], ip, 4);
541
1.03k
  ip += 4;
542
1.03k
  memcpy(&blockshape[1], ip, 4);
543
1.03k
  ip += 4;
544
545
  // Sanity check.  See https://www.cve.org/CVERecord?id=CVE-2024-3204
546
1.03k
  if (output_len < 0 || blockshape[0] < 0 || blockshape[1] < 0) {
547
68
    BLOSC_TRACE_ERROR("Output length or blockshape is negative");
548
68
    return BLOSC2_ERROR_FAILURE;
549
68
  }
550
551
968
  eshape[0] = ((blockshape[0] + 3) / 4) * 4;
552
968
  eshape[1] = ((blockshape[1] + 3) / 4) * 4;
553
554
968
  if (NDLZ_UNEXPECT_CONDITIONAL((int64_t)output_len < (int64_t)blockshape[0] * (int64_t)blockshape[1])) {
555
109
    BLOSC_TRACE_ERROR("The blockshape is bigger than the output buffer");
556
109
    return 0;
557
109
  }
558
859
  memset(op, 0, blockshape[0] * blockshape[1]);
559
560
859
  uint32_t i_stop[2];
561
2.57k
  for (int i = 0; i < 2; ++i) {
562
1.71k
    i_stop[i] = eshape[i] / 4;
563
1.71k
  }
564
565
  /* main loop */
566
859
  uint32_t ii[2];
567
859
  uint32_t padding[2] = {0};
568
859
  uint32_t ind = 0;
569
859
  uint8_t cell_aux[16];
570
7.18G
  for (ii[0] = 0; ii[0] < i_stop[0]; ++ii[0]) {
571
7.18G
    for (ii[1] = 0; ii[1] < i_stop[1]; ++ii[1]) {      // for each cell
572
1.78k
      if (NDLZ_UNEXPECT_CONDITIONAL(ip > ip_limit)) {
573
16
        BLOSC_TRACE_ERROR("Exceeding input length");
574
16
        return BLOSC2_ERROR_FAILURE;
575
16
      }
576
1.76k
      if (ii[0] == i_stop[0] - 1) {
577
1.32k
        padding[0] = (blockshape[0] % 4 == 0) ? 4 : blockshape[0] % 4;
578
1.32k
      } else {
579
446
        padding[0] = 4;
580
446
      }
581
1.76k
      if (ii[1] == i_stop[1] - 1) {
582
1.19k
        padding[1] = (blockshape[1] % 4 == 0) ? 4 : blockshape[1] % 4;
583
1.19k
      } else {
584
575
        padding[1] = 4;
585
575
      }
586
1.76k
      token = *ip++;
587
1.76k
      if (token == 0) {    // no match
588
183
        buffercpy = ip;
589
183
        ip += padding[0] * padding[1];
590
1.58k
      } else if (token == (uint8_t) ((1U << 7U) | (1U << 6U))) {  // cell match
591
88
        uint16_t offset = *((uint16_t *) ip);
592
88
        buffercpy = ip - offset - 1;
593
88
        ip += 2;
594
1.49k
      } else if (token == (uint8_t) (1U << 6U)) { // whole cell of same element
595
78
        buffercpy = cell_aux;
596
78
        memset(buffercpy, *ip, 16);
597
78
        ip++;
598
1.41k
      } else if (token >= 224) { // three rows match
599
285
        buffercpy = local_buffer;
600
285
        uint16_t offset = *((uint16_t *) ip);
601
285
        offset += 3;
602
285
        ip += 2;
603
285
        int i, j, k;
604
285
        if ((token >> 3U) == 28) {
605
75
          i = 1;
606
75
          j = 2;
607
75
          k = 3;
608
210
        } else {
609
210
          i = 0;
610
210
          if ((token >> 3U) < 30) {
611
83
            j = 1;
612
83
            k = 2;
613
127
          } else {
614
127
            k = 3;
615
127
            if ((token >> 3U) == 30) {
616
42
              j = 1;
617
85
            } else {
618
85
              j = 2;
619
85
            }
620
127
          }
621
210
        }
622
285
        memcpy(&buffercpy[i * 4], ip - offset, 4);
623
285
        memcpy(&buffercpy[j * 4], ip - offset + 4, 4);
624
285
        memcpy(&buffercpy[k * 4], ip - offset + 8, 4);
625
703
        for (int l = 0; l < 4; l++) {
626
703
          if ((l != i) && (l != j) && (l != k)) {
627
285
            memcpy(&buffercpy[l * 4], ip, 4);
628
285
            ip += 4;
629
285
            break;
630
285
          }
631
703
        }
632
633
1.13k
      } else if ((token >= 128) && (token <= 191)) { // rows pair match
634
833
        buffercpy = local_buffer;
635
833
        uint16_t offset = *((uint16_t *) ip);
636
833
        offset += 3;
637
833
        ip += 2;
638
833
        int i, j;
639
833
        if (token == 128) {
640
114
          i = 2;
641
114
          j = 3;
642
719
        } else {
643
719
          i = (token - 128) >> 5U;
644
719
          j = ((token - 128) >> 3U) - (i << 2U);
645
719
        }
646
833
        memcpy(&buffercpy[i * 4], ip - offset, 4);
647
833
        memcpy(&buffercpy[j * 4], ip - offset + 4, 4);
648
4.16k
        for (int k = 0; k < 4; k++) {
649
3.33k
          if ((k != i) && (k != j)) {
650
1.67k
            memcpy(&buffercpy[k * 4], ip, 4);
651
1.67k
            ip += 4;
652
1.67k
          }
653
3.33k
        }
654
833
      } else if ((token >= 40) && (token <= 63)) {  // 2 rows pair matches
655
270
        buffercpy = local_buffer;
656
270
        uint16_t offset_1 = *((uint16_t *) ip);
657
270
        offset_1 += 5;
658
270
        ip += 2;
659
270
        uint16_t offset_2 = *((uint16_t *) ip);
660
270
        offset_2 += 5;
661
270
        ip += 2;
662
270
        int i, j, k, l, m;
663
270
        i = 0;
664
270
        j = ((token - 32) >> 3U);
665
270
        l = -1;
666
1.08k
        for (k = 1; k < 4; k++) {
667
810
          if ((k != i) && (k != j)) {
668
540
            if (l == -1) {
669
270
              l = k;
670
270
            } else {
671
270
              m = k;
672
270
            }
673
540
          }
674
810
        }
675
270
        memcpy(&buffercpy[i * 4], ip - offset_1, 4);
676
270
        memcpy(&buffercpy[j * 4], ip - offset_1 + 4, 4);
677
270
        memcpy(&buffercpy[l * 4], ip - offset_2, 4);
678
270
        memcpy(&buffercpy[m * 4], ip - offset_2 + 4, 4);
679
680
270
      } else {
681
30
        BLOSC_TRACE_ERROR("Invalid token: %u at cell [%d, %d]\n", token, ii[0], ii[1]);
682
30
        return BLOSC2_ERROR_FAILURE;
683
30
      }
684
      // fill op with buffercpy
685
1.73k
      uint32_t orig = ii[0] * 4 * blockshape[1] + ii[1] * 4;
686
8.68k
      for (uint32_t i = 0; i < 4; i++) {
687
6.94k
        if (i < padding[0]) {
688
4.27k
          ind = orig + i * blockshape[1];
689
4.27k
          memcpy(&op[ind], buffercpy, padding[1]);
690
4.27k
        }
691
6.94k
        buffercpy += padding[1];
692
6.94k
      }
693
1.73k
      if (ind > (uint32_t) output_len) {
694
0
        BLOSC_TRACE_ERROR("Exceeding output size");
695
0
        return BLOSC2_ERROR_FAILURE;
696
0
      }
697
1.73k
    }
698
7.18G
  }
699
813
  ind += padding[1];
700
701
813
  if ((int32_t)ind != (blockshape[0] * blockshape[1])) {
702
0
    BLOSC_TRACE_ERROR("Output size is not compatible with embedded blockshape");
703
0
    return BLOSC2_ERROR_FAILURE;
704
0
  }
705
813
  if (ind > (uint32_t) output_len) {
706
0
    BLOSC_TRACE_ERROR("Exceeding output size");
707
0
    return BLOSC2_ERROR_FAILURE;
708
0
  }
709
710
813
  return (int) ind;
711
813
}