Coverage Report

Created: 2026-04-12 06:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/c-blosc2/plugins/codecs/ndlz/ndlz8x8.c
Line
Count
Source
1
/*********************************************************************
2
  Blosc - Blocked Shuffling and Compression Library
3
4
  Copyright (c) 2021  Blosc Development Team <blosc@blosc.org>
5
  https://blosc.org
6
  License: BSD 3-Clause (see LICENSE.txt)
7
8
  See LICENSE.txt for details about copyright and rights to use.
9
**********************************************************************/
10
11
/*********************************************************************
12
  This codec is meant to leverage multidimensionality for getting
13
  better compression ratios.  The idea is to look for similarities
14
  in places that are closer in a euclidean metric, not the typical
15
  linear one.
16
**********************************************************************/
17
18
#include "ndlz8x8.h"
19
#include "xxhash.h"
20
#include "b2nd.h"
21
22
#include <stdlib.h>
23
#include <string.h>
24
25
/*
26
 * Give hints to the compiler for branch prediction optimization.
27
 */
28
#if defined(__GNUC__) && (__GNUC__ > 2)
29
#define NDLZ_EXPECT_CONDITIONAL(c)    (__builtin_expect((c), 1))
30
1.18k
#define NDLZ_UNEXPECT_CONDITIONAL(c)  (__builtin_expect((c), 0))
31
#else
32
#define NDLZ_EXPECT_CONDITIONAL(c)    (c)
33
#define NDLZ_UNEXPECT_CONDITIONAL(c)  (c)
34
#endif
35
36
/*
37
 * Use inlined functions for supported systems.
38
 */
39
#if defined(_MSC_VER) && !defined(__cplusplus)   /* Visual Studio */
40
#define inline __inline  /* Visual C is not C99, but supports some kind of inline */
41
#endif
42
43
#define MAX_COPY 32U
44
0
#define MAX_DISTANCE 65535
45
46
47
#ifdef BLOSC_STRICT_ALIGN
48
#define NDLZ_READU16(p) ((p)[0] | (p)[1]<<8)
49
#define NDLZ_READU32(p) ((p)[0] | (p)[1]<<8 | (p)[2]<<16 | (p)[3]<<24)
50
#else
51
#define NDLZ_READU16(p) *((const uint16_t*)(p))
52
#define NDLZ_READU32(p) *((const uint32_t*)(p))
53
#endif
54
55
#define HASH_LOG (12)
56
57
58
int ndlz8_compress(const uint8_t *input, int32_t input_len, uint8_t *output, int32_t output_len,
59
0
                   uint8_t meta, blosc2_cparams *cparams) {
60
0
  BLOSC_UNUSED_PARAM(meta);
61
0
  BLOSC_ERROR_NULL(cparams, BLOSC2_ERROR_NULL_POINTER);
62
0
  BLOSC_ERROR_NULL(cparams->schunk, BLOSC2_ERROR_NULL_POINTER);
63
0
  uint8_t *smeta;
64
0
  int32_t smeta_len;
65
66
0
  if (blosc2_meta_get(cparams->schunk, "b2nd", &smeta, &smeta_len) < 0) {
67
0
    BLOSC_TRACE_ERROR("b2nd layer not found!");
68
0
    return BLOSC2_ERROR_FAILURE;
69
0
  }
70
71
0
  const int cell_shape = 8;
72
0
  const int cell_size = 64;
73
0
  int8_t ndim;
74
0
  int64_t *shape = malloc(8 * sizeof(int64_t));
75
0
  int32_t *chunkshape = malloc(8 * sizeof(int32_t));
76
0
  int32_t *blockshape = malloc(8 * sizeof(int32_t));
77
0
  b2nd_deserialize_meta(smeta, smeta_len, &ndim, shape, chunkshape, blockshape, NULL, NULL);
78
0
  free(smeta);
79
80
0
  if (ndim != 2) {
81
0
    BLOSC_TRACE_ERROR("This codec only works for ndim = 2");
82
0
    return BLOSC2_ERROR_FAILURE;
83
0
  }
84
85
0
  if (input_len != (blockshape[0] * blockshape[1])) {
86
0
    BLOSC_TRACE_ERROR("Length not equal to blocksize");
87
0
    return BLOSC2_ERROR_FAILURE;
88
0
  }
89
90
0
  if (NDLZ_UNEXPECT_CONDITIONAL(output_len < (int) (1 + ndim * sizeof(int32_t)))) {
91
0
    BLOSC_TRACE_ERROR("Output too small");
92
0
    return BLOSC2_ERROR_FAILURE;
93
0
  }
94
95
0
  uint8_t *ip = (uint8_t *) input;
96
0
  uint8_t *op = (uint8_t *) output;
97
0
  uint8_t *op_limit;
98
0
  uint32_t hval, hash_cell;
99
0
  uint32_t hash_triple[6] = {0};
100
0
  uint32_t hash_pair[7] = {0};
101
0
  uint8_t *bufarea = malloc(cell_size);
102
0
  uint8_t *buf_cell = bufarea;
103
0
  uint8_t *buf_aux;
104
0
  uint32_t tab_cell[1U << 12U] = {0};
105
0
  uint32_t tab_triple[1U << 12U] = {0};
106
0
  uint32_t tab_pair[1U << 12U] = {0};
107
0
  uint32_t update_triple[6] = {0};
108
0
  uint32_t update_pair[7] = {0};
109
110
  // Minimum cratios before issuing and _early giveup_
111
  // Remind that ndlz is not meant for cratios <= 2 (too costly to decompress)
112
113
0
  op_limit = op + output_len;
114
115
  // Initialize the hash table to distances of 0
116
0
  for (unsigned i = 0; i < (1U << 12U); i++) {
117
0
    tab_cell[i] = 0;
118
0
    tab_triple[i] = 0;
119
0
    tab_pair[i] = 0;
120
0
  }
121
122
  /* input and output buffer cannot be less than 64 (cells are 8x8) */
123
0
  int overhead = 17 + (blockshape[0] * blockshape[1] / cell_size - 1) * 2;
124
0
  if (input_len < cell_size || output_len < overhead) {
125
0
    BLOSC_TRACE_ERROR("Incorrect length or maxout");
126
0
    return 0;
127
0
  }
128
129
0
  uint8_t *obase = op;
130
131
  /* we start with literal copy */
132
0
  *op++ = ndim;
133
0
  memcpy(op, &blockshape[0], 4);
134
0
  op += 4;
135
0
  memcpy(op, &blockshape[1], 4);
136
0
  op += 4;
137
138
0
  uint32_t i_stop[2];
139
0
  for (int i = 0; i < 2; ++i) {
140
0
    i_stop[i] = (blockshape[i] + cell_shape - 1) / cell_shape;
141
0
  }
142
143
144
  /* main loop */
145
0
  uint32_t padding[2];
146
0
  uint32_t ii[2];
147
0
  for (ii[0] = 0; ii[0] < i_stop[0]; ++ii[0]) {
148
0
    for (ii[1] = 0; ii[1] < i_stop[1]; ++ii[1]) {      // for each cell
149
0
      for (int h = 0; h < 7; h++) {         // new cell -> new possible references
150
0
        update_pair[h] = 0;
151
0
        if (h != 6) {
152
0
          update_triple[h] = 0;
153
0
        }
154
0
      }
155
156
0
      if (NDLZ_UNEXPECT_CONDITIONAL(op + cell_size + 1 > op_limit)) {
157
0
        free(shape);
158
0
        free(chunkshape);
159
0
        free(blockshape);
160
0
        free(bufarea);
161
0
        return 0;
162
0
      }
163
164
0
      uint32_t orig = ii[0] * cell_shape * blockshape[1] + ii[1] * cell_shape;
165
0
      if (((blockshape[0] % cell_shape != 0) && (ii[0] == i_stop[0] - 1)) ||
166
0
          ((blockshape[1] % cell_shape != 0) && (ii[1] == i_stop[1] - 1))) {
167
0
        uint8_t token = 0;                                   // padding -> literal copy
168
0
        *op++ = token;
169
0
        if (ii[0] == i_stop[0] - 1) {
170
0
          padding[0] = (blockshape[0] % cell_shape == 0) ? cell_shape : blockshape[0] % cell_shape;
171
0
        } else {
172
0
          padding[0] = cell_shape;
173
0
        }
174
0
        if (ii[1] == i_stop[1] - 1) {
175
0
          padding[1] = (blockshape[1] % cell_shape == 0) ? cell_shape : blockshape[1] % cell_shape;
176
0
        } else {
177
0
          padding[1] = cell_shape;
178
0
        }
179
0
        for (uint32_t i = 0; i < padding[0]; i++) {
180
0
          memcpy(op, &ip[orig + i * blockshape[1]], padding[1]);
181
0
          op += padding[1];
182
0
        }
183
0
      } else {
184
0
        for (uint64_t i = 0; i < (uint64_t) cell_shape; i++) {           // fill cell buffer
185
0
          uint64_t ind = orig + i * blockshape[1];
186
0
          memcpy(buf_cell, &ip[ind], cell_shape);
187
0
          buf_cell += cell_shape;
188
0
        }
189
0
        buf_cell -= cell_size;
190
191
0
        const uint8_t *ref;
192
0
        uint32_t distance;
193
0
        uint8_t *anchor = op;    /* comparison starting-point */
194
195
        /* find potential match */
196
0
        hash_cell = XXH32(buf_cell, cell_size, 1);        // calculate cell hash
197
0
        hash_cell >>= 32U - 12U;
198
0
        ref = obase + tab_cell[hash_cell];
199
200
        /* calculate distance to the match */
201
0
        if (tab_cell[hash_cell] == 0) {
202
0
          distance = 0;
203
0
        } else {
204
0
          bool same = true;
205
0
          buf_aux = obase + tab_cell[hash_cell];
206
0
          for (int i = 0; i < cell_size; i++) {
207
0
            if (buf_cell[i] != buf_aux[i]) {
208
0
              same = false;
209
0
              break;
210
0
            }
211
0
          }
212
0
          if (same) {
213
0
            distance = (int32_t) (anchor - ref);
214
0
          } else {
215
0
            distance = 0;
216
0
          }
217
0
        }
218
219
0
        bool alleq = true;
220
0
        for (int i = 1; i < cell_size; i++) {
221
0
          if (buf_cell[i] != buf_cell[0]) {
222
0
            alleq = false;
223
0
            break;
224
0
          }
225
0
        }
226
0
        if (alleq) {                              // all elements of the cell equal
227
0
          uint8_t token = (uint8_t) (1U << 6U);
228
0
          *op++ = token;
229
0
          *op++ = buf_cell[0];
230
231
0
        } else if (distance == 0 || (distance >= MAX_DISTANCE)) {   // no cell match
232
0
          bool literal = true;
233
234
          // rows triples matches
235
0
          for (int i = 0; i < 6; i++) {
236
0
            int triple_start = i * cell_shape;
237
0
            hval = XXH32(&buf_cell[triple_start], 24, 1);        // calculate triple hash
238
0
            hval >>= 32U - 12U;
239
            /* calculate distance to the match */
240
0
            bool same = true;
241
0
            uint16_t offset;
242
0
            if (tab_triple[hval] != 0) {
243
0
              buf_aux = obase + tab_triple[hval];
244
0
              for (int l = 0; l < 24; l++) {
245
0
                if (buf_cell[triple_start + l] != buf_aux[l]) {
246
0
                  same = false;
247
0
                  break;
248
0
                }
249
0
              }
250
0
              offset = (uint16_t) (anchor - obase - tab_triple[hval]);
251
0
            } else {
252
0
              same = false;
253
0
              update_triple[i] = (uint32_t) (anchor + 1 + triple_start - obase);     /* update hash table */
254
0
              hash_triple[i] = hval;
255
0
            }
256
0
            ref = obase + tab_triple[hval];
257
0
            if (same) {
258
0
              distance = (int32_t) (anchor + triple_start - ref);
259
0
            } else {
260
0
              distance = 0;
261
0
            }
262
0
            if ((distance != 0) && (distance < MAX_DISTANCE)) {     // 3 rows match
263
0
              literal = false;
264
0
              uint8_t token = (uint8_t) ((21 << 3U) | i);
265
0
              *op++ = token;
266
0
              memcpy(op, &offset, 2);
267
0
              op += 2;
268
0
              for (int l = 0; l < 8; l++) {
269
0
                if ((l < i) || (l > i + 2)) {
270
0
                  memcpy(op, &buf_cell[l * cell_shape], cell_shape);
271
0
                  op += cell_shape;
272
0
                }
273
0
              }
274
0
              goto match;
275
0
            }
276
0
          }
277
278
          // rows pairs matches
279
0
          for (int i = 0; i < 7; i++) {
280
0
            int pair_start = i * cell_shape;
281
0
            hval = XXH32(&buf_cell[pair_start], 16, 1);        // calculate rows pair hash
282
0
            hval >>= 32U - 12U;
283
0
            ref = obase + tab_pair[hval];
284
            /* calculate distance to the match */
285
0
            bool same = true;
286
0
            uint16_t offset;
287
0
            if (tab_pair[hval] != 0) {
288
0
              buf_aux = obase + tab_pair[hval];
289
0
              for (int k = 0; k < 16; k++) {
290
0
                if (buf_cell[pair_start + k] != buf_aux[k]) {
291
0
                  same = false;
292
0
                  break;
293
0
                }
294
0
              }
295
0
              offset = (uint16_t) (anchor - obase - tab_pair[hval]);
296
0
            } else {
297
0
              same = false;
298
0
              update_pair[i] = (uint32_t) (anchor + 1 + pair_start - obase);     /* update hash table */
299
0
              hash_pair[i] = hval;
300
0
            }
301
0
            if (same) {
302
0
              distance = (int32_t) (anchor + pair_start - ref);
303
0
            } else {
304
0
              distance = 0;
305
0
            }
306
0
            if ((distance != 0) && (distance < MAX_DISTANCE)) {     /* 1 rows pair match */
307
0
              literal = false;
308
0
              uint8_t token = (uint8_t) ((17 << 3U) | i);
309
0
              *op++ = token;
310
0
              offset = (uint16_t) (anchor - obase - tab_pair[hval]);
311
0
              memcpy(op, &offset, 2);
312
0
              op += 2;
313
0
              for (int l = 0; l < 8; l++) {
314
0
                if ((l < i) || (l > i + 1)) {
315
0
                  memcpy(op, &buf_cell[l * cell_shape], cell_shape);
316
0
                  op += cell_shape;
317
0
                }
318
0
              }
319
0
              goto match;
320
0
            }
321
0
          }
322
323
0
          match:
324
0
          if (literal) {
325
0
            tab_cell[hash_cell] = (uint32_t) (anchor + 1 - obase);     /* update hash tables */
326
327
0
            if (update_triple[0] != 0) {
328
0
              for (int h = 0; h < 6; h++) {
329
0
                tab_triple[hash_triple[h]] = update_triple[h];
330
0
              }
331
0
            }
332
0
            if (update_pair[0] != 0) {
333
0
              for (int h = 0; h < 7; h++) {
334
0
                tab_pair[hash_pair[h]] = update_pair[h];
335
0
              }
336
0
            }
337
0
            uint8_t token = 0;
338
0
            *op++ = token;
339
0
            memcpy(op, buf_cell, cell_size);
340
0
            op += cell_size;
341
342
0
          }
343
344
0
        } else {   // cell match
345
0
          uint8_t token = (uint8_t) ((1U << 7U) | (1U << 6U));
346
0
          *op++ = token;
347
0
          uint16_t offset = (uint16_t) (anchor - obase - tab_cell[hash_cell]);
348
0
          memcpy(op, &offset, 2);
349
0
          op += 2;
350
351
0
        }
352
353
0
      }
354
0
      if ((op - obase) > input_len) {
355
0
        free(shape);
356
0
        free(chunkshape);
357
0
        free(blockshape);
358
0
        free(bufarea);
359
0
        BLOSC_TRACE_ERROR("Compressed data is bigger than input!");
360
0
        return 0;
361
0
      }
362
0
    }
363
0
  }
364
365
0
  free(shape);
366
0
  free(chunkshape);
367
0
  free(blockshape);
368
0
  free(bufarea);
369
370
0
  return (int) (op - obase);
371
0
}
372
373
374
// See https://habr.com/en/company/yandex/blog/457612/
375
#ifdef __AVX2__
376
377
#if defined(_MSC_VER)
378
#define ALIGNED_(x) __declspec(align(x))
379
#else
380
#if defined(__GNUC__)
381
#define ALIGNED_(x) __attribute__ ((aligned(x)))
382
#endif
383
#endif
384
#define ALIGNED_TYPE_(t, x) t ALIGNED_(x)
385
386
static unsigned char* copy_match_16(unsigned char *op, const unsigned char *match, int32_t len)
387
{
388
  size_t offset = op - match;
389
  while (len >= 16) {
390
391
    static const ALIGNED_TYPE_(uint8_t, 16) masks[] =
392
      {
393
                0,  1,  2,  1,  4,  1,  4,  2,  8,  7,  6,  5,  4,  3,  2,  1, // offset = 0, not used as mask, but for shift
394
                0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, // offset = 1
395
                0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,  0,  1,
396
                0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,  1,  2,  0,
397
                0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,  0,  1,  2,  3,
398
                0,  1,  2,  3,  4,  0,  1,  2,  3,  4,  0,  1,  2,  3,  4,  0,
399
                0,  1,  2,  3,  4,  5,  0,  1,  2,  3,  4,  5,  0,  1,  2,  3,
400
                0,  1,  2,  3,  4,  5,  6,  0,  1,  2,  3,  4,  5,  6,  0,  1,
401
                0,  1,  2,  3,  4,  5,  6,  7,  0,  1,  2,  3,  4,  5,  6,  7,
402
                0,  1,  2,  3,  4,  5,  6,  7,  8,  0,  1,  2,  3,  4,  5,  6,
403
                0,  1,  2,  3,  4,  5,  6,  7,  8,  9,  0,  1,  2,  3,  4,  5,
404
                0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10,  0,  1,  2,  3,  4,
405
                0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11,  0,  1,  2,  3,
406
                0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,  0,  1,  2,
407
                0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13,  0,  1,
408
                0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,  0,
409
                0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,  15, // offset = 16
410
      };
411
412
    _mm_storeu_si128((__m128i *)(op),
413
                     _mm_shuffle_epi8(_mm_loadu_si128((const __m128i *)(match)),
414
                                      _mm_load_si128((const __m128i *)(masks) + offset)));
415
416
    match += masks[offset];
417
418
    op += 16;
419
    len -= 16;
420
  }
421
  // Deal with remainders
422
  for (; len > 0; len--) {
423
    *op++ = *match++;
424
  }
425
  return op;
426
}
427
#endif
428
429
430
int ndlz8_decompress(const uint8_t *input, int32_t input_len, uint8_t *output, int32_t output_len,
431
451
                     uint8_t meta, blosc2_dparams *dparams) {
432
451
  BLOSC_UNUSED_PARAM(meta);
433
451
  BLOSC_UNUSED_PARAM(dparams);
434
451
  BLOSC_ERROR_NULL(input, BLOSC2_ERROR_NULL_POINTER);
435
451
  BLOSC_ERROR_NULL(output, BLOSC2_ERROR_NULL_POINTER);
436
437
451
  const int cell_shape = 8;
438
451
  const int cell_size = 64;
439
451
  uint8_t *ip = (uint8_t *) input;
440
451
  uint8_t *ip_limit = ip + input_len;
441
451
  uint8_t *op = (uint8_t *) output;
442
451
  uint8_t ndim;
443
451
  int32_t blockshape[2];
444
451
  int32_t eshape[2];
445
451
  uint8_t *buffercpy;
446
451
  uint8_t token;
447
451
  if (NDLZ_UNEXPECT_CONDITIONAL(input_len < 8)) {
448
4
    return 0;
449
4
  }
450
451
  /* we start with literal copy */
452
447
  ndim = *ip;
453
447
  ip++;
454
447
  if (ndim != 2) {
455
10
    BLOSC_TRACE_ERROR("This codec only works for ndim = 2");
456
10
    return BLOSC2_ERROR_FAILURE;
457
10
  }
458
437
  memcpy(&blockshape[0], ip, 4);
459
437
  ip += 4;
460
437
  memcpy(&blockshape[1], ip, 4);
461
437
  ip += 4;
462
463
  // Sanity check.  See https://www.cve.org/CVERecord?id=CVE-2024-3203
464
437
  if (output_len < 0 || blockshape[0] < 0 || blockshape[1] < 0) {
465
67
    BLOSC_TRACE_ERROR("Output length or blockshape is negative");
466
67
    return BLOSC2_ERROR_FAILURE;
467
67
  }
468
469
370
  eshape[0] = ((blockshape[0] + 7) / cell_shape) * cell_shape;
470
370
  eshape[1] = ((blockshape[1] + 7) / cell_shape) * cell_shape;
471
472
370
  if (NDLZ_UNEXPECT_CONDITIONAL((int64_t)output_len < (int64_t)blockshape[0] * (int64_t)blockshape[1])) {
473
99
    BLOSC_TRACE_ERROR("The blockshape is bigger than the output buffer");
474
99
    return 0;
475
99
  }
476
271
  memset(op, 0, blockshape[0] * blockshape[1]);
477
478
271
  int32_t i_stop[2];
479
813
  for (int i = 0; i < 2; ++i) {
480
542
    i_stop[i] = eshape[i] / cell_shape;
481
542
  }
482
483
  /* main loop */
484
271
  int32_t ii[2];
485
271
  int32_t padding[2] = {0};
486
271
  int32_t ind = 0;
487
271
  uint8_t *local_buffer = malloc(cell_size);
488
271
  uint8_t *cell_aux = malloc(cell_size);
489
1.92G
  for (ii[0] = 0; ii[0] < i_stop[0]; ++ii[0]) {
490
1.92G
    for (ii[1] = 0; ii[1] < i_stop[1]; ++ii[1]) {      // for each cell
491
361
      if (NDLZ_UNEXPECT_CONDITIONAL(ip > ip_limit)) {
492
11
        free(local_buffer);
493
11
        free(cell_aux);
494
11
        BLOSC_TRACE_ERROR("Exceeding input length");
495
11
        return BLOSC2_ERROR_FAILURE;
496
11
      }
497
350
      if (ii[0] == i_stop[0] - 1) {
498
334
        padding[0] = (blockshape[0] % cell_shape == 0) ? cell_shape : blockshape[0] % cell_shape;
499
334
      } else {
500
16
        padding[0] = cell_shape;
501
16
      }
502
350
      if (ii[1] == i_stop[1] - 1) {
503
236
        padding[1] = (blockshape[1] % cell_shape == 0) ? cell_shape : blockshape[1] % cell_shape;
504
236
      } else {
505
114
        padding[1] = cell_shape;
506
114
      }
507
350
      token = *ip++;
508
350
      uint8_t match_type = (token >> 3U);
509
350
      if (token == 0) {    // no match
510
134
        buffercpy = ip;
511
134
        ip += padding[0] * padding[1];
512
216
      } else if (token == (uint8_t) ((1U << 7U) | (1U << 6U))) {  // cell match
513
57
        uint16_t offset = *((uint16_t *) ip);
514
57
        buffercpy = ip - offset - 1;
515
57
        ip += 2;
516
159
      } else if (token == (uint8_t) (1U << 6U)) { // whole cell of same element
517
108
        buffercpy = cell_aux;
518
108
        memset(buffercpy, *ip, cell_size);
519
108
        ip++;
520
108
      } else if (match_type == 21) {    // triple match
521
11
        buffercpy = local_buffer;
522
11
        int row = (int) (token & 7);
523
11
        if (row + 2 >= cell_shape) {
524
2
          free(local_buffer);
525
2
          free(cell_aux);
526
2
          BLOSC_TRACE_ERROR("Triple match row out of bounds");
527
2
          return BLOSC2_ERROR_FAILURE;
528
2
        }
529
9
        uint16_t offset = *((uint16_t *) ip);
530
9
        ip += 2;
531
36
        for (int l = 0; l < 3; l++) {
532
27
          memcpy(&buffercpy[(row + l) * cell_shape],
533
27
                 ip - sizeof(token) - sizeof(offset) - offset + l * cell_shape, cell_shape);
534
27
        }
535
81
        for (int l = 0; l < cell_shape; l++) {
536
72
          if ((l < row) || (l > row + 2)) {
537
45
            memcpy(&buffercpy[l * cell_shape], ip, cell_shape);
538
45
            ip += cell_shape;
539
45
          }
540
72
        }
541
40
      } else if (match_type == 17) {    // pair match
542
17
        buffercpy = local_buffer;
543
17
        int row = (int) (token & 7);
544
17
        if (row + 1 >= cell_shape) {
545
1
          free(local_buffer);
546
1
          free(cell_aux);
547
1
          BLOSC_TRACE_ERROR("Pair match row out of bounds");
548
1
          return BLOSC2_ERROR_FAILURE;
549
1
        }
550
16
        uint16_t offset = *((uint16_t *) ip);
551
16
        ip += 2;
552
48
        for (int l = 0; l < 2; l++) {
553
32
          memcpy(&buffercpy[(row + l) * cell_shape],
554
32
                 ip - sizeof(token) - sizeof(offset) - offset + l * cell_shape, cell_shape);
555
32
        }
556
144
        for (int l = 0; l < cell_shape; l++) {
557
128
          if ((l < row) || (l > row + 1)) {
558
96
            memcpy(&buffercpy[l * cell_shape], ip, cell_shape);
559
96
            ip += cell_shape;
560
96
          }
561
128
        }
562
23
      } else {
563
23
        free(local_buffer);
564
23
        free(cell_aux);
565
23
        BLOSC_TRACE_ERROR("Invalid token: %u at cell [%d, %d]\n", token, ii[0], ii[1]);
566
23
        return BLOSC2_ERROR_FAILURE;
567
23
      }
568
569
324
      int32_t orig = ii[0] * cell_shape * blockshape[1] + ii[1] * cell_shape;
570
2.91k
      for (int32_t i = 0; i < (int32_t) cell_shape; i++) {
571
2.59k
        if (i < padding[0]) {
572
1.61k
          ind = orig + i * blockshape[1];
573
1.61k
          memcpy(&op[ind], buffercpy, padding[1]);
574
1.61k
        }
575
2.59k
        buffercpy += padding[1];
576
2.59k
      }
577
324
      if (ind > output_len) {
578
0
        free(local_buffer);
579
0
        free(cell_aux);
580
0
        BLOSC_TRACE_ERROR("Exceeding output size");
581
0
        return BLOSC2_ERROR_FAILURE;
582
0
      }
583
324
    }
584
1.92G
  }
585
234
  ind += padding[1];
586
587
234
  free(cell_aux);
588
234
  free(local_buffer);
589
590
234
  if (ind != (blockshape[0] * blockshape[1])) {
591
0
    BLOSC_TRACE_ERROR("Output size is not compatible with embedded blockshape");
592
0
    return BLOSC2_ERROR_FAILURE;
593
0
  }
594
234
  if (ind > output_len) {
595
0
    BLOSC_TRACE_ERROR("Exceeding output size");
596
0
    return BLOSC2_ERROR_FAILURE;
597
0
  }
598
599
234
  return (int) ind;
600
234
}