Coverage Report

Created: 2025-07-12 06:30

/src/c-blosc2/blosc/shuffle-generic.h
Line
Count
Source
1
/*********************************************************************
2
  Blosc - Blocked Shuffling and Compression Library
3
4
  Copyright (c) 2021  Blosc Development Team <blosc@blosc.org>
5
  https://blosc.org
6
  License: BSD 3-Clause (see LICENSE.txt)
7
8
  See LICENSE.txt for details about copyright and rights to use.
9
**********************************************************************/
10
11
/*********************************************************************
12
  Generic (non-hardware-accelerated) shuffle/unshuffle routines.
13
  These are used when hardware-accelerated functions aren't available
14
  for a particular platform; they are also used by the hardware-
15
  accelerated functions to handle any remaining elements in a block
16
  which isn't a multiple of the hardware's vector size.
17
**********************************************************************/
18
19
#ifndef BLOSC_SHUFFLE_GENERIC_H
20
#define BLOSC_SHUFFLE_GENERIC_H
21
22
#include "blosc2/blosc2-common.h"
23
24
#include <stdint.h>
25
#include <string.h>
26
27
/**
  Generic (non-hardware-accelerated) shuffle routine.

  This is the pure element-copying nested loop. It is used by the
  generic shuffle implementation and also by the vectorized shuffle
  implementations to process any remaining elements in a block which
  is not a multiple of (type_size * vector_size).

  For every element index i in
  [vectorizable_blocksize / type_size, blocksize / type_size) and every
  byte position j in [0, type_size), byte j of element i is written to
  _dest[j * (blocksize / type_size) + i]. Elements below the starting
  index are not touched. The trailing (blocksize % type_size) bytes are
  copied verbatim.

  @param type_size               Size of one element, in bytes.
  @param vectorizable_blocksize  Number of leading bytes to skip; the
                                 element loop starts at
                                 vectorizable_blocksize / type_size.
  @param blocksize               Total size of the block, in bytes.
  @param _src                    Source block (read only).
  @param _dest                   Destination block. The tail is copied with
                                 memcpy, so it must not overlap _src.

  If type_size <= 0, blocksize <= 0 or vectorizable_blocksize < 0 the
  function returns without touching _dest.
*/
static inline void shuffle_generic_inline(const int32_t type_size,
                                   const int32_t vectorizable_blocksize, const int32_t blocksize,
                                   const uint8_t *_src, uint8_t *_dest) {
  /* Division/modulo by zero is undefined behaviour, and a negative size
     would become either a negative index or a huge size_t length in the
     memcpy below. The function has no error channel, so just do nothing. */
  if (type_size <= 0 || blocksize <= 0 || vectorizable_blocksize < 0) {
    return;
  }

  /* Calculate the number of elements in the block. */
  const int32_t neblock_quot = blocksize / type_size;
  const int32_t neblock_rem = blocksize % type_size;
  const int32_t vectorizable_elements = vectorizable_blocksize / type_size;

  /* Non-optimized shuffle: gather byte j of every remaining element into
     the j-th byte plane of the destination. */
  for (int32_t j = 0; j < type_size; j++) {
    for (int32_t i = vectorizable_elements; i < neblock_quot; i++) {
      _dest[j * neblock_quot + i] = _src[i * type_size + j];
    }
  }

  /* Copy any leftover bytes in the block without shuffling them. */
  memcpy(_dest + (blocksize - neblock_rem), _src + (blocksize - neblock_rem),
         (size_t)neblock_rem);
}
Unexecuted instantiation: shuffle.c:shuffle_generic_inline
shuffle-generic.c:shuffle_generic_inline
Line
Count
Source
36
14.6k
                                   const uint8_t *_src, uint8_t *_dest) {
37
14.6k
  int32_t i, j;
38
  /* Calculate the number of elements in the block. */
39
14.6k
  const int32_t neblock_quot = blocksize / type_size;
40
14.6k
  const int32_t neblock_rem = blocksize % type_size;
41
14.6k
  const int32_t vectorizable_elements = vectorizable_blocksize / type_size;
42
43
44
  /* Non-optimized shuffle */
45
29.3k
  for (j = 0; j < type_size; j++) {
46
23.2M
    for (i = vectorizable_elements; i < (int32_t)neblock_quot; i++) {
47
23.2M
      _dest[j * neblock_quot + i] = _src[i * type_size + j];
48
23.2M
    }
49
14.6k
  }
50
51
  /* Copy any leftover bytes in the block without shuffling them. */
52
14.6k
  memcpy(_dest + (blocksize - neblock_rem), _src + (blocksize - neblock_rem), neblock_rem);
53
14.6k
}
Unexecuted instantiation: shuffle-sse2.c:shuffle_generic_inline
Unexecuted instantiation: shuffle-avx2.c:shuffle_generic_inline
54
55
/**
  Generic (non-hardware-accelerated) unshuffle routine.

  This is the pure element-copying nested loop. It is used by the
  generic unshuffle implementation and also by the vectorized unshuffle
  implementations to process any remaining elements in a block which
  is not a multiple of (type_size * vector_size).

  For every element index i in
  [vectorizable_blocksize / type_size, blocksize / type_size) and every
  byte position j in [0, type_size), _dest[i * type_size + j] is read from
  _src[j * (blocksize / type_size) + i]. Elements below the starting
  index are not touched. The trailing (blocksize % type_size) bytes are
  copied verbatim.

  @param type_size               Size of one element, in bytes.
  @param vectorizable_blocksize  Number of leading bytes to skip; the
                                 element loop starts at
                                 vectorizable_blocksize / type_size.
  @param blocksize               Total size of the block, in bytes.
  @param _src                    Shuffled source block (read only).
  @param _dest                   Destination block. The tail is copied with
                                 memcpy, so it must not overlap _src.

  If type_size <= 0, blocksize <= 0 or vectorizable_blocksize < 0 the
  function returns without touching _dest.
*/
static inline void unshuffle_generic_inline(const int32_t type_size,
                                     const int32_t vectorizable_blocksize, const int32_t blocksize,
                                     const uint8_t *_src, uint8_t *_dest) {
  /* Division/modulo by zero is undefined behaviour, and a negative size
     would become either a negative index or a huge size_t length in the
     memcpy below. The function has no error channel, so just do nothing. */
  if (type_size <= 0 || blocksize <= 0 || vectorizable_blocksize < 0) {
    return;
  }

  /* Calculate the number of elements in the block. */
  const int32_t neblock_quot = blocksize / type_size;
  const int32_t neblock_rem = blocksize % type_size;
  const int32_t vectorizable_elements = vectorizable_blocksize / type_size;

  /* Non-optimized unshuffle: rebuild each remaining element by picking its
     j-th byte out of the j-th byte plane of the source. */
  for (int32_t i = vectorizable_elements; i < neblock_quot; i++) {
    for (int32_t j = 0; j < type_size; j++) {
      _dest[i * type_size + j] = _src[j * neblock_quot + i];
    }
  }

  /* Copy any leftover bytes in the block without unshuffling them. */
  memcpy(_dest + (blocksize - neblock_rem), _src + (blocksize - neblock_rem),
         (size_t)neblock_rem);
}
Unexecuted instantiation: shuffle.c:unshuffle_generic_inline
shuffle-generic.c:unshuffle_generic_inline
Line
Count
Source
64
7.42k
                                     const uint8_t *_src, uint8_t *_dest) {
65
7.42k
  int32_t i, j;
66
67
  /* Calculate the number of elements in the block. */
68
7.42k
  const int32_t neblock_quot = blocksize / type_size;
69
7.42k
  const int32_t neblock_rem = blocksize % type_size;
70
7.42k
  const int32_t vectorizable_elements = vectorizable_blocksize / type_size;
71
72
  /* Non-optimized unshuffle */
73
9.57M
  for (i = vectorizable_elements; i < (int32_t)neblock_quot; i++) {
74
19.2M
    for (j = 0; j < type_size; j++) {
75
9.69M
      _dest[i * type_size + j] = _src[j * neblock_quot + i];
76
9.69M
    }
77
9.56M
  }
78
79
  /* Copy any leftover bytes in the block without unshuffling them. */
80
7.42k
  memcpy(_dest + (blocksize - neblock_rem), _src + (blocksize - neblock_rem), neblock_rem);
81
7.42k
}
Unexecuted instantiation: shuffle-sse2.c:unshuffle_generic_inline
shuffle-avx2.c:unshuffle_generic_inline
Line
Count
Source
64
552
                                     const uint8_t *_src, uint8_t *_dest) {
65
552
  int32_t i, j;
66
67
  /* Calculate the number of elements in the block. */
68
552
  const int32_t neblock_quot = blocksize / type_size;
69
552
  const int32_t neblock_rem = blocksize % type_size;
70
552
  const int32_t vectorizable_elements = vectorizable_blocksize / type_size;
71
72
  /* Non-optimized unshuffle */
73
6.77k
  for (i = vectorizable_elements; i < (int32_t)neblock_quot; i++) {
74
195k
    for (j = 0; j < type_size; j++) {
75
189k
      _dest[i * type_size + j] = _src[j * neblock_quot + i];
76
189k
    }
77
6.21k
  }
78
79
  /* Copy any leftover bytes in the block without unshuffling them. */
80
552
  memcpy(_dest + (blocksize - neblock_rem), _src + (blocksize - neblock_rem), neblock_rem);
81
552
}
82
83
/**
  Generic (non-hardware-accelerated) shuffle routine.

  Declaration only: the definition is not part of this header.
  NOTE(review): presumably `bytesoftype` is the element size in bytes and
  `blocksize` the number of bytes in `_src` / `_dest` -- confirm against
  the definition.
*/
86
BLOSC_NO_EXPORT void shuffle_generic(const int32_t bytesoftype, const int32_t blocksize,
87
                                     const uint8_t *_src, uint8_t *_dest);
88
89
/**
  Generic (non-hardware-accelerated) unshuffle routine.

  Declaration only: the definition is not part of this header.
  NOTE(review): presumably `bytesoftype` is the element size in bytes and
  `blocksize` the number of bytes in `_src` / `_dest` -- confirm against
  the definition.
*/
92
BLOSC_NO_EXPORT void unshuffle_generic(const int32_t bytesoftype, const int32_t blocksize,
93
                                       const uint8_t *_src, uint8_t *_dest);
94
95
#endif /* BLOSC_SHUFFLE_GENERIC_H */