/src/c-blosc2/blosc/bitshuffle-generic.c
Line | Count | Source (jump to first uncovered line) |
1 | | /********************************************************************* |
2 | | Blosc - Blocked Shuffling and Compression Library |
3 | | |
4 | | Copyright (c) 2021 Blosc Development Team <blosc@blosc.org> |
5 | | https://blosc.org |
6 | | License: BSD 3-Clause (see LICENSE.txt) |
7 | | |
8 | | See LICENSE.txt for details about copyright and rights to use. |
9 | | **********************************************************************/ |
10 | | |
11 | | #include "bitshuffle-generic.h" |
12 | | |
13 | | #include <stdlib.h> |
14 | | |
15 | | #ifdef _MSC_VER |
16 | | #pragma warning (push) |
17 | | #pragma warning (disable: 4146) |
18 | | #endif |
19 | | |
20 | | |
21 | | /* Memory copy with bshuf call signature. For testing and profiling. */ |
22 | | int64_t bshuf_copy(const void* in, void* out, const size_t size, |
23 | 128k | const size_t elem_size) { |
24 | | |
25 | 128k | const char* in_b = (const char*) in; |
26 | 128k | char* out_b = (char*) out; |
27 | | |
28 | 128k | memcpy(out_b, in_b, size * elem_size); |
29 | 128k | return size * elem_size; |
30 | 128k | } |
31 | | |
32 | | |
33 | | /* Transpose bytes within elements, starting partway through input. */ |
34 | | int64_t bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t size, |
35 | 0 | const size_t elem_size, const size_t start) { |
36 | |
|
37 | 0 | size_t ii, jj, kk; |
38 | 0 | const char* in_b = (const char*) in; |
39 | 0 | char* out_b = (char*) out; |
40 | |
|
41 | 0 | CHECK_MULT_EIGHT(start); |
42 | |
|
43 | 0 | if (size > start) { |
44 | | // ii loop separated into 2 loops so the compiler can unroll |
45 | | // the inner one. |
46 | 0 | for (ii = start; ii + 7 < size; ii += 8) { |
47 | 0 | for (jj = 0; jj < elem_size; jj++) { |
48 | 0 | for (kk = 0; kk < 8; kk++) { |
49 | 0 | out_b[jj * size + ii + kk] |
50 | 0 | = in_b[ii * elem_size + kk * elem_size + jj]; |
51 | 0 | } |
52 | 0 | } |
53 | 0 | } |
54 | 0 | for (ii = size - size % 8; ii < size; ii ++) { |
55 | 0 | for (jj = 0; jj < elem_size; jj++) { |
56 | 0 | out_b[jj * size + ii] = in_b[ii * elem_size + jj]; |
57 | 0 | } |
58 | 0 | } |
59 | 0 | } |
60 | 0 | return size * elem_size; |
61 | 0 | } |
62 | | |
63 | | |
64 | | /* Transpose bytes within elements. */ |
65 | | int64_t bshuf_trans_byte_elem_scal(const void* in, void* out, const size_t size, |
66 | 0 | const size_t elem_size) { |
67 | |
|
68 | 0 | return bshuf_trans_byte_elem_remainder(in, out, size, elem_size, 0); |
69 | 0 | } |
70 | | |
71 | | |
72 | | /* Transpose bits within bytes. */ |
73 | | int64_t bshuf_trans_bit_byte_remainder(const void* in, void* out, const size_t size, |
74 | 128k | const size_t elem_size, const size_t start_byte) { |
75 | | |
76 | 128k | const uint64_t* in_b = (const uint64_t*) in; |
77 | 128k | uint8_t* out_b = (uint8_t*) out; |
78 | | |
79 | 128k | uint64_t x, t; |
80 | | |
81 | 128k | size_t ii, kk; |
82 | 128k | size_t nbyte = elem_size * size; |
83 | 128k | size_t nbyte_bitrow = nbyte / 8; |
84 | | |
85 | 128k | uint64_t e=1; |
86 | 128k | const int little_endian = *(uint8_t *) &e == 1; |
87 | 128k | const size_t bit_row_skip = little_endian ? nbyte_bitrow : -nbyte_bitrow; |
88 | 128k | const int64_t bit_row_offset = little_endian ? 0 : 7 * nbyte_bitrow; |
89 | | |
90 | 128k | CHECK_MULT_EIGHT(nbyte); |
91 | 128k | CHECK_MULT_EIGHT(start_byte); |
92 | | |
93 | 136k | for (ii = start_byte / 8; ii < nbyte_bitrow; ii ++) { |
94 | 8.39k | x = in_b[ii]; |
95 | 8.39k | if (little_endian) { |
96 | 8.39k | TRANS_BIT_8X8(x, t); |
97 | 8.39k | } else { |
98 | 0 | TRANS_BIT_8X8_BE(x, t); |
99 | 0 | } |
100 | 75.5k | for (kk = 0; kk < 8; kk ++) { |
101 | 67.1k | out_b[bit_row_offset + kk * bit_row_skip + ii] = x; |
102 | 67.1k | x = x >> 8; |
103 | 67.1k | } |
104 | 8.39k | } |
105 | 128k | return size * elem_size; |
106 | 128k | } |
107 | | |
108 | | |
109 | | /* Transpose bits within bytes. */ |
110 | | int64_t bshuf_trans_bit_byte_scal(const void* in, void* out, const size_t size, |
111 | 0 | const size_t elem_size) { |
112 | |
|
113 | 0 | return bshuf_trans_bit_byte_remainder(in, out, size, elem_size, 0); |
114 | 0 | } |
115 | | |
116 | | |
117 | | /* General transpose of an array, optimized for large element sizes. */ |
118 | | int64_t bshuf_trans_elem(const void* in, void* out, const size_t lda, |
119 | 128k | const size_t ldb, const size_t elem_size) { |
120 | | |
121 | 128k | size_t ii, jj; |
122 | 128k | const char* in_b = (const char*) in; |
123 | 128k | char* out_b = (char*) out; |
124 | 1.15M | for(ii = 0; ii < lda; ii++) { |
125 | 2.05M | for(jj = 0; jj < ldb; jj++) { |
126 | 1.02M | memcpy(&out_b[(jj*lda + ii) * elem_size], |
127 | 1.02M | &in_b[(ii*ldb + jj) * elem_size], elem_size); |
128 | 1.02M | } |
129 | 1.02M | } |
130 | 128k | return lda * ldb * elem_size; |
131 | 128k | } |
132 | | |
133 | | |
134 | | /* Transpose rows of shuffled bits (size / 8 bytes) within groups of 8. */ |
135 | | int64_t bshuf_trans_bitrow_eight(const void* in, void* out, const size_t size, |
136 | 128k | const size_t elem_size) { |
137 | | |
138 | 128k | size_t nbyte_bitrow = size / 8; |
139 | | |
140 | 128k | CHECK_MULT_EIGHT(size); |
141 | | |
142 | 128k | return bshuf_trans_elem(in, out, 8, elem_size, nbyte_bitrow); |
143 | 128k | } |
144 | | |
145 | | |
146 | | /* Transpose bits within elements. */ |
147 | | int64_t bshuf_trans_bit_elem_scal(const void* in, void* out, const size_t size, |
148 | 0 | const size_t elem_size) { |
149 | |
|
150 | 0 | int64_t count; |
151 | 0 | void *tmp_buf; |
152 | |
|
153 | 0 | CHECK_MULT_EIGHT(size); |
154 | |
|
155 | 0 | tmp_buf = malloc(size * elem_size); |
156 | 0 | if (tmp_buf == NULL) return -1; |
157 | | |
158 | 0 | count = bshuf_trans_byte_elem_scal(in, out, size, elem_size); |
159 | 0 | CHECK_ERR_FREE(count, tmp_buf); |
160 | 0 | count = bshuf_trans_bit_byte_scal(out, tmp_buf, size, elem_size); |
161 | 0 | CHECK_ERR_FREE(count, tmp_buf); |
162 | 0 | count = bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size); |
163 | |
|
164 | 0 | free(tmp_buf); |
165 | |
|
166 | 0 | return count; |
167 | 0 | } |
168 | | |
169 | | |
170 | | /* For data organized into a row for each bit (8 * elem_size rows), transpose |
171 | | * the bytes. */ |
172 | | int64_t bshuf_trans_byte_bitrow_scal(const void* in, void* out, const size_t size, |
173 | 0 | const size_t elem_size) { |
174 | 0 | size_t ii, jj, kk, nbyte_row; |
175 | 0 | const char *in_b; |
176 | 0 | char *out_b; |
177 | | |
178 | |
|
179 | 0 | in_b = (const char*) in; |
180 | 0 | out_b = (char*) out; |
181 | |
|
182 | 0 | nbyte_row = size / 8; |
183 | |
|
184 | 0 | CHECK_MULT_EIGHT(size); |
185 | |
|
186 | 0 | for (jj = 0; jj < elem_size; jj++) { |
187 | 0 | for (ii = 0; ii < nbyte_row; ii++) { |
188 | 0 | for (kk = 0; kk < 8; kk++) { |
189 | 0 | out_b[ii * 8 * elem_size + jj * 8 + kk] = \ |
190 | 0 | in_b[(jj * 8 + kk) * nbyte_row + ii]; |
191 | 0 | } |
192 | 0 | } |
193 | 0 | } |
194 | 0 | return size * elem_size; |
195 | 0 | } |
196 | | |
197 | | |
198 | | /* Shuffle bits within the bytes of eight element blocks. */ |
199 | | int64_t bshuf_shuffle_bit_eightelem_scal(const void* in, void* out, \ |
200 | 13.1k | const size_t size, const size_t elem_size) { |
201 | | |
202 | 13.1k | const char *in_b; |
203 | 13.1k | char *out_b; |
204 | 13.1k | uint64_t x, t; |
205 | 13.1k | size_t ii, jj, kk; |
206 | 13.1k | size_t nbyte, out_index; |
207 | | |
208 | 13.1k | uint64_t e=1; |
209 | 13.1k | const int little_endian = *(uint8_t *) &e == 1; |
210 | 13.1k | const size_t elem_skip = little_endian ? elem_size : -elem_size; |
211 | 13.1k | const uint64_t elem_offset = little_endian ? 0 : 7 * elem_size; |
212 | | |
213 | 13.1k | CHECK_MULT_EIGHT(size); |
214 | | |
215 | 13.1k | in_b = (const char*) in; |
216 | 13.1k | out_b = (char*) out; |
217 | | |
218 | 13.1k | nbyte = elem_size * size; |
219 | | |
220 | 66.5k | for (jj = 0; jj < 8 * elem_size; jj += 8) { |
221 | 6.34M | for (ii = 0; ii + 8 * elem_size - 1 < nbyte; ii += 8 * elem_size) { |
222 | 6.28M | x = *((uint64_t*) &in_b[ii + jj]); |
223 | 6.28M | if (little_endian) { |
224 | 6.28M | TRANS_BIT_8X8(x, t); |
225 | 6.28M | } else { |
226 | 0 | TRANS_BIT_8X8_BE(x, t); |
227 | 0 | } |
228 | 56.5M | for (kk = 0; kk < 8; kk++) { |
229 | 50.2M | out_index = ii + jj / 8 + elem_offset + kk * elem_skip; |
230 | 50.2M | *((uint8_t*) &out_b[out_index]) = x; |
231 | 50.2M | x = x >> 8; |
232 | 50.2M | } |
233 | 6.28M | } |
234 | 53.3k | } |
235 | 13.1k | return size * elem_size; |
236 | 13.1k | } |
237 | | |
238 | | |
239 | | /* Untranspose bits within elements. */ |
240 | | int64_t bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size, |
241 | 0 | const size_t elem_size) { |
242 | |
|
243 | 0 | int64_t count; |
244 | 0 | void *tmp_buf; |
245 | |
|
246 | 0 | CHECK_MULT_EIGHT(size); |
247 | |
|
248 | 0 | tmp_buf = malloc(size * elem_size); |
249 | 0 | if (tmp_buf == NULL) return -1; |
250 | | |
251 | 0 | count = bshuf_trans_byte_bitrow_scal(in, tmp_buf, size, elem_size); |
252 | 0 | CHECK_ERR_FREE(count, tmp_buf); |
253 | 0 | count = bshuf_shuffle_bit_eightelem_scal(tmp_buf, out, size, elem_size); |
254 | |
|
255 | 0 | free(tmp_buf); |
256 | |
|
257 | 0 | return count; |
258 | 0 | } |
259 | | |
260 | | #ifdef _MSC_VER |
261 | | #pragma warning (pop) |
262 | | #endif |