/src/c-blosc/blosc/bitshuffle-generic.c
Line | Count | Source |
1 | | /********************************************************************* |
2 | | Blosc - Blocked Shuffling and Compression Library |
3 | | |
4 | | Author: Francesc Alted <francesc@blosc.org> |
5 | | |
6 | | See LICENSE.txt for details about copyright and rights to use. |
7 | | **********************************************************************/ |
8 | | |
9 | | #include "bitshuffle-generic.h" |
10 | | |
11 | | |
/* Transpose bytes within elements, starting partway through input.
 *
 * `in` holds `size` elements of `elem_size` bytes each.  Byte `b` of element
 * `e` is written to out[b * size + e] for every element index e >= start;
 * elements before `start` are not touched.
 *
 * `start` is validated by CHECK_MULT_EIGHT (defined outside this file;
 * presumably it returns an error code when `start` is not a multiple of
 * eight -- confirm in the header).  Otherwise returns size * elem_size. */
int64_t blosc_internal_bshuf_trans_byte_elem_remainder(const void* in, void* out, const size_t size,
         const size_t elem_size, const size_t start) {

  const char* src = (const char*) in;
  char* dst = (char*) out;
  size_t elem, byte, lane;

  CHECK_MULT_EIGHT(start);

  /* Nothing left to transpose. */
  if (size <= start) {
    return size * elem_size;
  }

  /* Whole groups of eight elements.  The fixed-length innermost loop is */
  /* kept on its own so the compiler can unroll it. */
  for (elem = start; elem + 7 < size; elem += 8) {
    for (byte = 0; byte < elem_size; byte++) {
      char* dst_row = dst + byte * size + elem;
      const char* src_col = src + elem * elem_size + byte;
      for (lane = 0; lane < 8; lane++) {
        dst_row[lane] = src_col[lane * elem_size];
      }
    }
  }

  /* Trailing elements that do not fill a group of eight. */
  for (elem = size - size % 8; elem < size; elem++) {
    for (byte = 0; byte < elem_size; byte++) {
      dst[byte * size + elem] = src[elem * elem_size + byte];
    }
  }

  return size * elem_size;
}
41 | | |
42 | | |
/* Transpose bytes within elements.
 *
 * Convenience entry point: runs the remainder variant over the whole input,
 * i.e. starting at element 0, and returns its result unchanged. */
int64_t blosc_internal_bshuf_trans_byte_elem_scal(const void* in, void* out, const size_t size,
         const size_t elem_size) {

  const size_t first_elem = 0;

  return blosc_internal_bshuf_trans_byte_elem_remainder(in, out, size, elem_size,
                                                        first_elem);
}
49 | | |
50 | | |
/* Transpose bits within bytes, starting at byte offset `start_byte`.
 *
 * The input is treated as nbyte = size * elem_size bytes, processed eight
 * bytes at a time.  Each 8-byte word is passed through TRANS_BIT_8X8 (or
 * TRANS_BIT_8X8_BE on big-endian hosts; both macros are defined outside this
 * file) and its eight result bytes are scattered into eight output rows of
 * nbyte / 8 bytes each.  Words before start_byte / 8 are not touched.
 *
 * nbyte and start_byte are validated by CHECK_MULT_EIGHT (defined outside
 * this file; presumably returns an error code when the value is not a
 * multiple of eight -- confirm in the header).  Otherwise returns
 * size * elem_size. */
int64_t blosc_internal_bshuf_trans_bit_byte_remainder(const void* in, void* out, const size_t size,
         const size_t elem_size, const size_t start_byte) {

  const uint8_t* in_b = (const uint8_t*) in;
  uint8_t* out_b = (uint8_t*) out;

  uint64_t x, t;

  size_t ii, kk;
  size_t nbyte = elem_size * size;
  size_t nbyte_bitrow = nbyte / 8;

  uint64_t e=1;
  const int little_endian = *(uint8_t *) &e == 1;
  /* On big-endian hosts the rows are filled from the last one backwards.  The
   * negative stride is stored as a wrapped size_t, so the offset must also be
   * a size_t: the whole index expression is then evaluated modulo the width
   * of size_t and the wrap cancels out.  (A signed 64-bit offset would widen
   * the sum on targets with a 32-bit size_t and leave the wrap in place.) */
  const size_t bit_row_skip = little_endian ? nbyte_bitrow : -nbyte_bitrow;
  const size_t bit_row_offset = little_endian ? 0 : 7 * nbyte_bitrow;

  CHECK_MULT_EIGHT(nbyte);
  CHECK_MULT_EIGHT(start_byte);

  for (ii = start_byte / 8; ii < nbyte_bitrow; ii ++) {
    /* Load through memcpy: `in` carries no alignment guarantee and need not
     * point at uint64_t objects, so a direct uint64_t dereference would be
     * undefined behaviour. */
    memcpy(&x, in_b + ii * sizeof(x), sizeof(x));
    if (little_endian) {
      TRANS_BIT_8X8(x, t);
    } else {
      TRANS_BIT_8X8_BE(x, t);
    }
    /* Peel the result off one byte at a time, low byte first. */
    for (kk = 0; kk < 8; kk ++) {
      out_b[bit_row_offset + kk * bit_row_skip + ii] = (uint8_t) x;
      x = x >> 8;
    }
  }
  return size * elem_size;
}
86 | | |
87 | | |
/* Transpose bits within bytes.
 *
 * File-local convenience wrapper: runs the remainder variant over the whole
 * input, i.e. starting at byte 0, and returns its result unchanged. */
static int64_t bshuf_trans_bit_byte_scal(const void* in, void* out, const size_t size,
         const size_t elem_size) {

  const size_t first_byte = 0;

  return blosc_internal_bshuf_trans_bit_byte_remainder(in, out, size, elem_size,
                                                       first_byte);
}
94 | | |
/* General transpose of an array, optimized for large element sizes.
 *
 * `in` is read as an lda x ldb array of elem_size-byte elements stored row
 * by row; element (row, col) is copied to position (col, row) of the
 * ldb x lda array written to `out`.  Each element is moved with one memcpy.
 * Returns lda * ldb * elem_size. */
int64_t blosc_internal_bshuf_trans_elem(const void* in, void* out, const size_t lda,
         const size_t ldb, const size_t elem_size) {

  const char* src = (const char*) in;
  char* dst = (char*) out;
  size_t row, col;

  for (row = 0; row < lda; row++) {
    for (col = 0; col < ldb; col++) {
      const size_t src_elem = row * ldb + col;
      const size_t dst_elem = col * lda + row;
      memcpy(dst + dst_elem * elem_size, src + src_elem * elem_size, elem_size);
    }
  }
  return lda * ldb * elem_size;
}
110 | | |
111 | | |
/* Transpose rows of shuffled bits (size / 8 bytes) within groups of 8.
 *
 * Implemented as a general transpose of an 8 x elem_size array whose
 * "elements" are whole bit rows of size / 8 bytes.  `size` is validated by
 * CHECK_MULT_EIGHT (defined outside this file); otherwise the result of the
 * general transpose is returned. */
int64_t blosc_internal_bshuf_trans_bitrow_eight(const void* in, void* out, const size_t size,
         const size_t elem_size) {

  CHECK_MULT_EIGHT(size);

  return blosc_internal_bshuf_trans_elem(in, out, 8, elem_size, size / 8);
}
122 | | |
123 | | |
/* Transpose bits within elements.
 *
 * Runs three passes, using `out` and `tmp_buf` alternately as scratch:
 *   1. byte transpose within elements:   in      -> out
 *   2. bit transpose within bytes:       out     -> tmp_buf
 *   3. bit-row transpose in groups of 8: tmp_buf -> out
 *
 * `size` is validated by CHECK_MULT_EIGHT and the first two passes by
 * CHECK_ERR (both macros are defined outside this file).  The value returned
 * by the last pass is handed back to the caller as is. */
int64_t blosc_internal_bshuf_trans_bit_elem_scal(const void* in, void* out, const size_t size,
         const size_t elem_size, void* tmp_buf) {

  int64_t count;

  CHECK_MULT_EIGHT(size);

  /* Pass 1. */
  count = blosc_internal_bshuf_trans_byte_elem_scal(in, out, size, elem_size);
  CHECK_ERR(count);

  /* Pass 2. */
  count = bshuf_trans_bit_byte_scal(out, tmp_buf, size, elem_size);
  CHECK_ERR(count);

  /* Pass 3: its result is the overall result. */
  return blosc_internal_bshuf_trans_bitrow_eight(tmp_buf, out, size, elem_size);
}
140 | | |
141 | | |
/* For data organized into a row for each bit (8 * elem_size rows), transpose
 * the bytes.
 *
 * Each input row is size / 8 bytes long.  For every byte position `col`
 * within a row, the bytes found at that position in rows byte * 8 .. byte * 8 + 7
 * are gathered into eight consecutive output bytes at
 * out[col * 8 * elem_size + byte * 8].
 *
 * `size` is validated by CHECK_MULT_EIGHT (defined outside this file).
 * Otherwise returns size * elem_size. */
static int64_t bshuf_trans_byte_bitrow_scal(const void* in, void* out, const size_t size,
         const size_t elem_size) {
  const char* src = (const char*) in;
  char* dst = (char*) out;

  const size_t row_len = size / 8;
  size_t byte, col, bit;

  CHECK_MULT_EIGHT(size);

  for (byte = 0; byte < elem_size; byte++) {
    for (col = 0; col < row_len; col++) {
      char* dst_group = dst + col * 8 * elem_size + byte * 8;
      const char* src_first = src + byte * 8 * row_len + col;
      /* Walk down the eight rows belonging to this byte of the element. */
      for (bit = 0; bit < 8; bit++) {
        dst_group[bit] = src_first[bit * row_len];
      }
    }
  }
  return size * elem_size;
}
164 | | |
165 | | |
/* Shuffle bits within the bytes of eight element blocks.
 *
 * The input is walked in blocks of 8 * elem_size bytes.  Inside a block,
 * each aligned-by-index group of eight consecutive bytes (offset jj) is
 * loaded as one 64-bit word, passed through TRANS_BIT_8X8 (or
 * TRANS_BIT_8X8_BE on big-endian hosts; both macros are defined outside this
 * file), and its eight result bytes are written elem_size bytes apart,
 * starting at offset jj / 8 within the same block of `out`.  Trailing bytes
 * that do not fill a whole block are not processed.
 *
 * `size` is validated by CHECK_MULT_EIGHT (defined outside this file;
 * presumably returns an error code when size is not a multiple of eight --
 * confirm in the header).  Otherwise returns size * elem_size. */
int64_t blosc_internal_bshuf_shuffle_bit_eightelem_scal(const void* in, void* out,
         const size_t size, const size_t elem_size) {

  const uint8_t *in_b;
  uint8_t *out_b;
  uint64_t x, t;
  size_t ii, jj, kk;
  size_t nbyte, out_index;

  uint64_t e=1;
  const int little_endian = *(uint8_t *) &e == 1;
  /* On big-endian hosts the eight output bytes are written from the last
   * slot backwards; the negative stride is a wrapped size_t and cancels out
   * because the index below is computed entirely in size_t. */
  const size_t elem_skip = little_endian ? elem_size : -elem_size;
  const size_t elem_offset = little_endian ? 0 : 7 * elem_size;

  CHECK_MULT_EIGHT(size);

  in_b = (const uint8_t*) in;
  out_b = (uint8_t*) out;

  nbyte = elem_size * size;

  for (jj = 0; jj < 8 * elem_size; jj += 8) {
    for (ii = 0; ii + 8 * elem_size - 1 < nbyte; ii += 8 * elem_size) {
      /* Load through memcpy: the byte buffer carries no 8-byte alignment
       * guarantee and is not a uint64_t object, so dereferencing a casted
       * uint64_t pointer here would be undefined behaviour. */
      memcpy(&x, &in_b[ii + jj], sizeof(x));
      if (little_endian) {
        TRANS_BIT_8X8(x, t);
      } else {
        TRANS_BIT_8X8_BE(x, t);
      }
      /* Peel the result off one byte at a time, low byte first. */
      for (kk = 0; kk < 8; kk++) {
        out_index = ii + jj / 8 + elem_offset + kk * elem_skip;
        out_b[out_index] = (uint8_t) x;
        x = x >> 8;
      }
    }
  }
  return size * elem_size;
}
205 | | |
206 | | |
/* Untranspose bits within elements.
 *
 * Runs two passes, with `tmp_buf` as the intermediate buffer:
 *   1. byte transpose of the bit rows:          in      -> tmp_buf
 *   2. bit shuffle within eight-element blocks: tmp_buf -> out
 *
 * `size` is validated by CHECK_MULT_EIGHT and the first pass by CHECK_ERR
 * (both macros are defined outside this file).  The value returned by the
 * second pass is handed back to the caller as is. */
int64_t blosc_internal_bshuf_untrans_bit_elem_scal(const void* in, void* out, const size_t size,
         const size_t elem_size, void* tmp_buf) {

  int64_t count;

  CHECK_MULT_EIGHT(size);

  /* Pass 1. */
  count = bshuf_trans_byte_bitrow_scal(in, tmp_buf, size, elem_size);
  CHECK_ERR(count);

  /* Pass 2: its result is the overall result. */
  return blosc_internal_bshuf_shuffle_bit_eightelem_scal(tmp_buf, out, size, elem_size);
}