/src/php-src/ext/standard/base64.c
Line | Count | Source |
1 | | /* |
2 | | +----------------------------------------------------------------------+ |
3 | | | Copyright © The PHP Group and Contributors. | |
4 | | +----------------------------------------------------------------------+ |
5 | | | This source file is subject to the Modified BSD License that is | |
6 | | | bundled with this package in the file LICENSE, and is available | |
7 | | | through the World Wide Web at <https://www.php.net/license/>. | |
8 | | | | |
9 | | | SPDX-License-Identifier: BSD-3-Clause | |
10 | | +----------------------------------------------------------------------+ |
11 | | | Author: Jim Winstead <jimw@php.net> | |
12 | | | Xinchen Hui <laruence@php.net> | |
13 | | +----------------------------------------------------------------------+ |
14 | | */ |
15 | | |
16 | | #include <string.h> |
17 | | |
18 | | #include "php.h" |
19 | | #include "base64.h" |
20 | | |
/* {{{ base64 tables */

/* Encoding alphabet: the index is a 6-bit value, the element is the character
 * emitted for it. The literal's implicit terminator keeps the array at 65
 * bytes (64 symbols followed by '\0'). */
static const char base64_table[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789+/";

/* Character appended to fill up an incomplete final group. */
static const char base64_pad = '=';

/* Decoding table indexed by the input byte:
 *   0..63  value of a character of the alphabet
 *   -1     whitespace (TAB, LF, CR, space)
 *   -2     any other byte (including '=') */
static const short base64_reverse_table[256] = {
	/* 0x00 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2, -2, -1, -2, -2,
	/* 0x10 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0x20 */ -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 62, -2, -2, -2, 63,
	/* 0x30 */ 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -2, -2, -2, -2, -2, -2,
	/* 0x40 */ -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
	/* 0x50 */ 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -2, -2, -2, -2, -2,
	/* 0x60 */ -2, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
	/* 0x70 */ 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -2, -2, -2, -2, -2,
	/* 0x80 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0x90 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xa0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xb0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xc0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xd0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xe0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
	/* 0xf0 */ -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2
};
/* }}} */
51 | | |
52 | | #if defined(__aarch64__) || defined(_M_ARM64) |
53 | | #include <arm_neon.h> |
54 | | |
/* Convert sixteen 6-bit values (one per lane) into their Base64 characters.
 *
 * Each lane is mapped to an index into shift_LUT, and the byte found there is
 * added (modulo 256) to the lane:
 *    0..25 -> index 13
 *   26..51 -> index 0
 *   52..61 -> index 1..10
 *   62     -> index 11
 *   63     -> index 12
 *
 * input:     lanes holding values in 0..63
 * shift_LUT: 32-byte table of per-range offsets supplied by the caller
 * returns:   input + shift_LUT[index], lane by lane */
static zend_always_inline uint8x16_t encode_toascii(const uint8x16_t input, const uint8x16x2_t shift_LUT)
{
	/* Saturating subtraction: everything up to 51 collapses to 0,
	   52..63 becomes 1..12. */
	const uint8x16_t above_letters = vqsubq_u8(input, vdupq_n_u8(51));

	/* All-ones in the lanes where 26 > input, i.e. the values 0..25. */
	const uint8x16_t below_26 = vcgtq_u8(vdupq_n_u8(26), input);

	/* Lanes 0..25 currently hold 0; OR-ing in 13 moves them to their own slot. */
	const uint8x16_t lut_index = vorrq_u8(above_letters, vandq_u8(below_26, vdupq_n_u8(13)));

	/* Fetch the offset for every lane and apply it. */
	const uint8x16_t offset = vqtbl2q_u8(shift_LUT, lut_index);
	return vaddq_u8(offset, input);
}
71 | | |
/* Encode as many complete 48-byte groups of `in` as possible with NEON.
 *
 * in:   input bytes
 * inl:  number of input bytes; the caller must guarantee inl >= 48 because
 *       the loop body runs once before the length is tested
 * out:  destination; 64 bytes are written per 48 input bytes
 * left: receives the number of trailing input bytes that were not encoded
 *
 * Returns the position in `out` just past the last byte written. */
static zend_always_inline unsigned char *neon_base64_encode(const unsigned char *in, size_t inl, unsigned char *out, size_t *left)
{
	/* Offsets added to a 6-bit value to obtain its ASCII character; see
	   encode_toascii() for how the index is derived. Both halves are equal. */
	const uint8_t shift_LUT_[32] = {'a' - 26, '0' - 52, '0' - 52, '0' - 52,
					'0' - 52, '0' - 52, '0' - 52, '0' - 52,
					'0' - 52, '0' - 52, '0' - 52, '+' - 62,
					'/' - 63, 'A',      0,        0,
					'a' - 26, '0' - 52, '0' - 52, '0' - 52,
					'0' - 52, '0' - 52, '0' - 52, '0' - 52,
					'0' - 52, '0' - 52, '0' - 52, '+' - 62,
					'/' - 63, 'A',      0,        0};

	/* Load the table with real vector loads. Dereferencing the byte array
	   through a (uint8x16x2_t *) cast would read it via an incompatible,
	   more strictly aligned type. */
	uint8x16x2_t shift_LUT;
	shift_LUT.val[0] = vld1q_u8(shift_LUT_);
	shift_LUT.val[1] = vld1q_u8(shift_LUT_ + 16);

	do {
		/* [ccdddddd | bbbbcccc | aaaaaabb]
			x.val[2] | x.val[1] | x.val[0] */
		const uint8x16x3_t x = vld3q_u8((const uint8_t *)(in));

		/* [00aa_aaaa] */
		const uint8x16_t field_a = vshrq_n_u8(x.val[0], 2);

		const uint8x16_t field_b =             /* [00bb_bbbb] */
			vbslq_u8(vdupq_n_u8(0x30),         /* [0011_0000] */
			         vshlq_n_u8(x.val[0], 4),  /* [aabb_0000] */
			         vshrq_n_u8(x.val[1], 4)); /* [0000_bbbb] */

		const uint8x16_t field_c =             /* [00cc_cccc] */
			vbslq_u8(vdupq_n_u8(0x3c),         /* [0011_1100] */
			         vshlq_n_u8(x.val[1], 2),  /* [bbcc_cc00] */
			         vshrq_n_u8(x.val[2], 6)); /* [0000_00cc] */

		/* [00dd_dddd] */
		const uint8x16_t field_d = vandq_u8(x.val[2], vdupq_n_u8(0x3f));

		uint8x16x4_t result;
		result.val[0] = encode_toascii(field_a, shift_LUT);
		result.val[1] = encode_toascii(field_b, shift_LUT);
		result.val[2] = encode_toascii(field_c, shift_LUT);
		result.val[3] = encode_toascii(field_d, shift_LUT);

		/* Interleaving store: a, b, c, d of each group end up adjacent. */
		vst4q_u8((uint8_t *)out, result);
		out += 64;
		in += 16 * 3;
		inl -= 16 * 3;
	} while (inl >= 16 * 3);

	*left = inl;
	return out;
}
119 | | #endif /* defined(__aarch64__) || defined(_M_ARM64) */ |
120 | | |
121 | | static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned char *in, size_t inl, unsigned char *out, zend_long flags) /* {{{ */ |
122 | 9 | { |
123 | | #if defined(__aarch64__) || defined(_M_ARM64) |
124 | | if (inl >= 16 * 3) { |
125 | | size_t left = 0; |
126 | | out = neon_base64_encode(in, inl, out, &left); |
127 | | in += inl - left; |
128 | | inl = left; |
129 | | } |
130 | | #endif |
131 | | |
132 | 48 | while (inl > 2) { /* keep going until we have less than 24 bits */ |
133 | 39 | *out++ = base64_table[in[0] >> 2]; |
134 | 39 | *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)]; |
135 | 39 | *out++ = base64_table[((in[1] & 0x0f) << 2) + (in[2] >> 6)]; |
136 | 39 | *out++ = base64_table[in[2] & 0x3f]; |
137 | | |
138 | 39 | in += 3; |
139 | 39 | inl -= 3; /* we just handle 3 octets of data */ |
140 | 39 | } |
141 | | |
142 | | /* now deal with the tail end of things */ |
143 | 9 | if (inl != 0) { |
144 | 9 | *out++ = base64_table[in[0] >> 2]; |
145 | 9 | if (inl > 1) { |
146 | 9 | *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)]; |
147 | 9 | *out++ = base64_table[(in[1] & 0x0f) << 2]; |
148 | 9 | if ((flags & PHP_BASE64_NO_PADDING) == 0) { |
149 | 9 | *out++ = base64_pad; |
150 | 9 | } |
151 | 9 | } else { |
152 | 0 | *out++ = base64_table[(in[0] & 0x03) << 4]; |
153 | 0 | if ((flags & PHP_BASE64_NO_PADDING) == 0) { |
154 | 0 | *out++ = base64_pad; |
155 | 0 | *out++ = base64_pad; |
156 | 0 | } |
157 | 0 | } |
158 | 9 | } |
159 | | |
160 | 9 | *out = '\0'; |
161 | | |
162 | 9 | return out; |
163 | 9 | } |
164 | | /* }}} */ |
165 | | |
166 | | #if defined(__aarch64__) || defined(_M_ARM64) |
/* Translate sixteen ASCII bytes into 6-bit values and flag lanes that fail
 * the table-based validity check.
 *
 * input:     sixteen input characters
 * error:     receives all-ones in every lane where the mask selected by the
 *            low nibble has no bit in common with the bit selected by the
 *            high nibble, and zero elsewhere
 * shiftLUT:  per-high-nibble offset added to the character
 * maskLUT:   per-low-nibble bitmask used by the check
 * bitposLUT: per-high-nibble single-bit value used by the check
 *
 * Returns input + offset (modulo 256); meaningful only for lanes whose error
 * byte is zero. */
static zend_always_inline uint8x16_t decode_fromascii(const uint8x16_t input, uint8x16_t *error, const uint8x16x2_t shiftLUT, const uint8x16x2_t maskLUT, const uint8x16x2_t bitposLUT) {
	const uint8x16_t hi = vshrq_n_u8(input, 4);
	const uint8x16_t lo = vandq_u8(input, vdupq_n_u8(0x0f));

	/* Validity: the two lookups must share at least one set bit. */
	const uint8x16_t row_mask = vqtbl2q_u8(maskLUT, lo);
	const uint8x16_t column_bit = vqtbl2q_u8(bitposLUT, hi);
	*error = vceqq_u8(vandq_u8(row_mask, column_bit), vdupq_n_u8(0));

	/* Offset: taken from the table, except that 0x2f ('/') always uses 16. */
	const uint8x16_t table_offset = vqtbl2q_u8(shiftLUT, hi);
	const uint8x16_t is_slash = vceqq_u8(input, vdupq_n_u8(0x2f));
	const uint8x16_t offset = vbslq_u8(is_slash, vdupq_n_u8(16), table_offset);

	return vaddq_u8(input, offset);
}
178 | | |
/* Decode complete 64-character groups of `in` with NEON until the input runs
 * short or a group contains a byte rejected by decode_fromascii().
 *
 * in:   input characters
 * inl:  number of input characters; the caller must guarantee inl >= 64
 *       because the loop body runs once before the length is tested
 * out:  destination; 48 bytes are written per accepted 64-character group
 * left: receives the number of input characters that were not consumed
 *       (a rejected group is left for the caller)
 *
 * Returns the number of bytes written to `out`. */
static zend_always_inline size_t neon_base64_decode(const unsigned char *in, size_t inl, unsigned char *out, size_t *left) {
	unsigned char *out_orig = out;
	/* Offset added to a character, selected by its high nibble. */
	const uint8_t shiftLUT_[32] = {
		0,   0,  19,   4, (uint8_t)-65, (uint8_t)-65, (uint8_t)-71, (uint8_t)-71,
		0,   0,   0,   0,   0,   0,   0,   0,
		0,   0,  19,   4, (uint8_t)-65, (uint8_t)-65, (uint8_t)-71, (uint8_t)-71,
		0,   0,   0,   0,   0,   0,   0,   0};
	/* Bitmask selected by the low nibble of a character. */
	const uint8_t maskLUT_[32] = {
		/* 0        : 0b1010_1000*/ 0xa8,
		/* 1 .. 9   : 0b1111_1000*/ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
		/* 10       : 0b1111_0000*/ 0xf0,
		/* 11       : 0b0101_0100*/ 0x54,
		/* 12 .. 14 : 0b0101_0000*/ 0x50, 0x50, 0x50,
		/* 15       : 0b0101_0100*/ 0x54,

		/* 0        : 0b1010_1000*/ 0xa8,
		/* 1 .. 9   : 0b1111_1000*/ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
		/* 10       : 0b1111_0000*/ 0xf0,
		/* 11       : 0b0101_0100*/ 0x54,
		/* 12 .. 14 : 0b0101_0000*/ 0x50, 0x50, 0x50,
		/* 15       : 0b0101_0100*/ 0x54
	};
	/* Single bit selected by the high nibble of a character (none for 8..15). */
	const uint8_t bitposLUT_[32] = {
		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,

		0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
		0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
	};

	/* Load the tables with real vector loads. Dereferencing the byte arrays
	   through a (uint8x16x2_t *) cast would read them via an incompatible,
	   more strictly aligned type. */
	uint8x16x2_t shiftLUT;
	uint8x16x2_t maskLUT;
	uint8x16x2_t bitposLUT;
	shiftLUT.val[0] = vld1q_u8(shiftLUT_);
	shiftLUT.val[1] = vld1q_u8(shiftLUT_ + 16);
	maskLUT.val[0] = vld1q_u8(maskLUT_);
	maskLUT.val[1] = vld1q_u8(maskLUT_ + 16);
	bitposLUT.val[0] = vld1q_u8(bitposLUT_);
	bitposLUT.val[1] = vld1q_u8(bitposLUT_ + 16);

	do {
		/* De-interleave: val[k] holds the k-th character of each group of four. */
		const uint8x16x4_t x = vld4q_u8((const unsigned char *)in);
		uint8x16_t error_a;
		uint8x16_t error_b;
		uint8x16_t error_c;
		uint8x16_t error_d;
		uint8x16_t field_a = decode_fromascii(x.val[0], &error_a, shiftLUT, maskLUT, bitposLUT);
		uint8x16_t field_b = decode_fromascii(x.val[1], &error_b, shiftLUT, maskLUT, bitposLUT);
		uint8x16_t field_c = decode_fromascii(x.val[2], &error_c, shiftLUT, maskLUT, bitposLUT);
		uint8x16_t field_d = decode_fromascii(x.val[3], &error_d, shiftLUT, maskLUT, bitposLUT);

		const uint8x16_t err = vorrq_u8(vorrq_u8(error_a, error_b), vorrq_u8(error_c, error_d));
		union {uint8_t mem[16]; uint64_t dw[2]; } error;
		vst1q_u8(error.mem, err);

		/* Check that the input only contains bytes belonging to the alphabet of
		   Base64. If there are errors, decode the rest of the string with the
		   scalar decoder. */
		if (error.dw[0] | error.dw[1]) {
			break;
		}

		/* Reassemble three output bytes from four 6-bit fields. */
		uint8x16x3_t result;
		result.val[0] = vorrq_u8(vshrq_n_u8(field_b, 4), vshlq_n_u8(field_a, 2));
		result.val[1] = vorrq_u8(vshrq_n_u8(field_c, 2), vshlq_n_u8(field_b, 4));
		result.val[2] = vorrq_u8(field_d, vshlq_n_u8(field_c, 6));

		vst3q_u8((unsigned char *)out, result);
		out += 16 * 3;
		in += 16 * 4;
		inl -= 16 * 4;
	} while (inl >= 16 * 4);
	*left = inl;
	return out - out_orig;
}
246 | | #endif /* defined(__aarch64__) || defined(_M_ARM64) */ |
247 | | |
248 | | static zend_always_inline int php_base64_decode_impl(const unsigned char *in, size_t inl, unsigned char *out, size_t *outl, bool strict) /* {{{ */ |
249 | 0 | { |
250 | 0 | int ch; |
251 | 0 | size_t i = 0, padding = 0, j = *outl; |
252 | |
|
253 | | #if defined(__aarch64__) || defined(_M_ARM64) |
254 | | if (inl >= 16 * 4) { |
255 | | size_t left = 0; |
256 | | j += neon_base64_decode(in, inl, out, &left); |
257 | | i = inl - left; |
258 | | in += i; |
259 | | inl = left; |
260 | | } |
261 | | #endif |
262 | | |
263 | | /* run through the whole string, converting as we go */ |
264 | 0 | while (inl-- > 0) { |
265 | 0 | ch = *in++; |
266 | 0 | if (ch == base64_pad) { |
267 | 0 | padding++; |
268 | 0 | continue; |
269 | 0 | } |
270 | | |
271 | 0 | ch = base64_reverse_table[ch]; |
272 | 0 | if (!strict) { |
273 | | /* skip unknown characters and whitespace */ |
274 | 0 | if (ch < 0) { |
275 | 0 | continue; |
276 | 0 | } |
277 | 0 | } else { |
278 | | /* skip whitespace */ |
279 | 0 | if (ch == -1) { |
280 | 0 | continue; |
281 | 0 | } |
282 | | /* fail on bad characters or if any data follows padding */ |
283 | 0 | if (ch == -2 || padding) { |
284 | 0 | goto fail; |
285 | 0 | } |
286 | 0 | } |
287 | | |
288 | 0 | switch (i % 4) { |
289 | 0 | case 0: |
290 | 0 | out[j] = ch << 2; |
291 | 0 | break; |
292 | 0 | case 1: |
293 | 0 | out[j++] |= ch >> 4; |
294 | 0 | out[j] = (ch & 0x0f) << 4; |
295 | 0 | break; |
296 | 0 | case 2: |
297 | 0 | out[j++] |= ch >>2; |
298 | 0 | out[j] = (ch & 0x03) << 6; |
299 | 0 | break; |
300 | 0 | case 3: |
301 | 0 | out[j++] |= ch; |
302 | 0 | break; |
303 | 0 | } |
304 | 0 | i++; |
305 | 0 | } |
306 | | |
307 | | /* fail if the input is truncated (only one char in last group) */ |
308 | 0 | if (strict && i % 4 == 1) { |
309 | 0 | goto fail; |
310 | 0 | } |
311 | | |
312 | | /* fail if the padding length is wrong (not VV==, VVV=), but accept zero padding |
313 | | * RFC 4648: "In some circumstances, the use of padding [--] is not required" */ |
314 | 0 | if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) { |
315 | 0 | goto fail; |
316 | 0 | } |
317 | | |
318 | 0 | *outl = j; |
319 | 0 | out[j] = '\0'; |
320 | |
|
321 | 0 | return 1; |
322 | | |
323 | 0 | fail: |
324 | 0 | return 0; |
325 | 0 | } |
326 | | /* }}} */ |
327 | | |
328 | | /* {{{ php_base64_encode */ |
329 | | |
330 | | #ifdef ZEND_INTRIN_AVX2_NATIVE |
331 | | # undef ZEND_INTRIN_SSSE3_NATIVE |
332 | | # undef ZEND_INTRIN_SSSE3_RESOLVER |
333 | | # undef ZEND_INTRIN_SSSE3_FUNC_PROTO |
334 | | # undef ZEND_INTRIN_SSSE3_FUNC_PTR |
335 | | #elif defined(ZEND_INTRIN_AVX2_FUNC_PROTO) && defined(ZEND_INTRIN_SSSE3_NATIVE) |
336 | | # undef ZEND_INTRIN_SSSE3_NATIVE |
337 | | # undef ZEND_INTRIN_SSSE3_RESOLVER |
338 | | # define ZEND_INTRIN_SSSE3_RESOLVER 1 |
339 | | # define ZEND_INTRIN_SSSE3_FUNC_PROTO 1 |
340 | | # undef ZEND_INTRIN_SSSE3_FUNC_DECL |
341 | | # ifdef HAVE_FUNC_ATTRIBUTE_TARGET |
342 | | # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3"))) |
343 | | # else |
344 | | # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func |
345 | | # endif |
346 | | #elif defined(ZEND_INTRIN_AVX2_FUNC_PTR) && defined(ZEND_INTRIN_SSSE3_NATIVE) |
347 | | # undef ZEND_INTRIN_SSSE3_NATIVE |
348 | | # undef ZEND_INTRIN_SSSE3_RESOLVER |
349 | | # define ZEND_INTRIN_SSSE3_RESOLVER 1 |
350 | | # define ZEND_INTRIN_SSSE3_FUNC_PTR 1 |
351 | | # undef ZEND_INTRIN_SSSE3_FUNC_DECL |
352 | | # ifdef HAVE_FUNC_ATTRIBUTE_TARGET |
353 | | # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3"))) |
354 | | # else |
355 | | # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func |
356 | | # endif |
357 | | #endif |
358 | | |
359 | | /* Only enable avx512 resolver if avx2 use resolver also */ |
360 | | #if defined(ZEND_INTRIN_AVX2_FUNC_PROTO) && defined(ZEND_INTRIN_AVX512_FUNC_PROTO) |
361 | | #define BASE64_INTRIN_AVX512_FUNC_PROTO 1 |
362 | | #endif |
363 | | #if defined(ZEND_INTRIN_AVX2_FUNC_PTR) && defined(ZEND_INTRIN_AVX512_FUNC_PTR) |
364 | | #define BASE64_INTRIN_AVX512_FUNC_PTR 1 |
365 | | #endif |
366 | | #if defined(ZEND_INTRIN_AVX2_FUNC_PROTO) && defined(ZEND_INTRIN_AVX512_VBMI_FUNC_PROTO) |
367 | | #define BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO 1 |
368 | | #endif |
369 | | #if defined(ZEND_INTRIN_AVX2_FUNC_PTR) && defined(ZEND_INTRIN_AVX512_VBMI_FUNC_PTR) |
370 | | #define BASE64_INTRIN_AVX512_VBMI_FUNC_PTR 1 |
371 | | #endif |
372 | | |
373 | | #ifdef ZEND_INTRIN_AVX2_NATIVE |
374 | | # include <immintrin.h> |
375 | | #elif defined(ZEND_INTRIN_SSSE3_NATIVE) |
376 | | # include <tmmintrin.h> |
377 | | #elif defined(ZEND_INTRIN_SSSE3_RESOLVER) || defined(ZEND_INTRIN_AVX2_RESOLVER) |
378 | | # ifdef ZEND_INTRIN_AVX2_RESOLVER |
379 | | # include <immintrin.h> |
380 | | # else |
381 | | # include <tmmintrin.h> |
382 | | # endif /* (ZEND_INTRIN_SSSE3_RESOLVER || ZEND_INTRIN_AVX2_RESOLVER) */ |
383 | | # include "Zend/zend_cpuinfo.h" |
384 | | |
385 | | # if defined(BASE64_INTRIN_AVX512_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_FUNC_PTR) |
386 | | ZEND_INTRIN_AVX512_FUNC_DECL(zend_string *php_base64_encode_avx512(const unsigned char *str, size_t length, zend_long flags)); |
387 | | ZEND_INTRIN_AVX512_FUNC_DECL(zend_string *php_base64_decode_ex_avx512(const unsigned char *str, size_t length, bool strict)); |
388 | | # endif |
389 | | # if defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PTR) |
390 | | ZEND_INTRIN_AVX512_VBMI_FUNC_DECL(zend_string *php_base64_encode_avx512_vbmi(const unsigned char *str, size_t length, zend_long flags)); |
391 | | ZEND_INTRIN_AVX512_VBMI_FUNC_DECL(zend_string *php_base64_decode_ex_avx512_vbmi(const unsigned char *str, size_t length, bool strict)); |
392 | | # endif |
393 | | |
394 | | # ifdef ZEND_INTRIN_AVX2_RESOLVER |
395 | | ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length, zend_long flags)); |
396 | | ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, bool strict)); |
397 | | # endif |
398 | | |
399 | | # ifdef ZEND_INTRIN_SSSE3_RESOLVER |
400 | | ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length, zend_long flags)); |
401 | | ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, bool strict)); |
402 | | # endif |
403 | | |
404 | | zend_string *php_base64_encode_default(const unsigned char *str, size_t length, zend_long flags); |
405 | | zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, bool strict); |
406 | | |
407 | | # if (defined(ZEND_INTRIN_AVX2_FUNC_PROTO) || defined(ZEND_INTRIN_SSSE3_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO)) |
408 | | PHPAPI zend_string *php_base64_encode_ex(const unsigned char *str, size_t length, zend_long flags) __attribute__((ifunc("resolve_base64_encode"))); |
409 | | PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict) __attribute__((ifunc("resolve_base64_decode"))); |
410 | | |
411 | | typedef zend_string *(*base64_encode_func_t)(const unsigned char *, size_t, zend_long flags); |
412 | | typedef zend_string *(*base64_decode_func_t)(const unsigned char *, size_t, bool); |
413 | | |
414 | | ZEND_NO_SANITIZE_ADDRESS |
415 | | ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */ |
416 | 2 | static base64_encode_func_t resolve_base64_encode(void) { |
417 | 2 | # ifdef BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO |
418 | 2 | if (zend_cpu_supports_avx512_vbmi()) { |
419 | 0 | return php_base64_encode_avx512_vbmi; |
420 | 0 | } else |
421 | 2 | # endif |
422 | 2 | # ifdef BASE64_INTRIN_AVX512_FUNC_PROTO |
423 | 2 | if (zend_cpu_supports_avx512()) { |
424 | 0 | return php_base64_encode_avx512; |
425 | 0 | } else |
426 | 2 | # endif |
427 | 2 | # ifdef ZEND_INTRIN_AVX2_FUNC_PROTO |
428 | 2 | if (zend_cpu_supports_avx2()) { |
429 | 2 | return php_base64_encode_avx2; |
430 | 2 | } else |
431 | 0 | # endif |
432 | 0 | #ifdef ZEND_INTRIN_SSSE3_FUNC_PROTO |
433 | 0 | if (zend_cpu_supports_ssse3()) { |
434 | 0 | return php_base64_encode_ssse3; |
435 | 0 | } |
436 | 0 | #endif |
437 | 0 | return php_base64_encode_default; |
438 | 2 | } |
439 | | |
440 | | ZEND_NO_SANITIZE_ADDRESS |
441 | | ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */ |
442 | 2 | static base64_decode_func_t resolve_base64_decode(void) { |
443 | 2 | # ifdef BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO |
444 | 2 | if (zend_cpu_supports_avx512_vbmi()) { |
445 | 0 | return php_base64_decode_ex_avx512_vbmi; |
446 | 0 | } else |
447 | 2 | # endif |
448 | 2 | # ifdef BASE64_INTRIN_AVX512_FUNC_PROTO |
449 | 2 | if (zend_cpu_supports_avx512()) { |
450 | 0 | return php_base64_decode_ex_avx512; |
451 | 0 | } else |
452 | 2 | # endif |
453 | 2 | # ifdef ZEND_INTRIN_AVX2_FUNC_PROTO |
454 | 2 | if (zend_cpu_supports_avx2()) { |
455 | 2 | return php_base64_decode_ex_avx2; |
456 | 2 | } else |
457 | 0 | # endif |
458 | 0 | #ifdef ZEND_INTRIN_SSSE3_FUNC_PROTO |
459 | 0 | if (zend_cpu_supports_ssse3()) { |
460 | 0 | return php_base64_decode_ex_ssse3; |
461 | 0 | } |
462 | 0 | #endif |
463 | 0 | return php_base64_decode_ex_default; |
464 | 2 | } |
465 | | # else /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */ |
466 | | |
467 | | PHPAPI zend_string *(*php_base64_encode_ptr)(const unsigned char *str, size_t length, zend_long flags) = NULL; |
468 | | PHPAPI zend_string *(*php_base64_decode_ex_ptr)(const unsigned char *str, size_t length, bool strict) = NULL; |
469 | | |
470 | | PHPAPI zend_string *php_base64_encode_ex(const unsigned char *str, size_t length, zend_long flags) { |
471 | | return php_base64_encode_ptr(str, length, flags); |
472 | | } |
473 | | PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict) { |
474 | | return php_base64_decode_ex_ptr(str, length, strict); |
475 | | } |
476 | | |
477 | | PHP_MINIT_FUNCTION(base64_intrin) |
478 | | { |
479 | | # ifdef BASE64_INTRIN_AVX512_VBMI_FUNC_PTR |
480 | | if (zend_cpu_supports_avx512_vbmi()) { |
481 | | php_base64_encode_ptr = php_base64_encode_avx512_vbmi; |
482 | | php_base64_decode_ex_ptr = php_base64_decode_ex_avx512_vbmi; |
483 | | } else |
484 | | # endif |
485 | | # ifdef BASE64_INTRIN_AVX512_FUNC_PTR |
486 | | if (zend_cpu_supports_avx512()) { |
487 | | php_base64_encode_ptr = php_base64_encode_avx512; |
488 | | php_base64_decode_ex_ptr = php_base64_decode_ex_avx512; |
489 | | } else |
490 | | # endif |
491 | | # ifdef ZEND_INTRIN_AVX2_FUNC_PTR |
492 | | if (zend_cpu_supports_avx2()) { |
493 | | php_base64_encode_ptr = php_base64_encode_avx2; |
494 | | php_base64_decode_ex_ptr = php_base64_decode_ex_avx2; |
495 | | } else |
496 | | # endif |
497 | | #ifdef ZEND_INTRIN_SSSE3_FUNC_PTR |
498 | | if (zend_cpu_supports_ssse3()) { |
499 | | php_base64_encode_ptr = php_base64_encode_ssse3; |
500 | | php_base64_decode_ex_ptr = php_base64_decode_ex_ssse3; |
501 | | } else |
502 | | #endif |
503 | | { |
504 | | php_base64_encode_ptr = php_base64_encode_default; |
505 | | php_base64_decode_ex_ptr = php_base64_decode_ex_default; |
506 | | } |
507 | | return SUCCESS; |
508 | | } |
509 | | # endif /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */ |
510 | | #endif /* ZEND_INTRIN_AVX2_NATIVE */ |
511 | | |
512 | | #if defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PTR) |
513 | | zend_string *php_base64_encode_avx512_vbmi(const unsigned char *str, size_t length, zend_long flags) |
514 | 0 | { |
515 | 0 | const unsigned char *c = str; |
516 | 0 | unsigned char *o; |
517 | 0 | zend_string *result; |
518 | |
|
519 | 0 | result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); |
520 | 0 | o = (unsigned char *)ZSTR_VAL(result); |
521 | |
|
522 | 0 | const __m512i shuffle_splitting = _mm512_setr_epi32( |
523 | 0 | 0x01020001, 0x04050304, 0x07080607, 0x0a0b090a, 0x0d0e0c0d, 0x10110f10, |
524 | 0 | 0x13141213, 0x16171516, 0x191a1819, 0x1c1d1b1c, 0x1f201e1f, 0x22232122, |
525 | 0 | 0x25262425, 0x28292728, 0x2b2c2a2b, 0x2e2f2d2e); |
526 | 0 | const __m512i multi_shifts = _mm512_set1_epi64(0x3036242a1016040a); |
527 | 0 | const char *ascii_lookup_tbl = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; |
528 | 0 | const __m512i ascii_lookup = _mm512_loadu_si512((__m512i *)ascii_lookup_tbl); |
529 | |
|
530 | 0 | while (length > 63) { |
531 | | /* Step 1: load input data */ |
532 | 0 | __m512i str = _mm512_loadu_si512((const __m512i *)c); |
533 | | |
534 | | /* Step 2: splitting 24-bit words into 32-bit lanes */ |
535 | 0 | str = _mm512_permutexvar_epi8(shuffle_splitting, str); |
536 | | |
537 | | /* Step 3: moving 6-bit word to separate bytes */ |
538 | 0 | str = _mm512_multishift_epi64_epi8(multi_shifts, str); |
539 | | |
540 | | /* Step 4: conversion to ASCII */ |
541 | 0 | str = _mm512_permutexvar_epi8(str, ascii_lookup); |
542 | | |
543 | | /* Step 5: store the final result */ |
544 | 0 | _mm512_storeu_si512((__m512i *)o, str); |
545 | 0 | c += 48; |
546 | 0 | o += 64; |
547 | 0 | length -= 48; |
548 | 0 | } |
549 | |
|
550 | 0 | o = php_base64_encode_impl(c, length, o, flags); |
551 | |
|
552 | 0 | ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result)); |
553 | |
|
554 | 0 | return result; |
555 | 0 | } |
556 | | |
557 | | zend_string *php_base64_decode_ex_avx512_vbmi(const unsigned char *str, size_t length, bool strict) |
558 | 0 | { |
559 | 0 | const unsigned char *c = str; |
560 | 0 | unsigned char *o; |
561 | 0 | size_t outl = 0; |
562 | 0 | zend_string *result; |
563 | |
|
564 | 0 | result = zend_string_alloc(length, 0); |
565 | 0 | o = (unsigned char *)ZSTR_VAL(result); |
566 | |
|
567 | 0 | const __m512i lookup_0 = _mm512_setr_epi32( |
568 | 0 | 0x80808080, 0x80808080, 0x80808080, 0x80808080, 0x80808080, 0x80808080, |
569 | 0 | 0x80808080, 0x80808080, 0x80808080, 0x80808080, 0x3e808080, 0x3f808080, |
570 | 0 | 0x37363534, 0x3b3a3938, 0x80803d3c, 0x80808080); |
571 | 0 | const __m512i lookup_1 = _mm512_setr_epi32( |
572 | 0 | 0x02010080, 0x06050403, 0x0a090807, 0x0e0d0c0b, 0x1211100f, 0x16151413, |
573 | 0 | 0x80191817, 0x80808080, 0x1c1b1a80, 0x201f1e1d, 0x24232221, 0x28272625, |
574 | 0 | 0x2c2b2a29, 0x302f2e2d, 0x80333231, 0x80808080); |
575 | |
|
576 | 0 | const __m512i merge_mask1 = _mm512_set1_epi32(0x01400140); |
577 | 0 | const __m512i merge_mask2 = _mm512_set1_epi32(0x00011000); |
578 | |
|
579 | 0 | const __m512i continuous_mask = _mm512_setr_epi32( |
580 | 0 | 0x06000102, 0x090a0405, 0x0c0d0e08, 0x16101112, 0x191a1415, 0x1c1d1e18, |
581 | 0 | 0x26202122, 0x292a2425, 0x2c2d2e28, 0x36303132, 0x393a3435, 0x3c3d3e38, |
582 | 0 | 0x00000000, 0x00000000, 0x00000000, 0x00000000); |
583 | |
|
584 | 0 | while (length > 64) { |
585 | | /* Step 1: load input data */ |
586 | 0 | const __m512i input = _mm512_loadu_si512((__m512i *)c); |
587 | | |
588 | | /* Step 2: translation into 6-bit values(saved on bytes) from ASCII and error detection */ |
589 | 0 | __m512i str = _mm512_permutex2var_epi8(lookup_0, input, lookup_1); |
590 | 0 | const uint64_t mask = _mm512_movepi8_mask(_mm512_or_epi64(str, input)); /* convert MSBs to the mask */ |
591 | 0 | if (mask) { |
592 | 0 | break; |
593 | 0 | } |
594 | | |
595 | | /* Step 3: pack four fields within 32-bit words into 24-bit words. */ |
596 | 0 | const __m512i merge_ab_and_bc = _mm512_maddubs_epi16(str, merge_mask1); |
597 | 0 | str = _mm512_madd_epi16(merge_ab_and_bc, merge_mask2); |
598 | | |
599 | | /* Step 4: move 3-byte words into the continuous array. */ |
600 | 0 | str = _mm512_permutexvar_epi8(continuous_mask, str); |
601 | | |
602 | | /* Step 5: store the final result */ |
603 | 0 | _mm512_storeu_si512((__m512i *)o, str); |
604 | |
|
605 | 0 | c += 64; |
606 | 0 | o += 48; |
607 | 0 | outl += 48; |
608 | 0 | length -= 64; |
609 | 0 | } |
610 | |
|
611 | 0 | if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { |
612 | 0 | zend_string_efree(result); |
613 | 0 | return NULL; |
614 | 0 | } |
615 | | |
616 | 0 | ZSTR_LEN(result) = outl; |
617 | |
|
618 | 0 | return result; |
619 | 0 | } |
620 | | #endif |
621 | | |
622 | | #if defined(BASE64_INTRIN_AVX512_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_FUNC_PTR) |
623 | | zend_string *php_base64_encode_avx512(const unsigned char *str, size_t length, zend_long flags) |
624 | 0 | { |
625 | 0 | const unsigned char *c = str; |
626 | 0 | unsigned char *o; |
627 | 0 | zend_string *result; |
628 | |
|
629 | 0 | result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); |
630 | 0 | o = (unsigned char *)ZSTR_VAL(result); |
631 | |
|
632 | 0 | while (length > 63) { |
633 | | /* Step 1: load input data */ |
634 | | /* [????|????|????|????|PPPO|OONN|NMMM|LLLK|KKJJ|JIII|HHHG|GGFF|FEEE|DDDC|CCBB|BAAA] */ |
635 | 0 | __m512i str = _mm512_loadu_si512((const __m512i *)c); |
636 | | |
637 | | /* Step 2: splitting 24-bit words into 32-bit lanes */ |
638 | | /* [0000|PPPO|OONN|NMMM|0000|LLLK|KKJJ|JIII|0000|HHHG|GGFF|FEEE|0000|DDDC|CCBB|BAAA] */ |
639 | 0 | str = _mm512_permutexvar_epi32( |
640 | 0 | _mm512_set_epi32(-1, 11, 10, 9, -1, 8, 7, 6, -1, 5, 4, 3, -1, 2, 1, 0), str); |
641 | | /* [D1 D2 D0 D1|C1 C2 C0 C1|B1 B2 B0 B1|A1 A2 A0 A1] x 4 */ |
642 | 0 | str = _mm512_shuffle_epi8(str, _mm512_set4_epi32(0x0a0b090a, 0x07080607, 0x04050304, 0x01020001)); |
643 | | |
644 | | /* Step 3: moving 6-bit word to separate bytes */ |
645 | | /* in: [bbbbcccc|ccdddddd|aaaaaabb|bbbbcccc] */ |
646 | | /* t0: [0000cccc|cc000000|aaaaaa00|00000000] */ |
647 | 0 | const __m512i t0 = _mm512_and_si512(str, _mm512_set1_epi32(0x0fc0fc00)); |
648 | | /* t1: [00000000|00cccccc|00000000|00aaaaaa] */ |
649 | 0 | const __m512i t1 = _mm512_srlv_epi16(t0, _mm512_set1_epi32(0x0006000a)); |
650 | | /* t2: [ccdddddd|00000000|aabbbbbb|cccc0000] */ |
651 | 0 | const __m512i t2 = _mm512_sllv_epi16(str, _mm512_set1_epi32(0x00080004)); |
652 | | /* str: [00dddddd|00cccccc|00bbbbbb|00aaaaaa] */ |
653 | 0 | str = _mm512_ternarylogic_epi32(_mm512_set1_epi32(0x3f003f00), t2, t1, 0xca); |
654 | | |
655 | | /* Step 4: conversion to ASCII */ |
656 | 0 | __m512i result = _mm512_subs_epu8(str, _mm512_set1_epi8(51)); |
657 | 0 | const __mmask64 less = _mm512_cmpgt_epi8_mask(_mm512_set1_epi8(26), str); |
658 | 0 | result = _mm512_mask_mov_epi8(result, less, _mm512_set1_epi8(13)); |
659 | 0 | const __m512i lut = _mm512_set4_epi32(0x000041f0, 0xedfcfcfc, 0xfcfcfcfc, 0xfcfcfc47); |
660 | 0 | result = _mm512_shuffle_epi8(lut, result); |
661 | 0 | result = _mm512_add_epi8(result, str); |
662 | | |
663 | | /* Step 5: store the final result */ |
664 | 0 | _mm512_storeu_si512((__m512i *)o, result); |
665 | 0 | c += 48; |
666 | 0 | o += 64; |
667 | 0 | length -= 48; |
668 | 0 | } |
669 | |
|
670 | 0 | o = php_base64_encode_impl(c, length, o, flags); |
671 | |
|
672 | 0 | ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result)); |
673 | |
|
674 | 0 | return result; |
675 | 0 | } |
676 | | |
/* Assemble a 32-bit value from four bytes, b0 being the least significant.
 * Every argument is truncated to 8 bits first, so negative constants such as
 * -65 contribute their two's-complement byte. Arguments and the expansion are
 * fully parenthesized so the macro is safe with compound argument expressions
 * and inside larger expressions. */
#define build_dword(b0, b1, b2, b3)                                        \
	(((uint32_t)(uint8_t)(b0) << 0) | ((uint32_t)(uint8_t)(b1) << 8) |     \
	 ((uint32_t)(uint8_t)(b2) << 16) | ((uint32_t)(uint8_t)(b3) << 24))

/* Build a __m512i whose four 128-bit lanes all contain the sixteen bytes
 * b0..b15, b0 being the lowest byte of each lane. */
#define _mm512_set4lanes_epi8(b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15) \
	_mm512_setr4_epi32(build_dword(b0, b1, b2, b3), build_dword(b4, b5, b6, b7), \
	                   build_dword(b8, b9, b10, b11), build_dword(b12, b13, b14, b15))
684 | | |
685 | | zend_string *php_base64_decode_ex_avx512(const unsigned char *str, size_t length, bool strict) |
686 | 0 | { |
687 | 0 | const unsigned char *c = str; |
688 | 0 | unsigned char *o; |
689 | 0 | size_t outl = 0; |
690 | 0 | zend_string *result; |
691 | |
|
692 | 0 | result = zend_string_alloc(length, 0); |
693 | 0 | o = (unsigned char *)ZSTR_VAL(result); |
694 | |
|
695 | 0 | while (length > 64) { |
696 | | /* Step 1: load input data */ |
697 | 0 | __m512i str = _mm512_loadu_si512((__m512i *)c); |
698 | | |
699 | | /* Step 2: translation into 6-bit values(saved on bytes) from ASCII and error detection */ |
700 | 0 | const __m512i higher_nibble = _mm512_and_si512(_mm512_srli_epi32(str, 4), _mm512_set1_epi8(0x0f)); |
701 | 0 | const __m512i lower_nibble = _mm512_and_si512(str, _mm512_set1_epi8(0x0f)); |
702 | 0 | const __m512i shiftLUT = _mm512_set4lanes_epi8( |
703 | 0 | 0, 0, 19, 4, -65, -65, -71, -71, 0, 0, 0, 0, 0, 0, 0, 0); |
704 | 0 | const __m512i maskLUT = _mm512_set4lanes_epi8( |
705 | 0 | /* 0 : 0b1010_1000*/ 0xa8, |
706 | 0 | /* 1 .. 9 : 0b1111_1000*/ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, |
707 | 0 | /* 10 : 0b1111_0000*/ 0xf0, |
708 | 0 | /* 11 : 0b0101_0100*/ 0x54, |
709 | 0 | /* 12 .. 14 : 0b0101_0000*/ 0x50, 0x50, 0x50, |
710 | 0 | /* 15 : 0b0101_0100*/ 0x54); |
711 | 0 | const __m512i bitposLUT = _mm512_set4lanes_epi8( |
712 | 0 | 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, |
713 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); |
714 | 0 | const __m512i M = _mm512_shuffle_epi8(maskLUT, lower_nibble); |
715 | 0 | const __m512i bit = _mm512_shuffle_epi8(bitposLUT, higher_nibble); |
716 | 0 | const uint64_t match = _mm512_test_epi8_mask(M, bit); |
717 | 0 | if (match != (uint64_t)-1) { |
718 | 0 | break; |
719 | 0 | } |
720 | 0 | const __m512i sh = _mm512_shuffle_epi8(shiftLUT, higher_nibble); |
721 | 0 | const __mmask64 eq_2f = _mm512_cmpeq_epi8_mask(str, _mm512_set1_epi8(0x2f)); |
722 | 0 | const __m512i shift = _mm512_mask_mov_epi8(sh, eq_2f, _mm512_set1_epi8(16)); |
723 | 0 | str = _mm512_add_epi8(str, shift); |
724 | | |
725 | | /* Step 3: pack four fields within 32-bit words into 24-bit words. */ |
726 | 0 | const __m512i merge_ab_and_bc = _mm512_maddubs_epi16(str, _mm512_set1_epi32(0x01400140)); |
727 | 0 | str = _mm512_madd_epi16(merge_ab_and_bc, _mm512_set1_epi32(0x00011000)); |
728 | | |
729 | | /* Step 4: move 3-byte words into the continuous array. */ |
730 | 0 | const __m512i t1 = _mm512_shuffle_epi8(str, |
731 | 0 | _mm512_set4lanes_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1)); |
732 | 0 | const __m512i s6 = _mm512_setr_epi32(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0, 0, 0, 0); |
733 | 0 | const __m512i t2 = _mm512_permutexvar_epi32(s6, t1); |
734 | | |
735 | | /* Step 5: store the final result */ |
736 | 0 | _mm512_storeu_si512((__m512i *)o, t2); |
737 | |
|
738 | 0 | c += 64; |
739 | 0 | o += 48; |
740 | 0 | outl += 48; |
741 | 0 | length -= 64; |
742 | 0 | } |
743 | |
|
744 | 0 | if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { |
745 | 0 | zend_string_efree(result); |
746 | 0 | return NULL; |
747 | 0 | } |
748 | | |
749 | 0 | ZSTR_LEN(result) = outl; |
750 | |
|
751 | 0 | return result; |
752 | 0 | } |
753 | | #endif |
754 | | |
755 | | #if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) |
756 | | # if defined(ZEND_INTRIN_AVX2_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET) |
757 | | static __m256i php_base64_encode_avx2_reshuffle(__m256i in) __attribute__((target("avx2"))); |
758 | | static __m256i php_base64_encode_avx2_translate(__m256i in) __attribute__((target("avx2"))); |
759 | | # endif |
760 | | static __m256i php_base64_encode_avx2_reshuffle(__m256i in) |
761 | 12 | { |
762 | | /* This one works with shifted (4 bytes) input in order to |
763 | | * be able to work efficiently in the 2 128-bit lanes */ |
764 | 12 | __m256i t0, t1, t2, t3; |
765 | | |
766 | | /* input, bytes MSB to LSB: |
767 | | * 0 0 0 0 x w v u t s r q p o n m |
768 | | * l k j i h g f e d c b a 0 0 0 0 */ |
769 | 12 | in = _mm256_shuffle_epi8(in, _mm256_set_epi8( |
770 | 12 | 10, 11, 9, 10, |
771 | 12 | 7, 8, 6, 7, |
772 | 12 | 4, 5, 3, 4, |
773 | 12 | 1, 2, 0, 1, |
774 | | |
775 | 12 | 14, 15, 13, 14, |
776 | 12 | 11, 12, 10, 11, |
777 | 12 | 8, 9, 7, 8, |
778 | 12 | 5, 6, 4, 5)); |
779 | | |
780 | 12 | t0 = _mm256_and_si256(in, _mm256_set1_epi32(0x0fc0fc00)); |
781 | | |
782 | 12 | t1 = _mm256_mulhi_epu16(t0, _mm256_set1_epi32(0x04000040)); |
783 | | |
784 | 12 | t2 = _mm256_and_si256(in, _mm256_set1_epi32(0x003f03f0)); |
785 | | |
786 | 12 | t3 = _mm256_mullo_epi16(t2, _mm256_set1_epi32(0x01000010)); |
787 | | |
788 | 12 | return _mm256_or_si256(t1, t3); |
789 | | /* 00xxxxxx 00wwwwXX 00vvWWWW 00VVVVVV |
790 | | * 00uuuuuu 00ttttUU 00ssTTTT 00SSSSSS |
791 | | * 00rrrrrr 00qqqqRR 00ppQQQQ 00PPPPPP |
792 | | * 00oooooo 00nnnnOO 00mmNNNN 00MMMMMM |
793 | | * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ |
794 | | * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG |
795 | | * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD |
796 | | * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */ |
797 | 12 | } |
798 | | |
799 | | static __m256i php_base64_encode_avx2_translate(__m256i in) |
800 | 12 | { |
801 | 12 | __m256i lut, indices, mask; |
802 | | |
803 | 12 | lut = _mm256_setr_epi8( |
804 | 12 | 65, 71, -4, -4, -4, -4, -4, -4, |
805 | 12 | -4, -4, -4, -4, -19, -16, 0, 0, |
806 | 12 | 65, 71, -4, -4, -4, -4, -4, -4, |
807 | 12 | -4, -4, -4, -4, -19, -16, 0, 0); |
808 | | |
809 | 12 | indices = _mm256_subs_epu8(in, _mm256_set1_epi8(51)); |
810 | | |
811 | 12 | mask = _mm256_cmpgt_epi8(in, _mm256_set1_epi8(25)); |
812 | | |
813 | 12 | indices = _mm256_sub_epi8(indices, mask); |
814 | | |
815 | 12 | return _mm256_add_epi8(in, _mm256_shuffle_epi8(lut, indices)); |
816 | | |
817 | 12 | } |
818 | | #endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER */ |
819 | | |
820 | | #if defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER) |
821 | | |
822 | | # if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET) |
823 | | static __m128i php_base64_encode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3"))); |
824 | | static __m128i php_base64_encode_ssse3_translate(__m128i in) __attribute__((target("ssse3"))); |
825 | | # endif |
826 | | |
827 | | static __m128i php_base64_encode_ssse3_reshuffle(__m128i in) |
828 | 0 | { |
829 | 0 | __m128i t0, t1, t2, t3; |
830 | | |
831 | | /* input, bytes MSB to LSB: |
832 | | * 0 0 0 0 l k j i h g f e d c b a */ |
833 | 0 | in = _mm_shuffle_epi8(in, _mm_set_epi8( |
834 | 0 | 10, 11, 9, 10, |
835 | 0 | 7, 8, 6, 7, |
836 | 0 | 4, 5, 3, 4, |
837 | 0 | 1, 2, 0, 1)); |
838 | |
|
839 | 0 | t0 = _mm_and_si128(in, _mm_set1_epi32(0x0fc0fc00)); |
840 | |
|
841 | 0 | t1 = _mm_mulhi_epu16(t0, _mm_set1_epi32(0x04000040)); |
842 | |
|
843 | 0 | t2 = _mm_and_si128(in, _mm_set1_epi32(0x003f03f0)); |
844 | |
|
845 | 0 | t3 = _mm_mullo_epi16(t2, _mm_set1_epi32(0x01000010)); |
846 | | |
847 | | /* output (upper case are MSB, lower case are LSB): |
848 | | * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ |
849 | | * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG |
850 | | * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD |
851 | | * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */ |
852 | 0 | return _mm_or_si128(t1, t3); |
853 | 0 | } |
854 | | |
855 | | static __m128i php_base64_encode_ssse3_translate(__m128i in) |
856 | 0 | { |
857 | 0 | __m128i mask, indices; |
858 | 0 | __m128i lut = _mm_setr_epi8( |
859 | 0 | 65, 71, -4, -4, |
860 | 0 | -4, -4, -4, -4, |
861 | 0 | -4, -4, -4, -4, |
862 | 0 | -19, -16, 0, 0 |
863 | 0 | ); |
864 | | |
865 | | /* Translate values 0..63 to the Base64 alphabet. There are five sets: |
866 | | * # From To Abs Index Characters |
867 | | * 0 [0..25] [65..90] +65 0 ABCDEFGHIJKLMNOPQRSTUVWXYZ |
868 | | * 1 [26..51] [97..122] +71 1 abcdefghijklmnopqrstuvwxyz |
869 | | * 2 [52..61] [48..57] -4 [2..11] 0123456789 |
870 | | * 3 [62] [43] -19 12 + |
871 | | * 4 [63] [47] -16 13 / */ |
872 | | |
873 | | /* Create LUT indices from input: |
874 | | * the index for range #0 is right, others are 1 less than expected: */ |
875 | 0 | indices = _mm_subs_epu8(in, _mm_set1_epi8(51)); |
876 | | |
877 | | /* mask is 0xFF (-1) for range #[1..4] and 0x00 for range #0: */ |
878 | 0 | mask = _mm_cmpgt_epi8(in, _mm_set1_epi8(25)); |
879 | | |
880 | | /* subtract -1, so add 1 to indices for range #[1..4], All indices are now correct: */ |
881 | 0 | indices = _mm_sub_epi8(indices, mask); |
882 | | |
883 | | /* Add offsets to input values: */ |
884 | 0 | return _mm_add_epi8(in, _mm_shuffle_epi8(lut, indices)); |
885 | 0 | } |
886 | | |
/* SSSE3 encode loop shared by the SSSE3 entry points.
 *
 * Expects these variables in the expanding scope and updates them in place:
 *   c      - read cursor into the input,
 *   o      - write cursor into the output,
 *   length - number of input bytes still to encode.
 * Each round loads 16 bytes (hence the `length > 15` bound), encodes the
 * first 12 of them into 16 output characters and advances the cursors.
 * Whatever is left afterwards is meant for the scalar encoder.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one statement
 * and must be terminated with `;` at the use site. */
#define PHP_BASE64_ENCODE_SSSE3_LOOP \
	do { \
		while (length > 15) { \
			__m128i s = _mm_loadu_si128((__m128i *)c); \
			\
			s = php_base64_encode_ssse3_reshuffle(s); \
			\
			s = php_base64_encode_ssse3_translate(s); \
			\
			_mm_storeu_si128((__m128i *)o, s); \
			c += 12; \
			o += 16; \
			length -= 12; \
		} \
	} while (0)
900 | | |
901 | | #endif /* ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */ |
902 | | |
903 | | #if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER) |
904 | | # if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_SSSE3_NATIVE) |
905 | | PHPAPI zend_string *php_base64_encode_ex(const unsigned char *str, size_t length, zend_long flags) |
906 | | # elif defined(ZEND_INTRIN_AVX2_RESOLVER) |
907 | | zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length, zend_long flags) |
908 | | # else /* ZEND_INTRIN_SSSE3_RESOLVER */ |
909 | | zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length, zend_long flags) |
910 | | # endif |
911 | 9 | { |
912 | 9 | const unsigned char *c = str; |
913 | 9 | unsigned char *o; |
914 | 9 | zend_string *result; |
915 | | |
916 | 9 | result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); |
917 | 9 | o = (unsigned char *)ZSTR_VAL(result); |
918 | 9 | # if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) |
919 | 9 | if (length > 31) { |
920 | 3 | __m256i s = _mm256_loadu_si256((__m256i *)c); |
921 | | |
922 | 3 | s = _mm256_permutevar8x32_epi32(s, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6)); |
923 | | |
924 | 12 | for (;;) { |
925 | 12 | s = php_base64_encode_avx2_reshuffle(s); |
926 | | |
927 | 12 | s = php_base64_encode_avx2_translate(s); |
928 | | |
929 | 12 | _mm256_storeu_si256((__m256i *)o, s); |
930 | 12 | c += 24; |
931 | 12 | o += 32; |
932 | 12 | length -= 24; |
933 | 12 | if (length < 28) { |
934 | 3 | break; |
935 | 3 | } |
936 | 9 | s = _mm256_loadu_si256((__m256i *)(c - 4)); |
937 | 9 | } |
938 | 3 | } |
939 | | # else |
940 | | PHP_BASE64_ENCODE_SSSE3_LOOP; |
941 | | # endif |
942 | | |
943 | 9 | o = php_base64_encode_impl(c, length, o, flags); |
944 | | |
945 | 9 | ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result)); |
946 | | |
947 | 9 | return result; |
948 | 9 | } |
949 | | |
950 | | # if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(ZEND_INTRIN_AVX2_RESOLVER) |
951 | | zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length, zend_long flags) |
952 | 0 | { |
953 | 0 | const unsigned char *c = str; |
954 | 0 | unsigned char *o; |
955 | 0 | zend_string *result; |
956 | |
|
957 | 0 | result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); |
958 | 0 | o = (unsigned char *)ZSTR_VAL(result); |
959 | |
|
960 | 0 | PHP_BASE64_ENCODE_SSSE3_LOOP; |
961 | |
|
962 | 0 | o = php_base64_encode_impl(c, length, o, flags); |
963 | |
|
964 | 0 | ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result)); |
965 | |
|
966 | 0 | return result; |
967 | 0 | } |
968 | | # endif |
969 | | #endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */ |
970 | | |
971 | | /* }}} */ |
972 | | |
973 | | #if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) |
974 | | # if defined(ZEND_INTRIN_AVX2_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET) |
975 | | static __m256i php_base64_decode_avx2_reshuffle(__m256i in) __attribute__((target("avx2"))); |
976 | | # endif |
977 | | |
978 | | static __m256i php_base64_decode_avx2_reshuffle(__m256i in) |
979 | 0 | { |
980 | 0 | __m256i merge_ab_and_bc, out; |
981 | |
|
982 | 0 | merge_ab_and_bc = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140)); |
983 | |
|
984 | 0 | out = _mm256_madd_epi16(merge_ab_and_bc, _mm256_set1_epi32(0x00011000)); |
985 | |
|
986 | 0 | out = _mm256_shuffle_epi8(out, _mm256_setr_epi8( |
987 | 0 | 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1, |
988 | 0 | 2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1)); |
989 | |
|
990 | 0 | return _mm256_permutevar8x32_epi32(out, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1)); |
991 | 0 | } |
992 | | #endif |
993 | | |
994 | | #if defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER) |
995 | | # if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET) |
996 | | static __m128i php_base64_decode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3"))); |
997 | | # endif |
998 | | |
999 | | static __m128i php_base64_decode_ssse3_reshuffle(__m128i in) |
1000 | 0 | { |
1001 | 0 | __m128i merge_ab_and_bc, out; |
1002 | |
|
1003 | 0 | merge_ab_and_bc = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140)); |
1004 | | /* 0000kkkk LLllllll 0000JJJJ JJjjKKKK |
1005 | | * 0000hhhh IIiiiiii 0000GGGG GGggHHHH |
1006 | | * 0000eeee FFffffff 0000DDDD DDddEEEE |
1007 | | * 0000bbbb CCcccccc 0000AAAA AAaaBBBB */ |
1008 | |
|
1009 | 0 | out = _mm_madd_epi16(merge_ab_and_bc, _mm_set1_epi32(0x00011000)); |
1010 | | /* 00000000 JJJJJJjj KKKKkkkk LLllllll |
1011 | | * 00000000 GGGGGGgg HHHHhhhh IIiiiiii |
1012 | | * 00000000 DDDDDDdd EEEEeeee FFffffff |
1013 | | * 00000000 AAAAAAaa BBBBbbbb CCcccccc */ |
1014 | |
|
1015 | 0 | return _mm_shuffle_epi8(out, _mm_setr_epi8( |
1016 | 0 | 2, 1, 0, |
1017 | 0 | 6, 5, 4, |
1018 | 0 | 10, 9, 8, |
1019 | 0 | 14, 13, 12, |
1020 | 0 | -1, -1, -1, -1)); |
1021 | | /* 00000000 00000000 00000000 00000000 |
1022 | | * LLllllll KKKKkkkk JJJJJJjj IIiiiiii |
1023 | | * HHHHhhhh GGGGGGgg FFffffff EEEEeeee |
1024 | | * DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa */ |
1025 | 0 | } |
1026 | | |
/* SSSE3 decode loop shared by the SSSE3 entry points.
 *
 * Expects these variables in the expanding scope and updates them in place:
 *   c      - read cursor into the encoded input,
 *   o      - write cursor into the output,
 *   outl   - number of output bytes produced so far,
 *   length - number of input bytes still to decode.
 * Each round loads 16 input bytes, validates them with two nibble-indexed
 * lookup tables and, if every byte is acceptable, stores the decoded vector
 * and advances by 16 input / 12 output bytes. When a byte is rejected the
 * loop stops without advancing, leaving that block for the scalar decoder.
 * NOTE(review): the rationale for the `15 + 6 + 2` bound is not visible
 * here - confirm before changing it.
 *
 * Wrapped in do { } while (0) so the macro expands to exactly one statement
 * and must be terminated with `;` at the use site; the inner `break` still
 * only leaves the `while` loop. */
#define PHP_BASE64_DECODE_SSSE3_LOOP \
	do { \
		while (length > 15 + 6 + 2) { \
			__m128i lut_lo, lut_hi, lut_roll; \
			__m128i hi_nibbles, lo_nibbles, hi, lo; \
			__m128i s = _mm_loadu_si128((__m128i *)c); \
			\
			lut_lo = _mm_setr_epi8( \
				0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \
				0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); \
			lut_hi = _mm_setr_epi8( \
				0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \
				0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); \
			lut_roll = _mm_setr_epi8( \
				0, 16, 19, 4, -65, -65, -71, -71, \
				0, 0, 0, 0, 0, 0, 0, 0); \
			\
			hi_nibbles = _mm_and_si128( \
				_mm_srli_epi32(s, 4), _mm_set1_epi8(0x2f)); \
			lo_nibbles = _mm_and_si128(s, _mm_set1_epi8(0x2f)); \
			hi = _mm_shuffle_epi8(lut_hi, hi_nibbles); \
			lo = _mm_shuffle_epi8(lut_lo, lo_nibbles); \
			\
			if (UNEXPECTED( \
				_mm_movemask_epi8( \
					_mm_cmpgt_epi8( \
						_mm_and_si128(lo, hi), _mm_set1_epi8(0))))) { \
				break; \
			} else { \
				__m128i eq_2f, roll; \
				\
				eq_2f = _mm_cmpeq_epi8(s, _mm_set1_epi8(0x2f)); \
				roll = _mm_shuffle_epi8( \
					lut_roll, _mm_add_epi8(eq_2f, hi_nibbles)); \
				\
				s = _mm_add_epi8(s, roll); \
				s = php_base64_decode_ssse3_reshuffle(s); \
				\
				_mm_storeu_si128((__m128i *)o, s); \
				\
				c += 16; \
				o += 12; \
				outl += 12; \
				length -= 16; \
			} \
		} \
	} while (0)
1073 | | |
1074 | | #endif |
1075 | | |
1076 | | #if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER) |
1077 | | # if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_SSSE3_NATIVE) |
1078 | | PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict) |
1079 | | # elif defined(ZEND_INTRIN_AVX2_RESOLVER) |
1080 | | zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, bool strict) |
1081 | | # else |
1082 | | zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, bool strict) |
1083 | | # endif |
1084 | 0 | { |
1085 | 0 | const unsigned char *c = str; |
1086 | 0 | unsigned char *o; |
1087 | 0 | size_t outl = 0; |
1088 | 0 | zend_string *result; |
1089 | |
|
1090 | 0 | result = zend_string_alloc(length, 0); |
1091 | 0 | o = (unsigned char *)ZSTR_VAL(result); |
1092 | | |
1093 | | /* See: "Faster Base64 Encoding and Decoding using AVX2 Instructions" |
1094 | | * https://arxiv.org/pdf/1704.00605.pdf */ |
1095 | 0 | # if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) |
1096 | 0 | while (length > 31 + 11 + 2) { |
1097 | 0 | __m256i lut_lo, lut_hi, lut_roll; |
1098 | 0 | __m256i hi_nibbles, lo_nibbles, hi, lo; |
1099 | 0 | __m256i str = _mm256_loadu_si256((__m256i *)c); |
1100 | |
|
1101 | 0 | lut_lo = _mm256_setr_epi8( |
1102 | 0 | 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, |
1103 | 0 | 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A, |
1104 | 0 | 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, |
1105 | 0 | 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); |
1106 | |
|
1107 | 0 | lut_hi = _mm256_setr_epi8( |
1108 | 0 | 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, |
1109 | 0 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, |
1110 | 0 | 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, |
1111 | 0 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); |
1112 | |
|
1113 | 0 | lut_roll = _mm256_setr_epi8( |
1114 | 0 | 0, 16, 19, 4, -65, -65, -71, -71, |
1115 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, |
1116 | 0 | 0, 16, 19, 4, -65, -65, -71, -71, |
1117 | 0 | 0, 0, 0, 0, 0, 0, 0, 0); |
1118 | |
|
1119 | 0 | hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), _mm256_set1_epi8(0x2f)); |
1120 | 0 | lo_nibbles = _mm256_and_si256(str, _mm256_set1_epi8(0x2f)); |
1121 | 0 | hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles); |
1122 | 0 | lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles); |
1123 | |
|
1124 | 0 | if (!_mm256_testz_si256(lo, hi)) { |
1125 | 0 | break; |
1126 | 0 | } else { |
1127 | 0 | __m256i eq_2f, roll; |
1128 | 0 | eq_2f = _mm256_cmpeq_epi8(str, _mm256_set1_epi8(0x2f)); |
1129 | 0 | roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2f, hi_nibbles)); |
1130 | | |
1131 | |
|
1132 | 0 | str = _mm256_add_epi8(str, roll); |
1133 | |
|
1134 | 0 | str = php_base64_decode_avx2_reshuffle(str); |
1135 | |
|
1136 | 0 | _mm256_storeu_si256((__m256i *)o, str); |
1137 | |
|
1138 | 0 | c += 32; |
1139 | 0 | o += 24; |
1140 | 0 | outl += 24; |
1141 | 0 | length -= 32; |
1142 | 0 | } |
1143 | 0 | } |
1144 | | # else |
1145 | | PHP_BASE64_DECODE_SSSE3_LOOP; |
1146 | | # endif |
1147 | |
|
1148 | 0 | if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { |
1149 | 0 | zend_string_efree(result); |
1150 | 0 | return NULL; |
1151 | 0 | } |
1152 | | |
1153 | 0 | ZSTR_LEN(result) = outl; |
1154 | |
|
1155 | 0 | return result; |
1156 | 0 | } |
1157 | | |
1158 | | # if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(ZEND_INTRIN_AVX2_RESOLVER) |
1159 | | zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, bool strict) |
1160 | 0 | { |
1161 | 0 | const unsigned char *c = str; |
1162 | 0 | unsigned char *o; |
1163 | 0 | size_t outl = 0; |
1164 | 0 | zend_string *result; |
1165 | |
|
1166 | 0 | result = zend_string_alloc(length, 0); |
1167 | 0 | o = (unsigned char *)ZSTR_VAL(result); |
1168 | |
|
1169 | 0 | PHP_BASE64_DECODE_SSSE3_LOOP; |
1170 | |
|
1171 | 0 | if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { |
1172 | 0 | zend_string_efree(result); |
1173 | 0 | return NULL; |
1174 | 0 | } |
1175 | | |
1176 | 0 | ZSTR_LEN(result) = outl; |
1177 | |
|
1178 | 0 | return result; |
1179 | 0 | } |
1180 | | # endif |
1181 | | #endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */ |
1182 | | |
1183 | | #if !defined(ZEND_INTRIN_AVX2_NATIVE) && !defined(ZEND_INTRIN_SSSE3_NATIVE) |
1184 | | #if defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_RESOLVER) |
1185 | | zend_string *php_base64_encode_default(const unsigned char *str, size_t length, zend_long flags) |
1186 | | #else |
1187 | | PHPAPI zend_string *php_base64_encode_ex(const unsigned char *str, size_t length, zend_long flags) |
1188 | | #endif |
1189 | 0 | { |
1190 | 0 | unsigned char *p; |
1191 | 0 | zend_string *result; |
1192 | |
|
1193 | 0 | result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); |
1194 | 0 | p = (unsigned char *)ZSTR_VAL(result); |
1195 | |
|
1196 | 0 | p = php_base64_encode_impl(str, length, p, flags); |
1197 | |
|
1198 | 0 | ZSTR_LEN(result) = (p - (unsigned char *)ZSTR_VAL(result)); |
1199 | |
|
1200 | 0 | return result; |
1201 | 0 | } |
1202 | | #endif |
1203 | | |
1204 | | #if !defined(ZEND_INTRIN_AVX2_NATIVE) && !defined(ZEND_INTRIN_SSSE3_NATIVE) |
1205 | | #if defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_RESOLVER) |
1206 | | zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, bool strict) |
1207 | | #else |
1208 | | PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict) |
1209 | | #endif |
1210 | 0 | { |
1211 | 0 | zend_string *result; |
1212 | 0 | size_t outl = 0; |
1213 | |
|
1214 | 0 | result = zend_string_alloc(length, 0); |
1215 | |
|
1216 | 0 | if (!php_base64_decode_impl(str, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { |
1217 | 0 | zend_string_efree(result); |
1218 | 0 | return NULL; |
1219 | 0 | } |
1220 | | |
1221 | 0 | ZSTR_LEN(result) = outl; |
1222 | |
|
1223 | 0 | return result; |
1224 | 0 | } |
1225 | | #endif |
1226 | | /* }}} */ |
1227 | | |
1228 | | /* {{{ Encodes string using MIME base64 algorithm */ |
1229 | | PHP_FUNCTION(base64_encode) |
1230 | 9 | { |
1231 | 9 | char *str; |
1232 | 9 | size_t str_len; |
1233 | 9 | zend_string *result; |
1234 | | |
1235 | 27 | ZEND_PARSE_PARAMETERS_START(1, 1) |
1236 | 36 | Z_PARAM_STRING(str, str_len) |
1237 | 9 | ZEND_PARSE_PARAMETERS_END(); |
1238 | | |
1239 | 9 | result = php_base64_encode((unsigned char*)str, str_len); |
1240 | 9 | RETURN_STR(result); |
1241 | 9 | } |
1242 | | /* }}} */ |
1243 | | |
1244 | | /* {{{ Decodes string using MIME base64 algorithm */ |
1245 | | PHP_FUNCTION(base64_decode) |
1246 | 0 | { |
1247 | 0 | char *str; |
1248 | 0 | bool strict = 0; |
1249 | 0 | size_t str_len; |
1250 | 0 | zend_string *result; |
1251 | |
|
1252 | 0 | ZEND_PARSE_PARAMETERS_START(1, 2) |
1253 | 0 | Z_PARAM_STRING(str, str_len) |
1254 | 0 | Z_PARAM_OPTIONAL |
1255 | 0 | Z_PARAM_BOOL(strict) |
1256 | 0 | ZEND_PARSE_PARAMETERS_END(); |
1257 | | |
1258 | 0 | result = php_base64_decode_ex((unsigned char*)str, str_len, strict); |
1259 | 0 | if (result != NULL) { |
1260 | 0 | RETURN_STR(result); |
1261 | 0 | } else { |
1262 | 0 | RETURN_FALSE; |
1263 | 0 | } |
1264 | 0 | } |
1265 | | /* }}} */ |