/src/php-src/ext/standard/base64.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | +----------------------------------------------------------------------+ |
3 | | | Copyright (c) The PHP Group | |
4 | | +----------------------------------------------------------------------+ |
5 | | | This source file is subject to version 3.01 of the PHP license, | |
6 | | | that is bundled with this package in the file LICENSE, and is | |
7 | | | available through the world-wide-web at the following url: | |
8 | | | https://www.php.net/license/3_01.txt | |
9 | | | If you did not receive a copy of the PHP license and are unable to | |
10 | | | obtain it through the world-wide-web, please send a note to | |
11 | | | license@php.net so we can mail you a copy immediately. | |
12 | | +----------------------------------------------------------------------+ |
13 | | | Author: Jim Winstead <jimw@php.net> | |
14 | | | Xinchen Hui <laruence@php.net> | |
15 | | +----------------------------------------------------------------------+ |
16 | | */ |
17 | | |
18 | | #include <string.h> |
19 | | |
20 | | #include "php.h" |
21 | | #include "base64.h" |
22 | | |
23 | | /* {{{ base64 tables */ |
24 | | static const char base64_table[] = { /* encoding alphabet: entry i is the ASCII symbol for the 6-bit value i (0..63); one NUL entry follows the 64 symbols */ |
25 | | 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', |
26 | | 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', |
27 | | 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', |
28 | | 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', |
29 | | '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', '\0' |
30 | | }; |
31 | | |
32 | | static const char base64_pad = '='; /* padding character: appended by the scalar encoder tail and counted by the scalar decoder */ |
33 | | |
34 | | static const short base64_reverse_table[256] = { /* decode table indexed by input byte: 0..63 is the 6-bit value, -1 marks the bytes skipped as whitespace (0x09, 0x0a, 0x0d, 0x20), -2 marks every other byte, including the pad character */ |
35 | | -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2, -2, -1, -2, -2, |
36 | | -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, |
37 | | -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 62, -2, -2, -2, 63, |
38 | | 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -2, -2, -2, -2, -2, -2, |
39 | | -2, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, |
40 | | 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -2, -2, -2, -2, -2, |
41 | | -2, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, |
42 | | 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -2, -2, -2, -2, -2, |
43 | | -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, |
44 | | -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, |
45 | | -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, |
46 | | -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, |
47 | | -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, |
48 | | -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, |
49 | | -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, |
50 | | -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2 |
51 | | }; |
52 | | /* }}} */ |
53 | | |
54 | | #if defined(__aarch64__) || defined(_M_ARM64) |
55 | | #include <arm_neon.h> |
56 | | /* Maps sixteen 6-bit values (one per byte lane, 0..63) to their base64 ASCII symbols: a small index is derived per lane, the matching offset is fetched from shift_LUT, and the offset is added to the input value. */ |
57 | | static zend_always_inline uint8x16_t encode_toascii(const uint8x16_t input, const uint8x16x2_t shift_LUT) |
58 | | { |
59 | | /* reduce 0..51 -> 0 |
60 | | 52..61 -> 1 .. 10 |
61 | | 62 -> 11 |
62 | | 63 -> 12 */ |
63 | | uint8x16_t result = vqsubq_u8(input, vdupq_n_u8(51)); |
64 | | /* distinguish between ranges 0..25 and 26..51: |
65 | | 0 .. 25 -> becomes 13 (`less` is set in lanes where 26 > input) |
66 | | 26 .. 51 -> remains 0 */ |
67 | | const uint8x16_t less = vcgtq_u8(vdupq_n_u8(26), input); |
68 | | result = vorrq_u8(result, vandq_u8(less, vdupq_n_u8(13))); |
69 | | /* read shift: look up the per-lane offset for the index computed above */ |
70 | | result = vqtbl2q_u8(shift_LUT, result); |
71 | | return vaddq_u8(result, input); /* offset + 6-bit value = ASCII symbol */ |
72 | | } |
73 | | /* NEON encoder: consumes `in` in 48-byte steps and writes 64 ASCII bytes per step to `out`. Stores the number of unconsumed input bytes in *left and returns the advanced output pointer. The loop body runs before the length test, so the caller must pass inl >= 48 (php_base64_encode_impl checks this before calling). */ |
74 | | static zend_always_inline unsigned char *neon_base64_encode(const unsigned char *in, size_t inl, unsigned char *out, size_t *left) |
75 | | { |
76 | | const uint8_t shift_LUT_[32] = {'a' - 26, '0' - 52, '0' - 52, '0' - 52, |
77 | | '0' - 52, '0' - 52, '0' - 52, '0' - 52, |
78 | | '0' - 52, '0' - 52, '0' - 52, '+' - 62, |
79 | | '/' - 63, 'A', 0, 0, |
80 | | 'a' - 26, '0' - 52, '0' - 52, '0' - 52, |
81 | | '0' - 52, '0' - 52, '0' - 52, '0' - 52, |
82 | | '0' - 52, '0' - 52, '0' - 52, '+' - 62, |
83 | | '/' - 63, 'A', 0, 0}; /* offsets by index: 0 -> 'a' - 26, 1..10 -> '0' - 52, 11 -> '+' - 62, 12 -> '/' - 63, 13 -> 'A'; the same 16 entries fill both halves of the two-register table */ |
84 | | const uint8x16x2_t shift_LUT = *((const uint8x16x2_t *)shift_LUT_); |
85 | | do { |
86 | | /* [ccdddddd | bbbbcccc | aaaaaabb] |
87 | | x.val[2] | x.val[1] | x.val[0] */ |
88 | | const uint8x16x3_t x = vld3q_u8((const uint8_t *)(in)); |
89 | | |
90 | | /* [00aa_aaaa] */ |
91 | | const uint8x16_t field_a = vshrq_n_u8(x.val[0], 2); |
92 | | |
93 | | const uint8x16_t field_b = /* [00bb_bbbb] */ |
94 | | vbslq_u8(vdupq_n_u8(0x30), /* [0011_0000] */ |
95 | | vshlq_n_u8(x.val[0], 4), /* [aabb_0000] */ |
96 | | vshrq_n_u8(x.val[1], 4)); /* [0000_bbbb] */ |
97 | | |
98 | | const uint8x16_t field_c = /* [00cc_cccc] */ |
99 | | vbslq_u8(vdupq_n_u8(0x3c), /* [0011_1100] */ |
100 | | vshlq_n_u8(x.val[1], 2), /* [bbcc_cc00] */ |
101 | | vshrq_n_u8(x.val[2], 6)); /* [0000_00cc] */ |
102 | | |
103 | | /* [00dd_dddd] */ |
104 | | const uint8x16_t field_d = vandq_u8(x.val[2], vdupq_n_u8(0x3f)); |
105 | | |
106 | | uint8x16x4_t result; |
107 | | result.val[0] = encode_toascii(field_a, shift_LUT); |
108 | | result.val[1] = encode_toascii(field_b, shift_LUT); |
109 | | result.val[2] = encode_toascii(field_c, shift_LUT); |
110 | | result.val[3] = encode_toascii(field_d, shift_LUT); |
111 | | |
112 | | vst4q_u8((uint8_t *)out, result); |
113 | | out += 64; /* 16 groups x 4 output characters */ |
114 | | in += 16 * 3; |
115 | | inl -= 16 * 3; |
116 | | } while (inl >= 16 * 3); |
117 | | |
118 | | *left = inl; /* fewer than 48 bytes remain for the scalar path */ |
119 | | return out; |
120 | | } |
121 | | #endif /* defined(__aarch64__) || defined(_M_ARM64) */ |
122 | | /* Scalar encoder: writes the base64 form of in[0..inl) to out, stores a NUL terminator and returns a pointer to that terminator. On aarch64, inputs of at least 48 bytes are first run through neon_base64_encode and only the remainder is handled here. When PHP_BASE64_NO_PADDING is set in flags, the trailing pad characters are omitted. */ |
123 | | static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned char *in, size_t inl, unsigned char *out, zend_long flags) /* {{{ */ |
124 | 12 | { |
125 | | #if defined(__aarch64__) || defined(_M_ARM64) |
126 | | if (inl >= 16 * 3) { |
127 | | size_t left = 0; |
128 | | out = neon_base64_encode(in, inl, out, &left); |
129 | | in += inl - left; /* skip the bytes the NEON loop consumed */ |
130 | | inl = left; |
131 | | } |
132 | | #endif |
133 | | |
134 | 79 | while (inl > 2) { /* keep going until we have less than 24 bits */ |
135 | 67 | *out++ = base64_table[in[0] >> 2]; |
136 | 67 | *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)]; |
137 | 67 | *out++ = base64_table[((in[1] & 0x0f) << 2) + (in[2] >> 6)]; |
138 | 67 | *out++ = base64_table[in[2] & 0x3f]; |
139 | | |
140 | 67 | in += 3; |
141 | 67 | inl -= 3; /* we just handle 3 octets of data */ |
142 | 67 | } |
143 | | |
144 | | /* now deal with the tail end of things */ |
145 | 12 | if (inl != 0) { |
146 | 12 | *out++ = base64_table[in[0] >> 2]; |
147 | 12 | if (inl > 1) { /* two bytes left: three symbols plus one pad */ |
148 | 10 | *out++ = base64_table[((in[0] & 0x03) << 4) + (in[1] >> 4)]; |
149 | 10 | *out++ = base64_table[(in[1] & 0x0f) << 2]; |
150 | 10 | if ((flags & PHP_BASE64_NO_PADDING) == 0) { |
151 | 10 | *out++ = base64_pad; |
152 | 10 | } |
153 | 10 | } else { /* one byte left: two symbols plus two pads */ |
154 | 2 | *out++ = base64_table[(in[0] & 0x03) << 4]; |
155 | 2 | if ((flags & PHP_BASE64_NO_PADDING) == 0) { |
156 | 2 | *out++ = base64_pad; |
157 | 2 | *out++ = base64_pad; |
158 | 2 | } |
159 | 2 | } |
160 | 12 | } |
161 | | |
162 | 12 | *out = '\0'; /* terminator is written but out is not advanced past it */ |
163 | | |
164 | 12 | return out; |
165 | 12 | } |
166 | | /* }}} */ |
167 | | |
168 | | #if defined(__aarch64__) || defined(_M_ARM64) |
169 | | static zend_always_inline uint8x16_t decode_fromascii(const uint8x16_t input, uint8x16_t *error, const uint8x16x2_t shiftLUT, const uint8x16x2_t maskLUT, const uint8x16x2_t bitposLUT) { /* Converts 16 ASCII bytes to 6-bit values by adding a per-lane shift, and reports in *error which lanes failed the mask/bit validity test (non-zero lane = rejected byte). */ |
170 | | const uint8x16_t higher_nibble = vshrq_n_u8(input, 4); |
171 | | const uint8x16_t lower_nibble = vandq_u8(input, vdupq_n_u8(0x0f)); |
172 | | const uint8x16_t sh = vqtbl2q_u8(shiftLUT, higher_nibble); /* shift selected by the high nibble of the byte */ |
173 | | const uint8x16_t eq_2f = vceqq_u8(input, vdupq_n_u8(0x2f)); |
174 | | const uint8x16_t shift = vbslq_u8(eq_2f, vdupq_n_u8(16), sh); /* lanes equal to 0x2f get the fixed shift 16 (0x2f + 16 = 63) instead of the table value */ |
175 | | const uint8x16_t M = vqtbl2q_u8(maskLUT, lower_nibble); |
176 | | const uint8x16_t bit = vqtbl2q_u8(bitposLUT, higher_nibble); |
177 | | *error = vceqq_u8(vandq_u8(M, bit), vdupq_n_u8(0)); /* a lane is flagged when its mask and bit share no set bit */ |
178 | | return vaddq_u8(input, shift); |
179 | | } |
180 | | /* NEON decoder: consumes `in` in 64-byte steps and writes 48 decoded bytes per step to `out`. It stops before the first 64-byte chunk in which any lane is flagged by decode_fromascii, leaving that chunk unconsumed. Stores the number of unconsumed input bytes in *left and returns the number of bytes written. The loop body runs before the length test, so the caller must pass inl >= 64. */ |
181 | | static zend_always_inline size_t neon_base64_decode(const unsigned char *in, size_t inl, unsigned char *out, size_t *left) { |
182 | | unsigned char *out_orig = out; |
183 | | const uint8_t shiftLUT_[32] = { |
184 | | 0, 0, 19, 4, (uint8_t)-65, (uint8_t)-65, (uint8_t)-71, (uint8_t)-71, |
185 | | 0, 0, 0, 0, 0, 0, 0, 0, |
186 | | 0, 0, 19, 4, (uint8_t)-65, (uint8_t)-65, (uint8_t)-71, (uint8_t)-71, |
187 | | 0, 0, 0, 0, 0, 0, 0, 0}; /* indexed by the high nibble of the input byte; the same 16 entries fill both halves of the table */ |
188 | | const uint8_t maskLUT_[32] = { |
189 | | /* 0 : 0b1010_1000*/ 0xa8, |
190 | | /* 1 .. 9 : 0b1111_1000*/ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, |
191 | | /* 10 : 0b1111_0000*/ 0xf0, |
192 | | /* 11 : 0b0101_0100*/ 0x54, |
193 | | /* 12 .. 14 : 0b0101_0000*/ 0x50, 0x50, 0x50, |
194 | | /* 15 : 0b0101_0100*/ 0x54, |
195 | | |
196 | | /* 0 : 0b1010_1000*/ 0xa8, |
197 | | /* 1 .. 9 : 0b1111_1000*/ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, |
198 | | /* 10 : 0b1111_0000*/ 0xf0, |
199 | | /* 11 : 0b0101_0100*/ 0x54, |
200 | | /* 12 .. 14 : 0b0101_0000*/ 0x50, 0x50, 0x50, |
201 | | /* 15 : 0b0101_0100*/ 0x54 |
202 | | }; /* indexed by the low nibble of the input byte */ |
203 | | const uint8_t bitposLUT_[32] = { |
204 | | 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, |
205 | | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, |
206 | | |
207 | | 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, |
208 | | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 |
209 | | }; /* indexed by the high nibble: one bit per nibble value 0..7, zero for 8..15 so those bytes always fail the test */ |
210 | | const uint8x16x2_t shiftLUT = *((const uint8x16x2_t *)shiftLUT_); |
211 | | const uint8x16x2_t maskLUT = *((const uint8x16x2_t *)maskLUT_); |
212 | | const uint8x16x2_t bitposLUT = *((const uint8x16x2_t *)bitposLUT_);; /* NOTE(review): the second ';' is a stray empty statement */ |
213 | | |
214 | | do { |
215 | | const uint8x16x4_t x = vld4q_u8((const unsigned char *)in); |
216 | | uint8x16_t error_a; |
217 | | uint8x16_t error_b; |
218 | | uint8x16_t error_c; |
219 | | uint8x16_t error_d; |
220 | | uint8x16_t field_a = decode_fromascii(x.val[0], &error_a, shiftLUT, maskLUT, bitposLUT); |
221 | | uint8x16_t field_b = decode_fromascii(x.val[1], &error_b, shiftLUT, maskLUT, bitposLUT); |
222 | | uint8x16_t field_c = decode_fromascii(x.val[2], &error_c, shiftLUT, maskLUT, bitposLUT); |
223 | | uint8x16_t field_d = decode_fromascii(x.val[3], &error_d, shiftLUT, maskLUT, bitposLUT); |
224 | | |
225 | | const uint8x16_t err = vorrq_u8(vorrq_u8(error_a, error_b), vorrq_u8(error_c, error_d)); |
226 | | union {uint8_t mem[16]; uint64_t dw[2]; } error; /* lets the 16 error lanes be tested as two 64-bit words */ |
227 | | vst1q_u8(error.mem, err); |
228 | | |
229 | | /* Check that the input only contains bytes belonging to the alphabet of |
230 | | Base64. If there are errors, decode the rest of the string with the |
231 | | scalar decoder. */ |
232 | | if (error.dw[0] | error.dw[1]) |
233 | | break; |
234 | | |
235 | | uint8x16x3_t result; |
236 | | result.val[0] = vorrq_u8(vshrq_n_u8(field_b, 4), vshlq_n_u8(field_a, 2)); |
237 | | result.val[1] = vorrq_u8(vshrq_n_u8(field_c, 2), vshlq_n_u8(field_b, 4)); |
238 | | result.val[2] = vorrq_u8(field_d, vshlq_n_u8(field_c, 6)); |
239 | | |
240 | | vst3q_u8((unsigned char *)out, result); |
241 | | out += 16 * 3; |
242 | | in += 16 * 4; |
243 | | inl -= 16 * 4; |
244 | | } while (inl >= 16 * 4); |
245 | | *left = inl; /* includes a rejected chunk, if the loop ended on the break above */ |
246 | | return out - out_orig; |
247 | | } |
248 | | #endif /* defined(__aarch64__) || defined(_M_ARM64) */ |
249 | | /* Scalar decoder: decodes in[0..inl) into out. *outl is read as the starting write offset and, on success, receives the final decoded length; a NUL is stored at out[*outl]. Returns 1 on success and 0 on failure; failure is only possible when strict is true, and *outl is then left unchanged. */ |
250 | | static zend_always_inline int php_base64_decode_impl(const unsigned char *in, size_t inl, unsigned char *out, size_t *outl, bool strict) /* {{{ */ |
251 | 0 | { |
252 | 0 | int ch; |
253 | 0 | size_t i = 0, padding = 0, j = *outl; /* i: alphabet characters consumed, padding: pad characters seen, j: write offset into out */ |
254 |

255 | | #if defined(__aarch64__) || defined(_M_ARM64) |
256 | | if (inl >= 16 * 4) { |
257 | | size_t left = 0; |
258 | | j += neon_base64_decode(in, inl, out, &left); /* NOTE(review): NEON output starts at out[0], which matches j only if *outl was 0 on entry - confirm against callers */ |
259 | | i = inl - left; /* bytes consumed by the NEON loop, always a multiple of 64 */ |
260 | | in += i; |
261 | | inl = left; |
262 | | } |
263 | | #endif |
264 | | |
265 | | /* run through the whole string, converting as we go */ |
266 | 0 | while (inl-- > 0) { |
267 | 0 | ch = *in++; |
268 | 0 | if (ch == base64_pad) { |
269 | 0 | padding++; |
270 | 0 | continue; |
271 | 0 | } |
272 | | |
273 | 0 | ch = base64_reverse_table[ch]; |
274 | 0 | if (!strict) { |
275 | | /* skip unknown characters and whitespace */ |
276 | 0 | if (ch < 0) { |
277 | 0 | continue; |
278 | 0 | } |
279 | 0 | } else { |
280 | | /* skip whitespace */ |
281 | 0 | if (ch == -1) { |
282 | 0 | continue; |
283 | 0 | } |
284 | | /* fail on bad characters or if any data follows padding */ |
285 | 0 | if (ch == -2 || padding) { |
286 | 0 | goto fail; |
287 | 0 | } |
288 | 0 | } |
289 | | |
290 | 0 | switch (i % 4) { /* position of this 6-bit value inside its group of four */ |
291 | 0 | case 0: |
292 | 0 | out[j] = ch << 2; |
293 | 0 | break; |
294 | 0 | case 1: |
295 | 0 | out[j++] |= ch >> 4; |
296 | 0 | out[j] = (ch & 0x0f) << 4; |
297 | 0 | break; |
298 | 0 | case 2: |
299 | 0 | out[j++] |= ch >>2; |
300 | 0 | out[j] = (ch & 0x03) << 6; |
301 | 0 | break; |
302 | 0 | case 3: |
303 | 0 | out[j++] |= ch; |
304 | 0 | break; |
305 | 0 | } |
306 | 0 | i++; |
307 | 0 | } |
308 | | |
309 | | /* fail if the input is truncated (only one char in last group) */ |
310 | 0 | if (strict && i % 4 == 1) { |
311 | 0 | goto fail; |
312 | 0 | } |
313 | | |
314 | | /* fail if the padding length is wrong (not VV==, VVV=), but accept zero padding |
315 | | * RFC 4648: "In some circumstances, the use of padding [--] is not required" */ |
316 | 0 | if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) { |
317 | 0 | goto fail; |
318 | 0 | } |
319 | | |
320 | 0 | *outl = j; |
321 | 0 | out[j] = '\0'; /* overwrites any partially assembled byte left at out[j] */ |
322 |

323 | 0 | return 1; |
324 | | |
325 | 0 | fail: |
326 | 0 | return 0; |
327 | 0 | } |
328 | | /* }}} */ |
329 | | |
330 | | /* {{{ php_base64_encode */ |
331 | | /* SSSE3 dispatch setup. With native AVX2 every SSSE3 variant macro is undefined. When AVX2 is reached through a resolver (FUNC_PROTO or FUNC_PTR) and SSSE3 is native, SSSE3 is switched from native to the same resolver kind and ZEND_INTRIN_SSSE3_FUNC_DECL is redefined, adding the ssse3 target attribute when HAVE_FUNC_ATTRIBUTE_TARGET is defined. */ |
332 | | #ifdef ZEND_INTRIN_AVX2_NATIVE |
333 | | # undef ZEND_INTRIN_SSSE3_NATIVE |
334 | | # undef ZEND_INTRIN_SSSE3_RESOLVER |
335 | | # undef ZEND_INTRIN_SSSE3_FUNC_PROTO |
336 | | # undef ZEND_INTRIN_SSSE3_FUNC_PTR |
337 | | #elif defined(ZEND_INTRIN_AVX2_FUNC_PROTO) && defined(ZEND_INTRIN_SSSE3_NATIVE) |
338 | | # undef ZEND_INTRIN_SSSE3_NATIVE |
339 | | # undef ZEND_INTRIN_SSSE3_RESOLVER |
340 | | # define ZEND_INTRIN_SSSE3_RESOLVER 1 |
341 | | # define ZEND_INTRIN_SSSE3_FUNC_PROTO 1 |
342 | | # undef ZEND_INTRIN_SSSE3_FUNC_DECL |
343 | | # ifdef HAVE_FUNC_ATTRIBUTE_TARGET |
344 | | # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3"))) |
345 | | # else |
346 | | # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func |
347 | | # endif |
348 | | #elif defined(ZEND_INTRIN_AVX2_FUNC_PTR) && defined(ZEND_INTRIN_SSSE3_NATIVE) |
349 | | # undef ZEND_INTRIN_SSSE3_NATIVE |
350 | | # undef ZEND_INTRIN_SSSE3_RESOLVER |
351 | | # define ZEND_INTRIN_SSSE3_RESOLVER 1 |
352 | | # define ZEND_INTRIN_SSSE3_FUNC_PTR 1 |
353 | | # undef ZEND_INTRIN_SSSE3_FUNC_DECL |
354 | | # ifdef HAVE_FUNC_ATTRIBUTE_TARGET |
355 | | # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3"))) |
356 | | # else |
357 | | # define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func |
358 | | # endif |
359 | | #endif |
360 | | |
361 | | /* Only enable the AVX-512 and AVX-512 VBMI resolvers when AVX2 also uses a resolver of the same kind (FUNC_PROTO with FUNC_PROTO, FUNC_PTR with FUNC_PTR) */ |
362 | | #if defined(ZEND_INTRIN_AVX2_FUNC_PROTO) && defined(ZEND_INTRIN_AVX512_FUNC_PROTO) |
363 | | #define BASE64_INTRIN_AVX512_FUNC_PROTO 1 |
364 | | #endif |
365 | | #if defined(ZEND_INTRIN_AVX2_FUNC_PTR) && defined(ZEND_INTRIN_AVX512_FUNC_PTR) |
366 | | #define BASE64_INTRIN_AVX512_FUNC_PTR 1 |
367 | | #endif |
368 | | #if defined(ZEND_INTRIN_AVX2_FUNC_PROTO) && defined(ZEND_INTRIN_AVX512_VBMI_FUNC_PROTO) |
369 | | #define BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO 1 |
370 | | #endif |
371 | | #if defined(ZEND_INTRIN_AVX2_FUNC_PTR) && defined(ZEND_INTRIN_AVX512_VBMI_FUNC_PTR) |
372 | | #define BASE64_INTRIN_AVX512_VBMI_FUNC_PTR 1 |
373 | | #endif |
374 | | |
375 | | #ifdef ZEND_INTRIN_AVX2_NATIVE |
376 | | # include <immintrin.h> |
377 | | #elif defined(ZEND_INTRIN_SSSE3_NATIVE) |
378 | | # include <tmmintrin.h> |
379 | | #elif defined(ZEND_INTRIN_SSSE3_RESOLVER) || defined(ZEND_INTRIN_AVX2_RESOLVER) |
380 | | # ifdef ZEND_INTRIN_AVX2_RESOLVER |
381 | | # include <immintrin.h> |
382 | | # else |
383 | | # include <tmmintrin.h> |
384 | | # endif /* ZEND_INTRIN_AVX2_RESOLVER */ |
385 | | # include "Zend/zend_cpuinfo.h" |
386 | | |
387 | | # if defined(BASE64_INTRIN_AVX512_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_FUNC_PTR) |
388 | | ZEND_INTRIN_AVX512_FUNC_DECL(zend_string *php_base64_encode_avx512(const unsigned char *str, size_t length, zend_long flags)); |
389 | | ZEND_INTRIN_AVX512_FUNC_DECL(zend_string *php_base64_decode_ex_avx512(const unsigned char *str, size_t length, bool strict)); |
390 | | # endif |
391 | | # if defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PTR) |
392 | | ZEND_INTRIN_AVX512_VBMI_FUNC_DECL(zend_string *php_base64_encode_avx512_vbmi(const unsigned char *str, size_t length, zend_long flags)); |
393 | | ZEND_INTRIN_AVX512_VBMI_FUNC_DECL(zend_string *php_base64_decode_ex_avx512_vbmi(const unsigned char *str, size_t length, bool strict)); |
394 | | # endif |
395 | | |
396 | | # ifdef ZEND_INTRIN_AVX2_RESOLVER |
397 | | ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length, zend_long flags)); |
398 | | ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, bool strict)); |
399 | | # endif |
400 | | |
401 | | # ifdef ZEND_INTRIN_SSSE3_RESOLVER |
402 | | ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length, zend_long flags)); |
403 | | ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, bool strict)); |
404 | | # endif |
405 | | /* fallback variants, selected when none of the CPU feature checks in the resolver or MINIT code succeeds */ |
406 | | zend_string *php_base64_encode_default(const unsigned char *str, size_t length, zend_long flags); |
407 | | zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, bool strict); |
408 | | /* FUNC_PROTO dispatch: the public entry points are declared with the ifunc attribute, naming resolve_base64_encode / resolve_base64_decode as the functions that pick the implementation. */ |
409 | | # if (defined(ZEND_INTRIN_AVX2_FUNC_PROTO) || defined(ZEND_INTRIN_SSSE3_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO)) |
410 | | PHPAPI zend_string *php_base64_encode_ex(const unsigned char *str, size_t length, zend_long flags) __attribute__((ifunc("resolve_base64_encode"))); |
411 | | PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict) __attribute__((ifunc("resolve_base64_decode"))); |
412 | | |
413 | | typedef zend_string *(*base64_encode_func_t)(const unsigned char *, size_t, zend_long flags); /* return type of resolve_base64_encode */ |
414 | | typedef zend_string *(*base64_decode_func_t)(const unsigned char *, size_t, bool); /* return type of resolve_base64_decode */ |
415 | | |
416 | | ZEND_NO_SANITIZE_ADDRESS |
417 | | ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */ |
418 | 16 | static base64_encode_func_t resolve_base64_encode(void) { /* resolver named by the ifunc attribute on php_base64_encode_ex: returns the first compiled-in encoder whose CPU check passes, tried in the order AVX512-VBMI, AVX512, AVX2, SSSE3, else the default encoder */ |
419 | 16 | # ifdef BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO |
420 | 16 | if (zend_cpu_supports_avx512_vbmi()) { |
421 | 0 | return php_base64_encode_avx512_vbmi; |
422 | 0 | } else |
423 | 16 | # endif |
424 | 16 | # ifdef BASE64_INTRIN_AVX512_FUNC_PROTO |
425 | 16 | if (zend_cpu_supports_avx512()) { |
426 | 0 | return php_base64_encode_avx512; |
427 | 0 | } else |
428 | 16 | # endif |
429 | 16 | # ifdef ZEND_INTRIN_AVX2_FUNC_PROTO |
430 | 16 | if (zend_cpu_supports_avx2()) { |
431 | 16 | return php_base64_encode_avx2; |
432 | 16 | } else |
433 | 0 | # endif |
434 | 0 | #ifdef ZEND_INTRIN_SSSE3_FUNC_PROTO |
435 | 0 | if (zend_cpu_supports_ssse3()) { |
436 | 0 | return php_base64_encode_ssse3; |
437 | 0 | } |
438 | 0 | #endif |
439 | 0 | return php_base64_encode_default; /* each dangling else above falls through to the next enabled check or to this fallback */ |
440 | 16 | } |
441 | | |
442 | | ZEND_NO_SANITIZE_ADDRESS |
443 | | ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */ |
444 | 16 | static base64_decode_func_t resolve_base64_decode(void) { /* resolver named by the ifunc attribute on php_base64_decode_ex: returns the first compiled-in decoder whose CPU check passes, tried in the order AVX512-VBMI, AVX512, AVX2, SSSE3, else the default decoder */ |
445 | 16 | # ifdef BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO |
446 | 16 | if (zend_cpu_supports_avx512_vbmi()) { |
447 | 0 | return php_base64_decode_ex_avx512_vbmi; |
448 | 0 | } else |
449 | 16 | # endif |
450 | 16 | # ifdef BASE64_INTRIN_AVX512_FUNC_PROTO |
451 | 16 | if (zend_cpu_supports_avx512()) { |
452 | 0 | return php_base64_decode_ex_avx512; |
453 | 0 | } else |
454 | 16 | # endif |
455 | 16 | # ifdef ZEND_INTRIN_AVX2_FUNC_PROTO |
456 | 16 | if (zend_cpu_supports_avx2()) { |
457 | 16 | return php_base64_decode_ex_avx2; |
458 | 16 | } else |
459 | 0 | # endif |
460 | 0 | #ifdef ZEND_INTRIN_SSSE3_FUNC_PROTO |
461 | 0 | if (zend_cpu_supports_ssse3()) { |
462 | 0 | return php_base64_decode_ex_ssse3; |
463 | 0 | } |
464 | 0 | #endif |
465 | 0 | return php_base64_decode_ex_default; /* each dangling else above falls through to the next enabled check or to this fallback */ |
466 | 16 | } |
467 | | # else /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */ |
468 | | /* FUNC_PTR dispatch: implementation pointers, NULL until PHP_MINIT_FUNCTION(base64_intrin) assigns them. */ |
469 | | PHPAPI zend_string *(*php_base64_encode_ptr)(const unsigned char *str, size_t length, zend_long flags) = NULL; |
470 | | PHPAPI zend_string *(*php_base64_decode_ex_ptr)(const unsigned char *str, size_t length, bool strict) = NULL; |
471 | | /* Public encode entry point for the FUNC_PTR build: forwards to php_base64_encode_ptr without a NULL check, so the pointer must already have been assigned. */ |
472 | | PHPAPI zend_string *php_base64_encode_ex(const unsigned char *str, size_t length, zend_long flags) { |
473 | | return php_base64_encode_ptr(str, length, flags); |
474 | | } |
475 | | PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict) { /* public decode entry point for the FUNC_PTR build: forwards to php_base64_decode_ex_ptr without a NULL check, so the pointer must already have been assigned */ |
476 | | return php_base64_decode_ex_ptr(str, length, strict); |
477 | | } |
478 | | /* Module-init hook for the FUNC_PTR build: assigns both implementation pointers to the first compiled-in variant whose CPU check passes (order: AVX512-VBMI, AVX512, AVX2, SSSE3), otherwise to the default functions. Always returns SUCCESS. */ |
479 | | PHP_MINIT_FUNCTION(base64_intrin) |
480 | | { |
481 | | # ifdef BASE64_INTRIN_AVX512_VBMI_FUNC_PTR |
482 | | if (zend_cpu_supports_avx512_vbmi()) { |
483 | | php_base64_encode_ptr = php_base64_encode_avx512_vbmi; |
484 | | php_base64_decode_ex_ptr = php_base64_decode_ex_avx512_vbmi; |
485 | | } else |
486 | | # endif |
487 | | # ifdef BASE64_INTRIN_AVX512_FUNC_PTR |
488 | | if (zend_cpu_supports_avx512()) { |
489 | | php_base64_encode_ptr = php_base64_encode_avx512; |
490 | | php_base64_decode_ex_ptr = php_base64_decode_ex_avx512; |
491 | | } else |
492 | | # endif |
493 | | # ifdef ZEND_INTRIN_AVX2_FUNC_PTR |
494 | | if (zend_cpu_supports_avx2()) { |
495 | | php_base64_encode_ptr = php_base64_encode_avx2; |
496 | | php_base64_decode_ex_ptr = php_base64_decode_ex_avx2; |
497 | | } else |
498 | | # endif |
499 | | #ifdef ZEND_INTRIN_SSSE3_FUNC_PTR |
500 | | if (zend_cpu_supports_ssse3()) { |
501 | | php_base64_encode_ptr = php_base64_encode_ssse3; |
502 | | php_base64_decode_ex_ptr = php_base64_decode_ex_ssse3; |
503 | | } else |
504 | | #endif |
505 | | { /* reached through the chain of dangling else branches when no enabled check passed */ |
506 | | php_base64_encode_ptr = php_base64_encode_default; |
507 | | php_base64_decode_ex_ptr = php_base64_decode_ex_default; |
508 | | } |
509 | | return SUCCESS; |
510 | | } |
511 | | # endif /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */ |
512 | | #endif /* ZEND_INTRIN_AVX2_NATIVE */ |
513 | | |
514 | | #if defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PTR) |
515 | | zend_string *php_base64_encode_avx512_vbmi(const unsigned char *str, size_t length, zend_long flags) |
516 | 0 | { /* AVX-512 VBMI encoder: converts 48 input bytes into 64 ASCII bytes per loop pass, hands the remaining bytes to php_base64_encode_impl, then sets the string length from the returned end pointer and returns the new zend_string */ |
517 | 0 | const unsigned char *c = str; |
518 | 0 | unsigned char *o; |
519 | 0 | zend_string *result; |
520 |

521 | 0 | result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); /* presumably sizes the buffer as ((length + 2) / 3) * 4 bytes - confirm against zend_string_safe_alloc */ |
522 | 0 | o = (unsigned char *)ZSTR_VAL(result); |
523 |

524 | 0 | const __m512i shuffle_splitting = _mm512_setr_epi32( |
525 | 0 | 0x01020001, 0x04050304, 0x07080607, 0x0a0b090a, 0x0d0e0c0d, 0x10110f10, |
526 | 0 | 0x13141213, 0x16171516, 0x191a1819, 0x1c1d1b1c, 0x1f201e1f, 0x22232122, |
527 | 0 | 0x25262425, 0x28292728, 0x2b2c2a2b, 0x2e2f2d2e); |
528 | 0 | const __m512i multi_shifts = _mm512_set1_epi64(0x3036242a1016040a); |
529 | 0 | const char *ascii_lookup_tbl = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; |
530 | 0 | const __m512i ascii_lookup = _mm512_loadu_si512((__m512i *)ascii_lookup_tbl); /* the 64 alphabet symbols fill the whole 512-bit register */ |
531 |

532 | 0 | while (length > 63) { /* each pass loads 64 input bytes but advances the input by only 48 */ |
533 | | /* Step 1: load input data */ |
534 | 0 | __m512i str = _mm512_loadu_si512((const __m512i *)c); |
535 | | |
536 | | /* Step 2: splitting 24-bit words into 32-bit lanes */ |
537 | 0 | str = _mm512_permutexvar_epi8(shuffle_splitting, str); |
538 | | |
539 | | /* Step 3: moving 6-bit word to separate bytes */ |
540 | 0 | str = _mm512_multishift_epi64_epi8(multi_shifts, str); |
541 | | |
542 | | /* Step 4: conversion to ASCII */ |
543 | 0 | str = _mm512_permutexvar_epi8(str, ascii_lookup); |
544 | | |
545 | | /* Step 5: store the final result */ |
546 | 0 | _mm512_storeu_si512((__m512i *)o, str); |
547 | 0 | c += 48; |
548 | 0 | o += 64; |
549 | 0 | length -= 48; |
550 | 0 | } |
551 |

552 | 0 | o = php_base64_encode_impl(c, length, o, flags); /* scalar tail; returns a pointer to the NUL terminator it wrote */ |
553 |

554 | 0 | ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result)); |
555 |

556 | 0 | return result; |
557 | 0 | } |
558 | | |
559 | | zend_string *php_base64_decode_ex_avx512_vbmi(const unsigned char *str, size_t length, bool strict) |
560 | 0 | { /* AVX-512 VBMI decoder: converts 64 ASCII bytes into 48 decoded bytes per loop pass until a chunk contains a rejected byte or 64 or fewer bytes remain, then lets php_base64_decode_impl finish. Returns the decoded zend_string, or NULL after freeing it when the scalar decoder reports failure. */ |
561 | 0 | const unsigned char *c = str; |
562 | 0 | unsigned char *o; |
563 | 0 | size_t outl = 0; |
564 | 0 | zend_string *result; |
565 |

566 | 0 | result = zend_string_alloc(length, 0); |
567 | 0 | o = (unsigned char *)ZSTR_VAL(result); |
568 |

569 | 0 | const __m512i lookup_0 = _mm512_setr_epi32( |
570 | 0 | 0x80808080, 0x80808080, 0x80808080, 0x80808080, 0x80808080, 0x80808080, |
571 | 0 | 0x80808080, 0x80808080, 0x80808080, 0x80808080, 0x3e808080, 0x3f808080, |
572 | 0 | 0x37363534, 0x3b3a3938, 0x80803d3c, 0x80808080); |
573 | 0 | const __m512i lookup_1 = _mm512_setr_epi32( |
574 | 0 | 0x02010080, 0x06050403, 0x0a090807, 0x0e0d0c0b, 0x1211100f, 0x16151413, |
575 | 0 | 0x80191817, 0x80808080, 0x1c1b1a80, 0x201f1e1d, 0x24232221, 0x28272625, |
576 | 0 | 0x2c2b2a29, 0x302f2e2d, 0x80333231, 0x80808080); |
577 |

578 | 0 | const __m512i merge_mask1 = _mm512_set1_epi32(0x01400140); |
579 | 0 | const __m512i merge_mask2 = _mm512_set1_epi32(0x00011000); |
580 |

581 | 0 | const __m512i continuous_mask = _mm512_setr_epi32( |
582 | 0 | 0x06000102, 0x090a0405, 0x0c0d0e08, 0x16101112, 0x191a1415, 0x1c1d1e18, |
583 | 0 | 0x26202122, 0x292a2425, 0x2c2d2e28, 0x36303132, 0x393a3435, 0x3c3d3e38, |
584 | 0 | 0x00000000, 0x00000000, 0x00000000, 0x00000000); |
585 |

586 | 0 | while (length > 64) { |
587 | | /* Step 1: load input data */ |
588 | 0 | const __m512i input = _mm512_loadu_si512((__m512i *)c); |
589 | | |
590 | | /* Step 2: translation into 6-bit values(saved on bytes) from ASCII and error detection */ |
591 | 0 | __m512i str = _mm512_permutex2var_epi8(lookup_0, input, lookup_1); |
592 | 0 | const uint64_t mask = _mm512_movepi8_mask(_mm512_or_epi64(str, input)); /* convert MSBs to the mask */ |
593 | 0 | if (mask) { /* a lookup result of 0x80 or an input byte with its top bit set: leave this chunk to the scalar decoder */ |
594 | 0 | break; |
595 | 0 | } |
596 | | |
597 | | /* Step 3: pack four fields within 32-bit words into 24-bit words. */ |
598 | 0 | const __m512i merge_ab_and_bc = _mm512_maddubs_epi16(str, merge_mask1); |
599 | 0 | str = _mm512_madd_epi16(merge_ab_and_bc, merge_mask2); |
600 | | |
601 | | /* Step 4: move 3-byte words into the continuous array. */ |
602 | 0 | str = _mm512_permutexvar_epi8(continuous_mask, str); |
603 | | |
604 | | /* Step 5: store the final result */ |
605 | 0 | _mm512_storeu_si512((__m512i *)o, str); /* writes 64 bytes although the output only advances by 48 below */ |
606 |

607 | 0 | c += 64; |
608 | 0 | o += 48; |
609 | 0 | outl += 48; |
610 | 0 | length -= 64; |
611 | 0 | } |
612 |

613 | 0 | if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { /* scalar decoder gets the buffer start and continues at offset outl */ |
614 | 0 | zend_string_efree(result); |
615 | 0 | return NULL; |
616 | 0 | } |
617 | | |
618 | 0 | ZSTR_LEN(result) = outl; |
619 |

620 | 0 | return result; |
621 | 0 | } |
622 | | #endif |
623 | | |
624 | | #if defined(BASE64_INTRIN_AVX512_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_FUNC_PTR) |
625 | | zend_string *php_base64_encode_avx512(const unsigned char *str, size_t length, zend_long flags) |
626 | 0 | { /* AVX-512 encoder (no VBMI): converts 48 input bytes into 64 ASCII bytes per loop pass, hands the remaining bytes to php_base64_encode_impl, then sets the string length from the returned end pointer and returns the new zend_string */ |
627 | 0 | const unsigned char *c = str; |
628 | 0 | unsigned char *o; |
629 | 0 | zend_string *result; |
630 |

631 | 0 | result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); /* presumably sizes the buffer as ((length + 2) / 3) * 4 bytes - confirm against zend_string_safe_alloc */ |
632 | 0 | o = (unsigned char *)ZSTR_VAL(result); |
633 |

634 | 0 | while (length > 63) { /* each pass loads 64 input bytes but advances the input by only 48 */ |
635 | | /* Step 1: load input data */ |
636 | | /* [????|????|????|????|PPPO|OONN|NMMM|LLLK|KKJJ|JIII|HHHG|GGFF|FEEE|DDDC|CCBB|BAAA] */ |
637 | 0 | __m512i str = _mm512_loadu_si512((const __m512i *)c); |
638 | | |
639 | | /* Step 2: splitting 24-bit words into 32-bit lanes */ |
640 | | /* [0000|PPPO|OONN|NMMM|0000|LLLK|KKJJ|JIII|0000|HHHG|GGFF|FEEE|0000|DDDC|CCBB|BAAA] */ |
641 | 0 | str = _mm512_permutexvar_epi32( |
642 | 0 | _mm512_set_epi32(-1, 11, 10, 9, -1, 8, 7, 6, -1, 5, 4, 3, -1, 2, 1, 0), str); |
643 | | /* [D1 D2 D0 D1|C1 C2 C0 C1|B1 B2 B0 B1|A1 A2 A0 A1] x 4 */ |
644 | 0 | str = _mm512_shuffle_epi8(str, _mm512_set4_epi32(0x0a0b090a, 0x07080607, 0x04050304, 0x01020001)); |
645 | | |
646 | | /* Step 3: moving 6-bit word to separate bytes */ |
647 | | /* in: [bbbbcccc|ccdddddd|aaaaaabb|bbbbcccc] */ |
648 | | /* t0: [0000cccc|cc000000|aaaaaa00|00000000] */ |
649 | 0 | const __m512i t0 = _mm512_and_si512(str, _mm512_set1_epi32(0x0fc0fc00)); |
650 | | /* t1: [00000000|00cccccc|00000000|00aaaaaa] */ |
651 | 0 | const __m512i t1 = _mm512_srlv_epi16(t0, _mm512_set1_epi32(0x0006000a)); |
652 | | /* t2: [ccdddddd|00000000|aabbbbbb|cccc0000] */ |
653 | 0 | const __m512i t2 = _mm512_sllv_epi16(str, _mm512_set1_epi32(0x00080004)); |
654 | | /* str: [00dddddd|00cccccc|00bbbbbb|00aaaaaa] */ |
655 | 0 | str = _mm512_ternarylogic_epi32(_mm512_set1_epi32(0x3f003f00), t2, t1, 0xca); |
656 | | |
657 | | /* Step 4: conversion to ASCII */ |
658 | 0 | __m512i result = _mm512_subs_epu8(str, _mm512_set1_epi8(51)); /* NOTE: this local hides the zend_string *result declared above for the rest of the loop body */ |
659 | 0 | const __mmask64 less = _mm512_cmpgt_epi8_mask(_mm512_set1_epi8(26), str); |
660 | 0 | result = _mm512_mask_mov_epi8(result, less, _mm512_set1_epi8(13)); |
661 | 0 | const __m512i lut = _mm512_set4_epi32(0x000041f0, 0xedfcfcfc, 0xfcfcfcfc, 0xfcfcfc47); |
662 | 0 | result = _mm512_shuffle_epi8(lut, result); |
663 | 0 | result = _mm512_add_epi8(result, str); |
664 | | |
665 | | /* Step 5: store the final result */ |
666 | 0 | _mm512_storeu_si512((__m512i *)o, result); |
667 | 0 | c += 48; |
668 | 0 | o += 64; |
669 | 0 | length -= 48; |
670 | 0 | } |
671 |

672 | 0 | o = php_base64_encode_impl(c, length, o, flags); /* scalar tail; returns a pointer to the NUL terminator it wrote */ |
673 |

674 | 0 | ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result)); |
675 |

676 | 0 | return result; |
677 | 0 | } |
678 | | /* Packs four values, each truncated to uint8_t, into one 32-bit word with b0 in bits 0..7 and b3 in bits 24..31. NOTE(review): neither the arguments nor the whole expansion are parenthesized, so the macro is only safe as a complete expression such as a function argument, which is how _mm512_set4lanes_epi8 uses it. */ |
679 | | #define build_dword(b0, b1, b2, b3) \ |
680 | | ((uint32_t)(uint8_t)b0 << 0) | ((uint32_t)(uint8_t)b1 << 8) | \ |
681 | | ((uint32_t)(uint8_t)b2 << 16) | ((uint32_t)(uint8_t)b3 << 24) |
682 | | /* Builds a __m512i from 16 byte values by packing them into four 32-bit words with build_dword and passing those to _mm512_setr4_epi32; presumably the 16-byte pattern is repeated across the register - confirm against the intrinsic documentation. */ |
683 | | #define _mm512_set4lanes_epi8(b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15) \ |
684 | 0 | _mm512_setr4_epi32(build_dword(b0, b1, b2, b3), build_dword(b4, b5, b6, b7), \ |
685 | 0 | build_dword(b8, b9, b10, b11), build_dword(b12, b13, b14, b15)) |
686 | | |
687 | | zend_string *php_base64_decode_ex_avx512(const unsigned char *str, size_t length, bool strict) |
688 | 0 | { |
689 | 0 | const unsigned char *c = str; |
690 | 0 | unsigned char *o; |
691 | 0 | size_t outl = 0; |
692 | 0 | zend_string *result; |
693 | |
|
694 | 0 | result = zend_string_alloc(length, 0); |
695 | 0 | o = (unsigned char *)ZSTR_VAL(result); |
696 | |
|
697 | 0 | while (length > 64) { |
698 | | /* Step 1: load input data */ |
699 | 0 | __m512i str = _mm512_loadu_si512((__m512i *)c); |
700 | | |
701 | | /* Step 2: translation into 6-bit values(saved on bytes) from ASCII and error detection */ |
702 | 0 | const __m512i higher_nibble = _mm512_and_si512(_mm512_srli_epi32(str, 4), _mm512_set1_epi8(0x0f)); |
703 | 0 | const __m512i lower_nibble = _mm512_and_si512(str, _mm512_set1_epi8(0x0f)); |
704 | 0 | const __m512i shiftLUT = _mm512_set4lanes_epi8( |
705 | 0 | 0, 0, 19, 4, -65, -65, -71, -71, 0, 0, 0, 0, 0, 0, 0, 0); |
706 | 0 | const __m512i maskLUT = _mm512_set4lanes_epi8( |
707 | 0 | /* 0 : 0b1010_1000*/ 0xa8, |
708 | 0 | /* 1 .. 9 : 0b1111_1000*/ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, |
709 | 0 | /* 10 : 0b1111_0000*/ 0xf0, |
710 | 0 | /* 11 : 0b0101_0100*/ 0x54, |
711 | 0 | /* 12 .. 14 : 0b0101_0000*/ 0x50, 0x50, 0x50, |
712 | 0 | /* 15 : 0b0101_0100*/ 0x54); |
713 | 0 | const __m512i bitposLUT = _mm512_set4lanes_epi8( |
714 | 0 | 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, |
715 | 0 | 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00); |
716 | 0 | const __m512i M = _mm512_shuffle_epi8(maskLUT, lower_nibble); |
717 | 0 | const __m512i bit = _mm512_shuffle_epi8(bitposLUT, higher_nibble); |
718 | 0 | const uint64_t match = _mm512_test_epi8_mask(M, bit); |
719 | 0 | if (match != (uint64_t)-1) { |
720 | 0 | break; |
721 | 0 | } |
722 | 0 | const __m512i sh = _mm512_shuffle_epi8(shiftLUT, higher_nibble); |
723 | 0 | const __mmask64 eq_2f = _mm512_cmpeq_epi8_mask(str, _mm512_set1_epi8(0x2f)); |
724 | 0 | const __m512i shift = _mm512_mask_mov_epi8(sh, eq_2f, _mm512_set1_epi8(16)); |
725 | 0 | str = _mm512_add_epi8(str, shift); |
726 | | |
727 | | /* Step 3: pack four fields within 32-bit words into 24-bit words. */ |
728 | 0 | const __m512i merge_ab_and_bc = _mm512_maddubs_epi16(str, _mm512_set1_epi32(0x01400140)); |
729 | 0 | str = _mm512_madd_epi16(merge_ab_and_bc, _mm512_set1_epi32(0x00011000)); |
730 | | |
731 | | /* Step 4: move 3-byte words into the continuous array. */ |
732 | 0 | const __m512i t1 = _mm512_shuffle_epi8(str, |
733 | 0 | _mm512_set4lanes_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1)); |
734 | 0 | const __m512i s6 = _mm512_setr_epi32(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0, 0, 0, 0); |
735 | 0 | const __m512i t2 = _mm512_permutexvar_epi32(s6, t1); |
736 | | |
737 | | /* Step 5: store the final result */ |
738 | 0 | _mm512_storeu_si512((__m512i *)o, t2); |
739 | |
|
740 | 0 | c += 64; |
741 | 0 | o += 48; |
742 | 0 | outl += 48; |
743 | 0 | length -= 64; |
744 | 0 | } |
745 | |
|
746 | 0 | if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { |
747 | 0 | zend_string_efree(result); |
748 | 0 | return NULL; |
749 | 0 | } |
750 | | |
751 | 0 | ZSTR_LEN(result) = outl; |
752 | |
|
753 | 0 | return result; |
754 | 0 | } |
755 | | #endif |
756 | | |
757 | | #if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) |
758 | | # if defined(ZEND_INTRIN_AVX2_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET) |
759 | | static __m256i php_base64_encode_avx2_reshuffle(__m256i in) __attribute__((target("avx2"))); |
760 | | static __m256i php_base64_encode_avx2_translate(__m256i in) __attribute__((target("avx2"))); |
761 | | # endif |
/* Spread 24 input bytes into 32 bytes that each hold one 6-bit value.
 *
 * The function works on input that is shifted by 4 bytes so that both
 * 128-bit lanes can be processed by one in-lane byte shuffle.
 *
 * input, bytes MSB to LSB:
 *  0 0 0 0 x w v u t s r q p o n m
 *  l k j i h g f e d c b a 0 0 0 0
 *
 * output (upper case are MSB, lower case are LSB):
 *  00xxxxxx 00wwwwXX 00vvWWWW 00VVVVVV
 *  00uuuuuu 00ttttUU 00ssTTTT 00SSSSSS
 *  00rrrrrr 00qqqqRR 00ppQQQQ 00PPPPPP
 *  00oooooo 00nnnnOO 00mmNNNN 00MMMMMM
 *  00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
 *  00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
 *  00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
 *  00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */
static __m256i php_base64_encode_avx2_reshuffle(__m256i in)
{
	/* Byte selectors: every 3-byte group is expanded into one 32-bit word. */
	const __m256i spread = _mm256_set_epi8(
		10, 11,  9, 10,
		 7,  8,  6,  7,
		 4,  5,  3,  4,
		 1,  2,  0,  1,

		14, 15, 13, 14,
		11, 12, 10, 11,
		 8,  9,  7,  8,
		 5,  6,  4,  5);
	const __m256i words = _mm256_shuffle_epi8(in, spread);

	/* Two of the four fields must move right: the high half of an unsigned
	 * 16-bit multiply by 0x0040 / 0x0400 equals a shift right by 10 / 6. */
	const __m256i moved_down = _mm256_mulhi_epu16(
		_mm256_and_si256(words, _mm256_set1_epi32(0x0fc0fc00)),
		_mm256_set1_epi32(0x04000040));

	/* The other two move left: the low half of a multiply by 0x0010 / 0x0100
	 * equals a shift left by 4 / 8. */
	const __m256i moved_up = _mm256_mullo_epi16(
		_mm256_and_si256(words, _mm256_set1_epi32(0x003f03f0)),
		_mm256_set1_epi32(0x01000010));

	return _mm256_or_si256(moved_down, moved_up);
}
800 | | |
/* Map 32 bytes holding values 0..63 to base64 alphabet characters by adding
 * a per-range offset taken from a 16-entry table (repeated for both lanes):
 *   [0..25]  -> +65  (index 0)       'A'..'Z'
 *   [26..51] -> +71  (index 1)       'a'..'z'
 *   [52..61] -> -4   (index 2..11)   '0'..'9'
 *   [62]     -> -19  (index 12)      '+'
 *   [63]     -> -16  (index 13)      '/'  */
static __m256i php_base64_encode_avx2_translate(__m256i in)
{
	const __m256i offsets = _mm256_setr_epi8(
		65, 71, -4, -4, -4, -4, -4, -4,
		-4, -4, -4, -4, -19, -16, 0, 0,
		65, 71, -4, -4, -4, -4, -4, -4,
		-4, -4, -4, -4, -19, -16, 0, 0);

	/* 0xFF (-1) for every value above 25, 0x00 otherwise. */
	const __m256i above_25 = _mm256_cmpgt_epi8(in, _mm256_set1_epi8(25));

	/* Saturating subtract: 0 for values up to 51, value - 51 above that. */
	const __m256i reduced = _mm256_subs_epu8(in, _mm256_set1_epi8(51));

	/* Subtracting -1 adds one to the index of every range except the first. */
	const __m256i slot = _mm256_sub_epi8(reduced, above_25);

	return _mm256_add_epi8(in, _mm256_shuffle_epi8(offsets, slot));
}
820 | | #endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER */ |
821 | | |
822 | | #if defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER) |
823 | | |
824 | | # if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET) |
825 | | static __m128i php_base64_encode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3"))); |
826 | | static __m128i php_base64_encode_ssse3_translate(__m128i in) __attribute__((target("ssse3"))); |
827 | | # endif |
828 | | |
/* Spread the low 12 bytes of `in` into 16 bytes that each hold one 6-bit value.
 *
 * input, bytes MSB to LSB:
 *  0 0 0 0 l k j i h g f e d c b a
 *
 * output (upper case are MSB, lower case are LSB):
 *  00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
 *  00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
 *  00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
 *  00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */
static __m128i php_base64_encode_ssse3_reshuffle(__m128i in)
{
	/* Byte selectors: every 3-byte group is expanded into one 32-bit word. */
	const __m128i spread = _mm_set_epi8(
		10, 11,  9, 10,
		 7,  8,  6,  7,
		 4,  5,  3,  4,
		 1,  2,  0,  1);
	const __m128i words = _mm_shuffle_epi8(in, spread);

	/* High half of an unsigned 16-bit multiply by 0x0040 / 0x0400 acts as a
	 * shift right by 10 / 6 on the masked fields. */
	const __m128i moved_down = _mm_mulhi_epu16(
		_mm_and_si128(words, _mm_set1_epi32(0x0fc0fc00)),
		_mm_set1_epi32(0x04000040));

	/* Low half of a multiply by 0x0010 / 0x0100 acts as a shift left by
	 * 4 / 8 on the remaining fields. */
	const __m128i moved_up = _mm_mullo_epi16(
		_mm_and_si128(words, _mm_set1_epi32(0x003f03f0)),
		_mm_set1_epi32(0x01000010));

	return _mm_or_si128(moved_down, moved_up);
}
856 | | |
/* Translate 16 bytes holding values 0..63 to the Base64 alphabet.
 * There are five sets:
 *  #  From      To         Abs  Index    Characters
 *  0  [0..25]   [65..90]   +65  0        ABCDEFGHIJKLMNOPQRSTUVWXYZ
 *  1  [26..51]  [97..122]  +71  1        abcdefghijklmnopqrstuvwxyz
 *  2  [52..61]  [48..57]    -4  [2..11]  0123456789
 *  3  [62]      [43]       -19  12       +
 *  4  [63]      [47]       -16  13       /  */
static __m128i php_base64_encode_ssse3_translate(__m128i in)
{
	/* Offset table, addressed by the "Index" column above. */
	const __m128i offsets = _mm_setr_epi8(
		65, 71, -4, -4,
		-4, -4, -4, -4,
		-4, -4, -4, -4,
		-19, -16, 0, 0);

	/* 0xFF (-1) for sets #1..#4, 0x00 for set #0. */
	const __m128i above_25 = _mm_cmpgt_epi8(in, _mm_set1_epi8(25));

	/* Saturating subtract gives the right index for set #0 and an index that
	 * is one too small for all other sets. */
	const __m128i reduced = _mm_subs_epu8(in, _mm_set1_epi8(51));

	/* Subtracting -1 adds the missing one for sets #1..#4. */
	const __m128i slot = _mm_sub_epi8(reduced, above_25);

	/* Add the selected offset to every input value. */
	return _mm_add_epi8(in, _mm_shuffle_epi8(offsets, slot));
}
888 | | |
/* SSSE3 encode loop shared by the encoder entry points.
 *
 * Relies on three variables of the expanding function: `c` (input cursor),
 * `o` (output cursor) and `length` (remaining input bytes). While more than
 * 15 bytes remain it loads 16 bytes, encodes the first 12 of them into 16
 * output characters and advances c by 12, o by 16 and length by -12.
 *
 * The body is wrapped in do { ... } while (0) so that the macro followed by
 * `;` forms exactly one statement (safe inside an unbraced if/else). */
#define PHP_BASE64_ENCODE_SSSE3_LOOP \
	do { \
		while (length > 15) { \
			__m128i block = _mm_loadu_si128((__m128i *)c); \
			\
			block = php_base64_encode_ssse3_reshuffle(block); \
			block = php_base64_encode_ssse3_translate(block); \
			\
			_mm_storeu_si128((__m128i *)o, block); \
			c += 12; \
			o += 16; \
			length -= 12; \
		} \
	} while (0)
902 | | |
903 | | #endif /* ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */ |
904 | | |
905 | | #if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER) |
906 | | # if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_SSSE3_NATIVE) |
907 | | PHPAPI zend_string *php_base64_encode_ex(const unsigned char *str, size_t length, zend_long flags) |
908 | | # elif defined(ZEND_INTRIN_AVX2_RESOLVER) |
909 | | zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length, zend_long flags) |
910 | | # else /* ZEND_INTRIN_SSSE3_RESOLVER */ |
911 | | zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length, zend_long flags) |
912 | | # endif |
913 | 12 | { |
914 | 12 | const unsigned char *c = str; |
915 | 12 | unsigned char *o; |
916 | 12 | zend_string *result; |
917 | | |
918 | 12 | result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); |
919 | 12 | o = (unsigned char *)ZSTR_VAL(result); |
920 | 12 | # if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) |
921 | 12 | if (length > 31) { |
922 | 5 | __m256i s = _mm256_loadu_si256((__m256i *)c); |
923 | | |
924 | 5 | s = _mm256_permutevar8x32_epi32(s, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6)); |
925 | | |
926 | 17 | for (;;) { |
927 | 17 | s = php_base64_encode_avx2_reshuffle(s); |
928 | | |
929 | 17 | s = php_base64_encode_avx2_translate(s); |
930 | | |
931 | 17 | _mm256_storeu_si256((__m256i *)o, s); |
932 | 17 | c += 24; |
933 | 17 | o += 32; |
934 | 17 | length -= 24; |
935 | 17 | if (length < 28) { |
936 | 5 | break; |
937 | 5 | } |
938 | 12 | s = _mm256_loadu_si256((__m256i *)(c - 4)); |
939 | 12 | } |
940 | 5 | } |
941 | | # else |
942 | | PHP_BASE64_ENCODE_SSSE3_LOOP; |
943 | | # endif |
944 | | |
945 | 12 | o = php_base64_encode_impl(c, length, o, flags); |
946 | | |
947 | 12 | ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result)); |
948 | | |
949 | 12 | return result; |
950 | 12 | } |
951 | | |
952 | | # if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(ZEND_INTRIN_AVX2_RESOLVER) |
953 | | zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length, zend_long flags) |
954 | 0 | { |
955 | 0 | const unsigned char *c = str; |
956 | 0 | unsigned char *o; |
957 | 0 | zend_string *result; |
958 | |
|
959 | 0 | result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); |
960 | 0 | o = (unsigned char *)ZSTR_VAL(result); |
961 | |
|
962 | 0 | PHP_BASE64_ENCODE_SSSE3_LOOP; |
963 | |
|
964 | 0 | o = php_base64_encode_impl(c, length, o, flags); |
965 | |
|
966 | 0 | ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result)); |
967 | |
|
968 | 0 | return result; |
969 | 0 | } |
970 | | # endif |
971 | | #endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */ |
972 | | |
973 | | /* }}} */ |
974 | | |
975 | | #if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) |
976 | | # if defined(ZEND_INTRIN_AVX2_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET) |
977 | | static __m256i php_base64_decode_avx2_reshuffle(__m256i in) __attribute__((target("avx2"))); |
978 | | # endif |
979 | | |
/* Pack 32 bytes that each carry a 6-bit value into 24 contiguous data bytes
 * in the low part of the returned vector. */
static __m256i php_base64_decode_avx2_reshuffle(__m256i in)
{
	/* Byte pairs -> 12-bit values: first byte * 0x40 + second byte. */
	const __m256i pairs = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140));

	/* 16-bit pairs -> 24-bit values: first word * 0x1000 + second word. */
	const __m256i quads = _mm256_madd_epi16(pairs, _mm256_set1_epi32(0x00011000));

	/* Inside each 128-bit lane: reverse the three data bytes of every dword
	 * and pack them into the lane's low 12 bytes (top 4 bytes zeroed). */
	const __m256i per_lane = _mm256_shuffle_epi8(quads, _mm256_setr_epi8(
		2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
		2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1));

	/* Across lanes: move the three data dwords of each lane next to each other. */
	return _mm256_permutevar8x32_epi32(per_lane, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1));
}
994 | | #endif |
995 | | |
996 | | #if defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER) |
997 | | # if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET) |
998 | | static __m128i php_base64_decode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3"))); |
999 | | # endif |
1000 | | |
/* Pack 16 bytes that each carry a 6-bit value into 12 contiguous data bytes
 * in the low part of the returned vector (the top 4 bytes are zero). */
static __m128i php_base64_decode_ssse3_reshuffle(__m128i in)
{
	/* Byte pairs -> 12-bit values (first byte * 0x40 + second byte):
	 * 0000kkkk LLllllll 0000JJJJ JJjjKKKK
	 * 0000hhhh IIiiiiii 0000GGGG GGggHHHH
	 * 0000eeee FFffffff 0000DDDD DDddEEEE
	 * 0000bbbb CCcccccc 0000AAAA AAaaBBBB */
	const __m128i pairs = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140));

	/* 16-bit pairs -> 24-bit values (first word * 0x1000 + second word):
	 * 00000000 JJJJJJjj KKKKkkkk LLllllll
	 * 00000000 GGGGGGgg HHHHhhhh IIiiiiii
	 * 00000000 DDDDDDdd EEEEeeee FFffffff
	 * 00000000 AAAAAAaa BBBBbbbb CCcccccc */
	const __m128i quads = _mm_madd_epi16(pairs, _mm_set1_epi32(0x00011000));

	/* Reverse the three data bytes of every dword and drop the empty byte:
	 * 00000000 00000000 00000000 00000000
	 * LLllllll KKKKkkkk JJJJJJjj IIiiiiii
	 * HHHHhhhh GGGGGGgg FFffffff EEEEeeee
	 * DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa */
	const __m128i compact = _mm_setr_epi8(
		 2,  1,  0,
		 6,  5,  4,
		10,  9,  8,
		14, 13, 12,
		-1, -1, -1, -1);

	return _mm_shuffle_epi8(quads, compact);
}
1028 | | |
/* SSSE3 decode loop shared by the decoder entry points.
 *
 * Relies on four variables of the expanding function: `c` (input cursor),
 * `o` (output cursor), `outl` (bytes produced so far) and `length` (remaining
 * input bytes). While more than 15 + 6 + 2 bytes remain it loads 16 input
 * characters, checks them with two nibble-indexed tables and, if none is
 * flagged, converts them to 12 output bytes (c += 16, o += 12, outl += 12,
 * length -= 16). The first chunk with a flagged byte ends the loop without
 * being consumed.
 *
 * The body is wrapped in do { ... } while (0) so that the macro followed by
 * `;` forms exactly one statement; the inner `break` still leaves only the
 * inner while loop. */
#define PHP_BASE64_DECODE_SSSE3_LOOP \
	do { \
		while (length > 15 + 6 + 2) { \
			__m128i lut_lo, lut_hi, lut_roll; \
			__m128i hi_nibbles, lo_nibbles, hi, lo; \
			__m128i in = _mm_loadu_si128((__m128i *)c); \
			\
			lut_lo = _mm_setr_epi8( \
				0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \
				0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); \
			lut_hi = _mm_setr_epi8( \
				0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \
				0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); \
			lut_roll = _mm_setr_epi8( \
				0, 16, 19, 4, -65, -65, -71, -71, \
				0, 0, 0, 0, 0, 0, 0, 0); \
			\
			hi_nibbles = _mm_and_si128( \
				_mm_srli_epi32(in, 4), _mm_set1_epi8(0x2f)); \
			lo_nibbles = _mm_and_si128(in, _mm_set1_epi8(0x2f)); \
			hi = _mm_shuffle_epi8(lut_hi, hi_nibbles); \
			lo = _mm_shuffle_epi8(lut_lo, lo_nibbles); \
			\
			if (UNEXPECTED( \
				_mm_movemask_epi8( \
					_mm_cmpgt_epi8( \
						_mm_and_si128(lo, hi), _mm_set1_epi8(0))))) { \
				break; \
			} else { \
				__m128i eq_2f, roll; \
				\
				eq_2f = _mm_cmpeq_epi8(in, _mm_set1_epi8(0x2f)); \
				roll = _mm_shuffle_epi8( \
					lut_roll, _mm_add_epi8(eq_2f, hi_nibbles)); \
				\
				in = _mm_add_epi8(in, roll); \
				in = php_base64_decode_ssse3_reshuffle(in); \
				\
				_mm_storeu_si128((__m128i *)o, in); \
				\
				c += 16; \
				o += 12; \
				outl += 12; \
				length -= 16; \
			} \
		} \
	} while (0)
1075 | | |
1076 | | #endif |
1077 | | |
1078 | | #if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER) |
1079 | | # if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_SSSE3_NATIVE) |
1080 | | PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict) |
1081 | | # elif defined(ZEND_INTRIN_AVX2_RESOLVER) |
1082 | | zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, bool strict) |
1083 | | # else |
1084 | | zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, bool strict) |
1085 | | # endif |
1086 | 0 | { |
1087 | 0 | const unsigned char *c = str; |
1088 | 0 | unsigned char *o; |
1089 | 0 | size_t outl = 0; |
1090 | 0 | zend_string *result; |
1091 | |
|
1092 | 0 | result = zend_string_alloc(length, 0); |
1093 | 0 | o = (unsigned char *)ZSTR_VAL(result); |
1094 | | |
1095 | | /* See: "Faster Base64 Encoding and Decoding using AVX2 Instructions" |
1096 | | * https://arxiv.org/pdf/1704.00605.pdf */ |
1097 | 0 | # if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) |
1098 | 0 | while (length > 31 + 11 + 2) { |
1099 | 0 | __m256i lut_lo, lut_hi, lut_roll; |
1100 | 0 | __m256i hi_nibbles, lo_nibbles, hi, lo; |
1101 | 0 | __m256i str = _mm256_loadu_si256((__m256i *)c); |
1102 | |
|
1103 | 0 | lut_lo = _mm256_setr_epi8( |
1104 | 0 | 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, |
1105 | 0 | 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A, |
1106 | 0 | 0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, |
1107 | 0 | 0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); |
1108 | |
|
1109 | 0 | lut_hi = _mm256_setr_epi8( |
1110 | 0 | 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, |
1111 | 0 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, |
1112 | 0 | 0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, |
1113 | 0 | 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); |
1114 | |
|
1115 | 0 | lut_roll = _mm256_setr_epi8( |
1116 | 0 | 0, 16, 19, 4, -65, -65, -71, -71, |
1117 | 0 | 0, 0, 0, 0, 0, 0, 0, 0, |
1118 | 0 | 0, 16, 19, 4, -65, -65, -71, -71, |
1119 | 0 | 0, 0, 0, 0, 0, 0, 0, 0); |
1120 | |
|
1121 | 0 | hi_nibbles = _mm256_and_si256(_mm256_srli_epi32(str, 4), _mm256_set1_epi8(0x2f)); |
1122 | 0 | lo_nibbles = _mm256_and_si256(str, _mm256_set1_epi8(0x2f)); |
1123 | 0 | hi = _mm256_shuffle_epi8(lut_hi, hi_nibbles); |
1124 | 0 | lo = _mm256_shuffle_epi8(lut_lo, lo_nibbles); |
1125 | |
|
1126 | 0 | if (!_mm256_testz_si256(lo, hi)) { |
1127 | 0 | break; |
1128 | 0 | } else { |
1129 | 0 | __m256i eq_2f, roll; |
1130 | 0 | eq_2f = _mm256_cmpeq_epi8(str, _mm256_set1_epi8(0x2f)); |
1131 | 0 | roll = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2f, hi_nibbles)); |
1132 | | |
1133 | |
|
1134 | 0 | str = _mm256_add_epi8(str, roll); |
1135 | |
|
1136 | 0 | str = php_base64_decode_avx2_reshuffle(str); |
1137 | |
|
1138 | 0 | _mm256_storeu_si256((__m256i *)o, str); |
1139 | |
|
1140 | 0 | c += 32; |
1141 | 0 | o += 24; |
1142 | 0 | outl += 24; |
1143 | 0 | length -= 32; |
1144 | 0 | } |
1145 | 0 | } |
1146 | | # else |
1147 | | PHP_BASE64_DECODE_SSSE3_LOOP; |
1148 | | # endif |
1149 | |
|
1150 | 0 | if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { |
1151 | 0 | zend_string_efree(result); |
1152 | 0 | return NULL; |
1153 | 0 | } |
1154 | | |
1155 | 0 | ZSTR_LEN(result) = outl; |
1156 | |
|
1157 | 0 | return result; |
1158 | 0 | } |
1159 | | |
1160 | | # if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(ZEND_INTRIN_AVX2_RESOLVER) |
1161 | | zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, bool strict) |
1162 | 0 | { |
1163 | 0 | const unsigned char *c = str; |
1164 | 0 | unsigned char *o; |
1165 | 0 | size_t outl = 0; |
1166 | 0 | zend_string *result; |
1167 | |
|
1168 | 0 | result = zend_string_alloc(length, 0); |
1169 | 0 | o = (unsigned char *)ZSTR_VAL(result); |
1170 | |
|
1171 | 0 | PHP_BASE64_DECODE_SSSE3_LOOP; |
1172 | |
|
1173 | 0 | if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { |
1174 | 0 | zend_string_efree(result); |
1175 | 0 | return NULL; |
1176 | 0 | } |
1177 | | |
1178 | 0 | ZSTR_LEN(result) = outl; |
1179 | |
|
1180 | 0 | return result; |
1181 | 0 | } |
1182 | | # endif |
1183 | | #endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */ |
1184 | | |
1185 | | #if !defined(ZEND_INTRIN_AVX2_NATIVE) && !defined(ZEND_INTRIN_SSSE3_NATIVE) |
1186 | | #if defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_RESOLVER) |
1187 | | zend_string *php_base64_encode_default(const unsigned char *str, size_t length, zend_long flags) |
1188 | | #else |
1189 | | PHPAPI zend_string *php_base64_encode_ex(const unsigned char *str, size_t length, zend_long flags) |
1190 | | #endif |
1191 | 0 | { |
1192 | 0 | unsigned char *p; |
1193 | 0 | zend_string *result; |
1194 | |
|
1195 | 0 | result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0); |
1196 | 0 | p = (unsigned char *)ZSTR_VAL(result); |
1197 | |
|
1198 | 0 | p = php_base64_encode_impl(str, length, p, flags); |
1199 | |
|
1200 | 0 | ZSTR_LEN(result) = (p - (unsigned char *)ZSTR_VAL(result)); |
1201 | |
|
1202 | 0 | return result; |
1203 | 0 | } |
1204 | | #endif |
1205 | | |
1206 | | #if !defined(ZEND_INTRIN_AVX2_NATIVE) && !defined(ZEND_INTRIN_SSSE3_NATIVE) |
1207 | | #if defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_RESOLVER) |
1208 | | zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, bool strict) |
1209 | | #else |
1210 | | PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict) |
1211 | | #endif |
1212 | 0 | { |
1213 | 0 | zend_string *result; |
1214 | 0 | size_t outl = 0; |
1215 | |
|
1216 | 0 | result = zend_string_alloc(length, 0); |
1217 | |
|
1218 | 0 | if (!php_base64_decode_impl(str, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) { |
1219 | 0 | zend_string_efree(result); |
1220 | 0 | return NULL; |
1221 | 0 | } |
1222 | | |
1223 | 0 | ZSTR_LEN(result) = outl; |
1224 | |
|
1225 | 0 | return result; |
1226 | 0 | } |
1227 | | #endif |
1228 | | /* }}} */ |
1229 | | |
1230 | | /* {{{ Encodes string using MIME base64 algorithm */ |
1231 | | PHP_FUNCTION(base64_encode) |
1232 | 12 | { |
1233 | 12 | char *str; |
1234 | 12 | size_t str_len; |
1235 | 12 | zend_string *result; |
1236 | | |
1237 | 36 | ZEND_PARSE_PARAMETERS_START(1, 1) |
1238 | 48 | Z_PARAM_STRING(str, str_len) |
1239 | 12 | ZEND_PARSE_PARAMETERS_END(); |
1240 | | |
1241 | 12 | result = php_base64_encode((unsigned char*)str, str_len); |
1242 | 12 | RETURN_STR(result); |
1243 | 12 | } |
1244 | | /* }}} */ |
1245 | | |
1246 | | /* {{{ Decodes string using MIME base64 algorithm */ |
1247 | | PHP_FUNCTION(base64_decode) |
1248 | 0 | { |
1249 | 0 | char *str; |
1250 | 0 | bool strict = 0; |
1251 | 0 | size_t str_len; |
1252 | 0 | zend_string *result; |
1253 | |
|
1254 | 0 | ZEND_PARSE_PARAMETERS_START(1, 2) |
1255 | 0 | Z_PARAM_STRING(str, str_len) |
1256 | 0 | Z_PARAM_OPTIONAL |
1257 | 0 | Z_PARAM_BOOL(strict) |
1258 | 0 | ZEND_PARSE_PARAMETERS_END(); |
1259 | | |
1260 | 0 | result = php_base64_decode_ex((unsigned char*)str, str_len, strict); |
1261 | 0 | if (result != NULL) { |
1262 | 0 | RETURN_STR(result); |
1263 | 0 | } else { |
1264 | 0 | RETURN_FALSE; |
1265 | 0 | } |
1266 | 0 | } |
1267 | | /* }}} */ |