Coverage Report

Created: 2026-06-02 06:36

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/php-src/ext/standard/base64.c
Line
Count
Source
1
/*
2
   +----------------------------------------------------------------------+
3
   | Copyright © The PHP Group and Contributors.                          |
4
   +----------------------------------------------------------------------+
5
   | This source file is subject to the Modified BSD License that is      |
6
   | bundled with this package in the file LICENSE, and is available      |
7
   | through the World Wide Web at <https://www.php.net/license/>.        |
8
   |                                                                      |
9
   | SPDX-License-Identifier: BSD-3-Clause                                |
10
   +----------------------------------------------------------------------+
11
   | Author: Jim Winstead <jimw@php.net>                                  |
12
   |         Xinchen Hui <laruence@php.net>                               |
13
   +----------------------------------------------------------------------+
14
 */
15
16
#include <string.h>
17
18
#include "php.h"
19
#include "base64.h"
20
21
/* {{{ base64 tables */
22
/* Base64 alphabet, indexed by 6-bit value (0..63).
 * Spelled as a string literal: the 64 symbols plus the implicit terminating
 * NUL give the same 65 bytes as an element-by-element initializer. */
static const char base64_table[] =
  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  "abcdefghijklmnopqrstuvwxyz"
  "0123456789+/";

/* Filler character the encoder appends to complete the last 4-character group,
 * and which the decoder counts as padding. */
static const char base64_pad = '=';
31
32
/* Reverse lookup: maps a byte value (0..255) to its 6-bit Base64 value (0..63).
 * Negative entries are sentinels used by php_base64_decode_impl:
 *   -1  bytes 9, 10, 13 and 32 (tab, LF, CR, space) - always skipped
 *   -2  every other byte outside the alphabet, '=' included - skipped in
 *       non-strict mode, rejected in strict mode */
static const short base64_reverse_table[256] = {
33
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2, -2, -1, -2, -2,
34
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
35
  -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 62, -2, -2, -2, 63,
36
  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -2, -2, -2, -2, -2, -2,
37
  -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
38
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -2, -2, -2, -2, -2,
39
  -2, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
40
  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -2, -2, -2, -2, -2,
41
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
42
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
43
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
44
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
45
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
46
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
47
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
48
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2
49
};
50
/* }}} */
51
52
#if defined(__aarch64__) || defined(_M_ARM64)
53
#include <arm_neon.h>
54
55
/* Translate sixteen 6-bit values into Base64 ASCII characters.
 *
 * Each lane is classified into a slot of shift_LUT and the byte found there
 * is added (modulo 256) to the lane:
 *    0..25 -> slot 13
 *   26..51 -> slot 0
 *   52..61 -> slots 1..10
 *       62 -> slot 11
 *       63 -> slot 12
 * input is expected to hold values in 0..63 only. */
static zend_always_inline uint8x16_t encode_toascii(const uint8x16_t input, const uint8x16x2_t shift_LUT)
{
  /* Saturating subtract: 0..51 collapse to 0, 52..63 become 1..12. */
  const uint8x16_t above_letters = vqsubq_u8(input, vdupq_n_u8(51));
  /* All-ones in the lanes holding 0..25, zero everywhere else. */
  const uint8x16_t is_first_range = vcltq_u8(input, vdupq_n_u8(26));
  /* Lanes 0..25 are at slot 0 so far; OR-ing in 13 moves them to slot 13. */
  const uint8x16_t slot = vorrq_u8(above_letters, vandq_u8(is_first_range, vdupq_n_u8(13)));

  /* Fetch the per-lane offset and apply it. */
  return vaddq_u8(vqtbl2q_u8(shift_LUT, slot), input);
}
71
72
static zend_always_inline unsigned char *neon_base64_encode(const unsigned char *in, size_t inl, unsigned char *out, size_t *left)
73
{
74
  const uint8_t shift_LUT_[32] = {'a' - 26, '0' - 52, '0' - 52, '0' - 52,
75
          '0' - 52, '0' - 52, '0' - 52, '0' - 52,
76
          '0' - 52, '0' - 52, '0' - 52, '+' - 62,
77
          '/' - 63, 'A',      0,        0,
78
          'a' - 26, '0' - 52, '0' - 52, '0' - 52,
79
          '0' - 52, '0' - 52, '0' - 52, '0' - 52,
80
          '0' - 52, '0' - 52, '0' - 52, '+' - 62,
81
          '/' - 63, 'A',      0,        0};
82
  const uint8x16x2_t shift_LUT = *((const uint8x16x2_t *)shift_LUT_);
83
  do {
84
    /* [ccdddddd | bbbbcccc | aaaaaabb]
85
        x.val[2] | x.val[1] | x.val[0] */
86
    const uint8x16x3_t x = vld3q_u8((const uint8_t *)(in));
87
88
    /* [00aa_aaaa] */
89
    const uint8x16_t field_a = vshrq_n_u8(x.val[0], 2);
90
91
    const uint8x16_t field_b =             /* [00bb_bbbb] */
92
        vbslq_u8(vdupq_n_u8(0x30),         /* [0011_0000] */
93
                 vshlq_n_u8(x.val[0], 4),  /* [aabb_0000] */
94
                 vshrq_n_u8(x.val[1], 4)); /* [0000_bbbb] */
95
96
    const uint8x16_t field_c =             /* [00cc_cccc] */
97
        vbslq_u8(vdupq_n_u8(0x3c),         /* [0011_1100] */
98
                 vshlq_n_u8(x.val[1], 2),  /* [bbcc_cc00] */
99
                 vshrq_n_u8(x.val[2], 6)); /* [0000_00cc] */
100
101
    /* [00dd_dddd] */
102
    const uint8x16_t field_d = vandq_u8(x.val[2], vdupq_n_u8(0x3f));
103
104
    uint8x16x4_t result;
105
    result.val[0] = encode_toascii(field_a, shift_LUT);
106
    result.val[1] = encode_toascii(field_b, shift_LUT);
107
    result.val[2] = encode_toascii(field_c, shift_LUT);
108
    result.val[3] = encode_toascii(field_d, shift_LUT);
109
110
    vst4q_u8((uint8_t *)out, result);
111
    out += 64;
112
    in += 16 * 3;
113
    inl -= 16 * 3;
114
  } while (inl >= 16 * 3);
115
116
  *left = inl;
117
  return out;
118
}
119
#endif /* defined(__aarch64__) || defined(_M_ARM64) */
120
121
/* Scalar Base64 encoder, also used to finish what the vector paths leave over.
 *
 * in     input bytes
 * inl    number of input bytes
 * out    destination buffer, sized by the caller
 * flags  when PHP_BASE64_NO_PADDING is set, no '=' filler is appended
 *
 * Writes the encoded text followed by a NUL byte and returns a pointer to
 * that NUL (one past the last encoded character). */
static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned char *in, size_t inl, unsigned char *out, zend_long flags) /* {{{ */
{
  const bool pad_output = (flags & PHP_BASE64_NO_PADDING) == 0;

#if defined(__aarch64__) || defined(_M_ARM64)
  /* Let NEON consume as many whole 48-byte chunks as it can. */
  if (inl >= 16 * 3) {
    size_t remaining = 0;
    out = neon_base64_encode(in, inl, out, &remaining);
    in += inl - remaining;
    inl = remaining;
  }
#endif

  /* Complete 3-byte groups: 24 bits in, four characters out. */
  for (; inl >= 3; in += 3, inl -= 3) {
    *out++ = base64_table[in[0] >> 2];
    *out++ = base64_table[((in[0] & 0x03) << 4) | (in[1] >> 4)];
    *out++ = base64_table[((in[1] & 0x0f) << 2) | (in[2] >> 6)];
    *out++ = base64_table[in[2] & 0x3f];
  }

  /* At most two bytes remain; the missing low bits are encoded as zero. */
  if (inl == 2) {
    *out++ = base64_table[in[0] >> 2];
    *out++ = base64_table[((in[0] & 0x03) << 4) | (in[1] >> 4)];
    *out++ = base64_table[(in[1] & 0x0f) << 2];
    if (pad_output) {
      *out++ = base64_pad;
    }
  } else if (inl == 1) {
    *out++ = base64_table[in[0] >> 2];
    *out++ = base64_table[(in[0] & 0x03) << 4];
    if (pad_output) {
      *out++ = base64_pad;
      *out++ = base64_pad;
    }
  }

  *out = '\0';

  return out;
}
164
/* }}} */
165
166
#if defined(__aarch64__) || defined(_M_ARM64)
167
/* Translate sixteen ASCII bytes to 6-bit Base64 values and flag invalid lanes.
 *
 * input      bytes to translate
 * error      receives all-ones in every lane whose byte failed validation,
 *            zero in the others
 * shiftLUT   offsets selected by the byte's high nibble
 * maskLUT    validity bit sets selected by the byte's low nibble
 * bitposLUT  single-bit probes selected by the byte's high nibble
 *
 * The returned lanes are only meaningful where *error is zero. */
static zend_always_inline uint8x16_t decode_fromascii(const uint8x16_t input, uint8x16_t *error, const uint8x16x2_t shiftLUT, const uint8x16x2_t maskLUT, const uint8x16x2_t bitposLUT) {
  const uint8x16_t hi = vshrq_n_u8(input, 4);
  const uint8x16_t lo = vandq_u8(input, vdupq_n_u8(0x0f));

  /* Validation: a byte passes when the probe bit picked by its high nibble is
     set in the mask picked by its low nibble. */
  const uint8x16_t allowed = vqtbl2q_u8(maskLUT, lo);
  const uint8x16_t probe = vqtbl2q_u8(bitposLUT, hi);
  *error = vceqq_u8(vandq_u8(allowed, probe), vdupq_n_u8(0));

  /* Translation: add the offset for the high nibble, except that 0x2f ('/')
     gets the fixed offset 16 instead of the table value. */
  const uint8x16_t table_offset = vqtbl2q_u8(shiftLUT, hi);
  const uint8x16_t is_slash = vceqq_u8(input, vdupq_n_u8(0x2f));
  const uint8x16_t offset = vbslq_u8(is_slash, vdupq_n_u8(16), table_offset);

  return vaddq_u8(input, offset);
}
178
179
/* NEON bulk decoder: converts input in chunks of 64 characters into 48 bytes.
 *
 * in    encoded input
 * inl   number of input bytes; must be at least 64, because the loop body
 *       runs once before the length is tested
 * out   destination; 48 bytes are written per 64 characters consumed
 * left  receives the count of input bytes not consumed
 *
 * Stops at the first 64-byte chunk containing any byte that fails validation
 * (that chunk is not consumed), leaving the rest to the scalar decoder.
 * Returns the number of bytes written to out. */
static zend_always_inline size_t neon_base64_decode(const unsigned char *in, size_t inl, unsigned char *out, size_t *left) {
  unsigned char *out_orig = out;
  /* Each table is 32 bytes for vqtbl2q_u8; the second half repeats the first. */
  const uint8_t shiftLUT_[32] = {
    0,   0,  19,   4, (uint8_t)-65, (uint8_t)-65, (uint8_t)-71, (uint8_t)-71,
    0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,  19,   4, (uint8_t)-65, (uint8_t)-65, (uint8_t)-71, (uint8_t)-71,
    0,   0,   0,   0,   0,   0,   0,   0};
  const uint8_t maskLUT_[32] = {
    /* 0        : 0b1010_1000*/ 0xa8,
    /* 1 .. 9   : 0b1111_1000*/ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
    /* 10       : 0b1111_0000*/ 0xf0,
    /* 11       : 0b0101_0100*/ 0x54,
    /* 12 .. 14 : 0b0101_0000*/ 0x50, 0x50, 0x50,
    /* 15       : 0b0101_0100*/ 0x54,

    /* 0        : 0b1010_1000*/ 0xa8,
    /* 1 .. 9   : 0b1111_1000*/ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
    /* 10       : 0b1111_0000*/ 0xf0,
    /* 11       : 0b0101_0100*/ 0x54,
    /* 12 .. 14 : 0b0101_0000*/ 0x50, 0x50, 0x50,
    /* 15       : 0b0101_0100*/ 0x54
  };
  const uint8_t bitposLUT_[32] = {
    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,

    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
  };
  /* Load with vld1q_u8 rather than dereferencing casted pointers: the byte
     arrays carry no 16-byte alignment guarantee and may not be accessed
     through an lvalue of the vector-struct type. */
  uint8x16x2_t shiftLUT;
  uint8x16x2_t maskLUT;
  uint8x16x2_t bitposLUT;
  shiftLUT.val[0] = vld1q_u8(shiftLUT_);
  shiftLUT.val[1] = vld1q_u8(shiftLUT_ + 16);
  maskLUT.val[0] = vld1q_u8(maskLUT_);
  maskLUT.val[1] = vld1q_u8(maskLUT_ + 16);
  bitposLUT.val[0] = vld1q_u8(bitposLUT_);
  bitposLUT.val[1] = vld1q_u8(bitposLUT_ + 16);

  do {
    /* De-interleaving load: val[k] holds the k-th character of each quartet. */
    const uint8x16x4_t x = vld4q_u8((const unsigned char *)in);
    uint8x16_t error_a;
    uint8x16_t error_b;
    uint8x16_t error_c;
    uint8x16_t error_d;
    uint8x16_t field_a = decode_fromascii(x.val[0], &error_a, shiftLUT, maskLUT, bitposLUT);
    uint8x16_t field_b = decode_fromascii(x.val[1], &error_b, shiftLUT, maskLUT, bitposLUT);
    uint8x16_t field_c = decode_fromascii(x.val[2], &error_c, shiftLUT, maskLUT, bitposLUT);
    uint8x16_t field_d = decode_fromascii(x.val[3], &error_d, shiftLUT, maskLUT, bitposLUT);

    const uint8x16_t err = vorrq_u8(vorrq_u8(error_a, error_b), vorrq_u8(error_c, error_d));
    /* Spill the combined error vector so it can be tested as two 64-bit words. */
    union {uint8_t mem[16]; uint64_t dw[2]; } error;
    vst1q_u8(error.mem, err);

    /* Check that the input only contains bytes belonging to the alphabet of
       Base64. If there are errors, decode the rest of the string with the
       scalar decoder. */
    if (error.dw[0] | error.dw[1])
      break;

    /* Repack four 6-bit fields into three bytes per lane. */
    uint8x16x3_t result;
    result.val[0] = vorrq_u8(vshrq_n_u8(field_b, 4), vshlq_n_u8(field_a, 2));
    result.val[1] = vorrq_u8(vshrq_n_u8(field_c, 2), vshlq_n_u8(field_b, 4));
    result.val[2] = vorrq_u8(field_d, vshlq_n_u8(field_c, 6));

    vst3q_u8((unsigned char *)out, result);
    out += 16 * 3;
    in += 16 * 4;
    inl -= 16 * 4;
  } while (inl >= 16 * 4);
  *left = inl;
  return out - out_orig;
}
246
#endif /* defined(__aarch64__) || defined(_M_ARM64) */
247
248
static zend_always_inline int php_base64_decode_impl(const unsigned char *in, size_t inl, unsigned char *out, size_t *outl, bool strict) /* {{{ */
249
0
{
250
0
  int ch;
251
0
  size_t i = 0, padding = 0, j = *outl;
252
253
#if defined(__aarch64__) || defined(_M_ARM64)
254
  if (inl >= 16 * 4) {
255
    size_t left = 0;
256
    j += neon_base64_decode(in, inl, out, &left);
257
    i = inl - left;
258
    in += i;
259
    inl = left;
260
  }
261
#endif
262
263
  /* run through the whole string, converting as we go */
264
0
  while (inl-- > 0) {
265
0
    ch = *in++;
266
0
    if (ch == base64_pad) {
267
0
      padding++;
268
0
      continue;
269
0
    }
270
271
0
    ch = base64_reverse_table[ch];
272
0
    if (!strict) {
273
      /* skip unknown characters and whitespace */
274
0
      if (ch < 0) {
275
0
        continue;
276
0
      }
277
0
    } else {
278
      /* skip whitespace */
279
0
      if (ch == -1) {
280
0
        continue;
281
0
      }
282
      /* fail on bad characters or if any data follows padding */
283
0
      if (ch == -2 || padding) {
284
0
        goto fail;
285
0
      }
286
0
    }
287
288
0
    switch (i % 4) {
289
0
      case 0:
290
0
        out[j] = ch << 2;
291
0
        break;
292
0
      case 1:
293
0
        out[j++] |= ch >> 4;
294
0
        out[j] = (ch & 0x0f) << 4;
295
0
        break;
296
0
      case 2:
297
0
        out[j++] |= ch >>2;
298
0
        out[j] = (ch & 0x03) << 6;
299
0
        break;
300
0
      case 3:
301
0
        out[j++] |= ch;
302
0
        break;
303
0
    }
304
0
    i++;
305
0
  }
306
307
  /* fail if the input is truncated (only one char in last group) */
308
0
  if (strict && i % 4 == 1) {
309
0
    goto fail;
310
0
  }
311
312
  /* fail if the padding length is wrong (not VV==, VVV=), but accept zero padding
313
   * RFC 4648: "In some circumstances, the use of padding [--] is not required" */
314
0
  if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) {
315
0
    goto fail;
316
0
  }
317
318
0
  *outl = j;
319
0
  out[j] = '\0';
320
321
0
  return 1;
322
323
0
fail:
324
0
  return 0;
325
0
}
326
/* }}} */
327
328
/* {{{ php_base64_encode */
329
330
#ifdef ZEND_INTRIN_AVX2_NATIVE
331
# undef ZEND_INTRIN_SSSE3_NATIVE
332
# undef ZEND_INTRIN_SSSE3_RESOLVER
333
# undef ZEND_INTRIN_SSSE3_FUNC_PROTO
334
# undef ZEND_INTRIN_SSSE3_FUNC_PTR
335
#elif defined(ZEND_INTRIN_AVX2_FUNC_PROTO) && defined(ZEND_INTRIN_SSSE3_NATIVE)
336
# undef ZEND_INTRIN_SSSE3_NATIVE
337
# undef ZEND_INTRIN_SSSE3_RESOLVER
338
# define ZEND_INTRIN_SSSE3_RESOLVER 1
339
# define ZEND_INTRIN_SSSE3_FUNC_PROTO 1
340
# undef ZEND_INTRIN_SSSE3_FUNC_DECL
341
# ifdef HAVE_FUNC_ATTRIBUTE_TARGET
342
#  define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
343
# else
344
#  define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func
345
# endif
346
#elif defined(ZEND_INTRIN_AVX2_FUNC_PTR) && defined(ZEND_INTRIN_SSSE3_NATIVE)
347
# undef ZEND_INTRIN_SSSE3_NATIVE
348
# undef ZEND_INTRIN_SSSE3_RESOLVER
349
# define ZEND_INTRIN_SSSE3_RESOLVER 1
350
# define ZEND_INTRIN_SSSE3_FUNC_PTR 1
351
# undef ZEND_INTRIN_SSSE3_FUNC_DECL
352
# ifdef HAVE_FUNC_ATTRIBUTE_TARGET
353
#  define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
354
# else
355
#  define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func
356
# endif
357
#endif
358
359
/* Only enable the AVX-512 resolvers when AVX2 is also dispatched through a resolver */
360
#if defined(ZEND_INTRIN_AVX2_FUNC_PROTO) && defined(ZEND_INTRIN_AVX512_FUNC_PROTO)
361
#define BASE64_INTRIN_AVX512_FUNC_PROTO 1
362
#endif
363
#if defined(ZEND_INTRIN_AVX2_FUNC_PTR) && defined(ZEND_INTRIN_AVX512_FUNC_PTR)
364
#define BASE64_INTRIN_AVX512_FUNC_PTR 1
365
#endif
366
#if defined(ZEND_INTRIN_AVX2_FUNC_PROTO) && defined(ZEND_INTRIN_AVX512_VBMI_FUNC_PROTO)
367
#define BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO 1
368
#endif
369
#if defined(ZEND_INTRIN_AVX2_FUNC_PTR) && defined(ZEND_INTRIN_AVX512_VBMI_FUNC_PTR)
370
#define BASE64_INTRIN_AVX512_VBMI_FUNC_PTR 1
371
#endif
372
373
#ifdef ZEND_INTRIN_AVX2_NATIVE
374
# include <immintrin.h>
375
#elif defined(ZEND_INTRIN_SSSE3_NATIVE)
376
# include <tmmintrin.h>
377
#elif defined(ZEND_INTRIN_SSSE3_RESOLVER) || defined(ZEND_INTRIN_AVX2_RESOLVER)
378
# ifdef ZEND_INTRIN_AVX2_RESOLVER
379
#  include <immintrin.h>
380
# else
381
#  include <tmmintrin.h>
382
# endif /* ZEND_INTRIN_AVX2_RESOLVER */
383
# include "Zend/zend_cpuinfo.h"
384
385
# if defined(BASE64_INTRIN_AVX512_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_FUNC_PTR)
386
ZEND_INTRIN_AVX512_FUNC_DECL(zend_string *php_base64_encode_avx512(const unsigned char *str, size_t length, zend_long flags));
387
ZEND_INTRIN_AVX512_FUNC_DECL(zend_string *php_base64_decode_ex_avx512(const unsigned char *str, size_t length, bool strict));
388
# endif
389
# if defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PTR)
390
ZEND_INTRIN_AVX512_VBMI_FUNC_DECL(zend_string *php_base64_encode_avx512_vbmi(const unsigned char *str, size_t length, zend_long flags));
391
ZEND_INTRIN_AVX512_VBMI_FUNC_DECL(zend_string *php_base64_decode_ex_avx512_vbmi(const unsigned char *str, size_t length, bool strict));
392
# endif
393
394
# ifdef ZEND_INTRIN_AVX2_RESOLVER
395
ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length, zend_long flags));
396
ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, bool strict));
397
# endif
398
399
# ifdef ZEND_INTRIN_SSSE3_RESOLVER
400
ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length, zend_long flags));
401
ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, bool strict));
402
# endif
403
404
zend_string *php_base64_encode_default(const unsigned char *str, size_t length, zend_long flags);
405
zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, bool strict);
406
407
# if (defined(ZEND_INTRIN_AVX2_FUNC_PROTO) || defined(ZEND_INTRIN_SSSE3_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO))
408
PHPAPI zend_string *php_base64_encode_ex(const unsigned char *str, size_t length, zend_long flags) __attribute__((ifunc("resolve_base64_encode")));
409
PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict) __attribute__((ifunc("resolve_base64_decode")));
410
411
typedef zend_string *(*base64_encode_func_t)(const unsigned char *, size_t, zend_long flags);
412
typedef zend_string *(*base64_decode_func_t)(const unsigned char *, size_t, bool);
413
414
ZEND_NO_SANITIZE_ADDRESS
415
ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
416
2
static base64_encode_func_t resolve_base64_encode(void) {
417
2
# ifdef BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO
418
2
  if (zend_cpu_supports_avx512_vbmi()) {
419
0
    return php_base64_encode_avx512_vbmi;
420
0
  } else
421
2
# endif
422
2
# ifdef BASE64_INTRIN_AVX512_FUNC_PROTO
423
2
  if (zend_cpu_supports_avx512()) {
424
0
    return php_base64_encode_avx512;
425
0
  } else
426
2
# endif
427
2
# ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
428
2
  if (zend_cpu_supports_avx2()) {
429
2
    return php_base64_encode_avx2;
430
2
  } else
431
0
# endif
432
0
#ifdef ZEND_INTRIN_SSSE3_FUNC_PROTO
433
0
  if (zend_cpu_supports_ssse3()) {
434
0
    return php_base64_encode_ssse3;
435
0
  }
436
0
#endif
437
0
  return php_base64_encode_default;
438
2
}
439
440
ZEND_NO_SANITIZE_ADDRESS
441
ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
442
2
static base64_decode_func_t resolve_base64_decode(void) {
443
2
# ifdef BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO
444
2
  if (zend_cpu_supports_avx512_vbmi()) {
445
0
    return php_base64_decode_ex_avx512_vbmi;
446
0
  } else
447
2
# endif
448
2
# ifdef BASE64_INTRIN_AVX512_FUNC_PROTO
449
2
  if (zend_cpu_supports_avx512()) {
450
0
    return php_base64_decode_ex_avx512;
451
0
  } else
452
2
# endif
453
2
# ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
454
2
  if (zend_cpu_supports_avx2()) {
455
2
    return php_base64_decode_ex_avx2;
456
2
  } else
457
0
# endif
458
0
#ifdef ZEND_INTRIN_SSSE3_FUNC_PROTO
459
0
  if (zend_cpu_supports_ssse3()) {
460
0
    return php_base64_decode_ex_ssse3;
461
0
  }
462
0
#endif
463
0
  return php_base64_decode_ex_default;
464
2
}
465
# else /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */
466
467
/* Function-pointer dispatch used when ifunc resolvers are not available.
 * Both pointers start out NULL and are assigned by
 * PHP_MINIT_FUNCTION(base64_intrin). */
PHPAPI zend_string *(*php_base64_encode_ptr)(const unsigned char *str, size_t length, zend_long flags) = NULL;
468
PHPAPI zend_string *(*php_base64_decode_ex_ptr)(const unsigned char *str, size_t length, bool strict) = NULL;
469
470
/* Public encoder entry point: forwards to the implementation selected at
 * module init. Calls through php_base64_encode_ptr without a NULL check. */
PHPAPI zend_string *php_base64_encode_ex(const unsigned char *str, size_t length, zend_long flags) {
471
  return php_base64_encode_ptr(str, length, flags);
472
}
473
/* Public decoder entry point: forwards to the implementation selected at
 * module init. Calls through php_base64_decode_ex_ptr without a NULL check. */
PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict) {
474
  return php_base64_decode_ex_ptr(str, length, strict);
475
}
476
477
PHP_MINIT_FUNCTION(base64_intrin)
478
{
479
# ifdef BASE64_INTRIN_AVX512_VBMI_FUNC_PTR
480
  if (zend_cpu_supports_avx512_vbmi()) {
481
    php_base64_encode_ptr = php_base64_encode_avx512_vbmi;
482
    php_base64_decode_ex_ptr = php_base64_decode_ex_avx512_vbmi;
483
  } else
484
# endif
485
# ifdef BASE64_INTRIN_AVX512_FUNC_PTR
486
  if (zend_cpu_supports_avx512()) {
487
    php_base64_encode_ptr = php_base64_encode_avx512;
488
    php_base64_decode_ex_ptr = php_base64_decode_ex_avx512;
489
  } else
490
# endif
491
# ifdef ZEND_INTRIN_AVX2_FUNC_PTR
492
  if (zend_cpu_supports_avx2()) {
493
    php_base64_encode_ptr = php_base64_encode_avx2;
494
    php_base64_decode_ex_ptr = php_base64_decode_ex_avx2;
495
  } else
496
# endif
497
#ifdef ZEND_INTRIN_SSSE3_FUNC_PTR
498
  if (zend_cpu_supports_ssse3()) {
499
    php_base64_encode_ptr = php_base64_encode_ssse3;
500
    php_base64_decode_ex_ptr = php_base64_decode_ex_ssse3;
501
  } else
502
#endif
503
  {
504
    php_base64_encode_ptr = php_base64_encode_default;
505
    php_base64_decode_ex_ptr = php_base64_decode_ex_default;
506
  }
507
  return SUCCESS;
508
}
509
# endif /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */
510
#endif /* ZEND_INTRIN_AVX2_NATIVE */
511
512
#if defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PTR)
513
/* Base64 encoder using AVX-512 VBMI byte permutes.
 *
 * str     input bytes
 * length  number of input bytes
 * flags   passed on to php_base64_encode_impl for the tail
 *
 * Returns a newly allocated zend_string holding the encoded text. */
zend_string *php_base64_encode_avx512_vbmi(const unsigned char *str, size_t length, zend_long flags)
{
  /* Four output characters for every (started) group of three input bytes. */
  zend_string *encoded = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
  unsigned char *dst = (unsigned char *)ZSTR_VAL(encoded);
  const unsigned char *src = str;

  /* Byte selectors spreading each 3-byte group over a 32-bit lane. */
  const __m512i shuffle_splitting = _mm512_setr_epi32(
    0x01020001, 0x04050304, 0x07080607, 0x0a0b090a, 0x0d0e0c0d, 0x10110f10,
    0x13141213, 0x16171516, 0x191a1819, 0x1c1d1b1c, 0x1f201e1f, 0x22232122,
    0x25262425, 0x28292728, 0x2b2c2a2b, 0x2e2f2d2e);
  /* Per-byte bit offsets for the multishift. */
  const __m512i multi_shifts = _mm512_set1_epi64(0x3036242a1016040a);
  const char *ascii_lookup_tbl = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
  const __m512i ascii_lookup = _mm512_loadu_si512((__m512i *)ascii_lookup_tbl);

  /* Each round loads 64 bytes but consumes only 48, so a full 64 bytes must
     still be available. */
  for (; length > 63; src += 48, dst += 64, length -= 48) {
    /* Step 1: load input data */
    __m512i block = _mm512_loadu_si512((const __m512i *)src);

    /* Step 2: splitting 24-bit words into 32-bit lanes */
    block = _mm512_permutexvar_epi8(shuffle_splitting, block);

    /* Step 3: moving 6-bit word to separate bytes */
    block = _mm512_multishift_epi64_epi8(multi_shifts, block);

    /* Step 4: conversion to ASCII */
    block = _mm512_permutexvar_epi8(block, ascii_lookup);

    /* Step 5: store the final result */
    _mm512_storeu_si512((__m512i *)dst, block);
  }

  /* Scalar code finishes the remainder, including padding and terminator. */
  dst = php_base64_encode_impl(src, length, dst, flags);

  ZSTR_LEN(encoded) = (dst - (unsigned char *)ZSTR_VAL(encoded));

  return encoded;
}
556
557
zend_string *php_base64_decode_ex_avx512_vbmi(const unsigned char *str, size_t length, bool strict)
558
0
{
559
0
  const unsigned char *c = str;
560
0
  unsigned char *o;
561
0
  size_t outl = 0;
562
0
  zend_string *result;
563
564
0
  result = zend_string_alloc(length, 0);
565
0
  o = (unsigned char *)ZSTR_VAL(result);
566
567
0
  const __m512i lookup_0 = _mm512_setr_epi32(
568
0
    0x80808080, 0x80808080, 0x80808080, 0x80808080, 0x80808080, 0x80808080,
569
0
    0x80808080, 0x80808080, 0x80808080, 0x80808080, 0x3e808080, 0x3f808080,
570
0
    0x37363534, 0x3b3a3938, 0x80803d3c, 0x80808080);
571
0
  const __m512i lookup_1 = _mm512_setr_epi32(
572
0
    0x02010080, 0x06050403, 0x0a090807, 0x0e0d0c0b, 0x1211100f, 0x16151413,
573
0
    0x80191817, 0x80808080, 0x1c1b1a80, 0x201f1e1d, 0x24232221, 0x28272625,
574
0
    0x2c2b2a29, 0x302f2e2d, 0x80333231, 0x80808080);
575
576
0
  const __m512i merge_mask1 = _mm512_set1_epi32(0x01400140);
577
0
  const __m512i merge_mask2 = _mm512_set1_epi32(0x00011000);
578
579
0
  const __m512i continuous_mask = _mm512_setr_epi32(
580
0
    0x06000102, 0x090a0405, 0x0c0d0e08, 0x16101112, 0x191a1415, 0x1c1d1e18,
581
0
    0x26202122, 0x292a2425, 0x2c2d2e28, 0x36303132, 0x393a3435, 0x3c3d3e38,
582
0
    0x00000000, 0x00000000, 0x00000000, 0x00000000);
583
584
0
  while (length > 64) {
585
    /* Step 1: load input data */
586
0
    const __m512i input = _mm512_loadu_si512((__m512i *)c);
587
588
    /* Step 2: translation into 6-bit values(saved on bytes) from ASCII and error detection */
589
0
    __m512i str = _mm512_permutex2var_epi8(lookup_0, input, lookup_1);
590
0
    const uint64_t mask = _mm512_movepi8_mask(_mm512_or_epi64(str, input)); /* convert MSBs to the mask */
591
0
    if (mask) {
592
0
      break;
593
0
    }
594
595
    /* Step 3: pack four fields within 32-bit words into 24-bit words. */
596
0
    const __m512i merge_ab_and_bc = _mm512_maddubs_epi16(str, merge_mask1);
597
0
    str = _mm512_madd_epi16(merge_ab_and_bc, merge_mask2);
598
599
    /* Step 4: move 3-byte words into the continuous array. */
600
0
    str = _mm512_permutexvar_epi8(continuous_mask, str);
601
602
    /* Step 5: store the final result */
603
0
    _mm512_storeu_si512((__m512i *)o, str);
604
605
0
    c += 64;
606
0
    o += 48;
607
0
    outl += 48;
608
0
    length -= 64;
609
0
  }
610
611
0
  if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
612
0
    zend_string_efree(result);
613
0
    return NULL;
614
0
  }
615
616
0
  ZSTR_LEN(result) = outl;
617
618
0
  return result;
619
0
}
620
#endif
621
622
#if defined(BASE64_INTRIN_AVX512_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_FUNC_PTR)
623
/* Base64 encoder using AVX-512 without the VBMI byte permutes.
 *
 * str     input bytes
 * length  number of input bytes
 * flags   passed on to php_base64_encode_impl for the tail
 *
 * Returns a newly allocated zend_string holding the encoded text. */
zend_string *php_base64_encode_avx512(const unsigned char *str, size_t length, zend_long flags)
{
  /* Four output characters for every (started) group of three input bytes. */
  zend_string *encoded = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
  unsigned char *dst = (unsigned char *)ZSTR_VAL(encoded);
  const unsigned char *src = str;

  /* Each round loads 64 bytes but consumes only 48, so a full 64 bytes must
     still be available. */
  for (; length > 63; src += 48, dst += 64, length -= 48) {
    /* Step 1: load input data */
    /* [????|????|????|????|PPPO|OONN|NMMM|LLLK|KKJJ|JIII|HHHG|GGFF|FEEE|DDDC|CCBB|BAAA] */
    __m512i block = _mm512_loadu_si512((const __m512i *)src);

    /* Step 2: splitting 24-bit words into 32-bit lanes */
    /* [0000|PPPO|OONN|NMMM|0000|LLLK|KKJJ|JIII|0000|HHHG|GGFF|FEEE|0000|DDDC|CCBB|BAAA] */
    block = _mm512_permutexvar_epi32(
      _mm512_set_epi32(-1, 11, 10, 9, -1, 8, 7, 6, -1, 5, 4, 3, -1, 2, 1, 0), block);
    /* [D1 D2 D0 D1|C1 C2 C0 C1|B1 B2 B0 B1|A1 A2 A0 A1] x 4 */
    block = _mm512_shuffle_epi8(block, _mm512_set4_epi32(0x0a0b090a, 0x07080607, 0x04050304, 0x01020001));

    /* Step 3: moving 6-bit word to separate bytes */
    /* in:      [bbbbcccc|ccdddddd|aaaaaabb|bbbbcccc] */
    /* fields:  [0000cccc|cc000000|aaaaaa00|00000000] */
    const __m512i ac_fields = _mm512_and_si512(block, _mm512_set1_epi32(0x0fc0fc00));
    /* ac:      [00000000|00cccccc|00000000|00aaaaaa] */
    const __m512i ac = _mm512_srlv_epi16(ac_fields, _mm512_set1_epi32(0x0006000a));
    /* bd:      [ccdddddd|00000000|aabbbbbb|cccc0000] */
    const __m512i bd = _mm512_sllv_epi16(block, _mm512_set1_epi32(0x00080004));
    /* sextets: [00dddddd|00cccccc|00bbbbbb|00aaaaaa] */
    const __m512i sextets = _mm512_ternarylogic_epi32(_mm512_set1_epi32(0x3f003f00), bd, ac, 0xca);

    /* Step 4: conversion to ASCII
       Build a per-byte slot (0 for 26..51, 1..12 for 52..63, 13 for 0..25),
       fetch the offset stored in that slot and add it to the 6-bit value. */
    __m512i ascii = _mm512_subs_epu8(sextets, _mm512_set1_epi8(51));
    const __mmask64 below_26 = _mm512_cmpgt_epi8_mask(_mm512_set1_epi8(26), sextets);
    ascii = _mm512_mask_mov_epi8(ascii, below_26, _mm512_set1_epi8(13));
    const __m512i offsets = _mm512_set4_epi32(0x000041f0, 0xedfcfcfc, 0xfcfcfcfc, 0xfcfcfc47);
    ascii = _mm512_shuffle_epi8(offsets, ascii);
    ascii = _mm512_add_epi8(ascii, sextets);

    /* Step 5: store the final result */
    _mm512_storeu_si512((__m512i *)dst, ascii);
  }

  /* Scalar code finishes the remainder, including padding and terminator. */
  dst = php_base64_encode_impl(src, length, dst, flags);

  ZSTR_LEN(encoded) = (dst - (unsigned char *)ZSTR_VAL(encoded));

  return encoded;
}
676
677
/* Pack four byte values into one 32-bit word, b0 in the least significant
 * byte. Each argument is truncated to 8 bits first, so negative constants such
 * as -65 contribute their two's-complement byte. Arguments and the whole
 * expansion are parenthesized so the macro can be used inside any expression. */
#define build_dword(b0, b1, b2, b3)                                   \
  (((uint32_t)(uint8_t)(b0) << 0) | ((uint32_t)(uint8_t)(b1) << 8) |  \
   ((uint32_t)(uint8_t)(b2) << 16) | ((uint32_t)(uint8_t)(b3) << 24))

/* Build a 512-bit vector in which each of the four 128-bit lanes holds the
 * same 16 bytes b0..b15 (b0 at the lowest address of the lane). */
#define _mm512_set4lanes_epi8(b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15) \
  _mm512_setr4_epi32(build_dword(b0, b1, b2, b3), build_dword(b4, b5, b6, b7),      \
         build_dword(b8, b9, b10, b11), build_dword(b12, b13, b14, b15))
684
685
zend_string *php_base64_decode_ex_avx512(const unsigned char *str, size_t length, bool strict)
686
0
{
687
0
  const unsigned char *c = str;
688
0
  unsigned char *o;
689
0
  size_t outl = 0;
690
0
  zend_string *result;
691
692
0
  result = zend_string_alloc(length, 0);
693
0
  o = (unsigned char *)ZSTR_VAL(result);
694
695
0
  while (length > 64) {
696
    /* Step 1: load input data */
697
0
    __m512i str = _mm512_loadu_si512((__m512i *)c);
698
699
    /* Step 2: translation into 6-bit values(saved on bytes) from ASCII and error detection */
700
0
    const __m512i higher_nibble = _mm512_and_si512(_mm512_srli_epi32(str, 4), _mm512_set1_epi8(0x0f));
701
0
    const __m512i lower_nibble = _mm512_and_si512(str, _mm512_set1_epi8(0x0f));
702
0
    const __m512i shiftLUT = _mm512_set4lanes_epi8(
703
0
        0, 0, 19, 4, -65, -65, -71, -71, 0, 0, 0, 0, 0, 0, 0, 0);
704
0
    const __m512i maskLUT = _mm512_set4lanes_epi8(
705
0
        /* 0        : 0b1010_1000*/ 0xa8,
706
0
        /* 1 .. 9   : 0b1111_1000*/ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
707
0
        /* 10       : 0b1111_0000*/ 0xf0,
708
0
        /* 11       : 0b0101_0100*/ 0x54,
709
0
        /* 12 .. 14 : 0b0101_0000*/ 0x50, 0x50, 0x50,
710
0
        /* 15       : 0b0101_0100*/ 0x54);
711
0
    const __m512i bitposLUT = _mm512_set4lanes_epi8(
712
0
        0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
713
0
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
714
0
    const __m512i M = _mm512_shuffle_epi8(maskLUT, lower_nibble);
715
0
    const __m512i bit = _mm512_shuffle_epi8(bitposLUT, higher_nibble);
716
0
    const uint64_t match = _mm512_test_epi8_mask(M, bit);
717
0
    if (match != (uint64_t)-1) {
718
0
      break;
719
0
    }
720
0
    const __m512i sh = _mm512_shuffle_epi8(shiftLUT, higher_nibble);
721
0
    const __mmask64 eq_2f = _mm512_cmpeq_epi8_mask(str, _mm512_set1_epi8(0x2f));
722
0
    const __m512i shift = _mm512_mask_mov_epi8(sh, eq_2f, _mm512_set1_epi8(16));
723
0
    str = _mm512_add_epi8(str, shift);
724
725
    /* Step 3: pack four fields within 32-bit words into 24-bit words. */
726
0
    const __m512i merge_ab_and_bc = _mm512_maddubs_epi16(str, _mm512_set1_epi32(0x01400140));
727
0
    str = _mm512_madd_epi16(merge_ab_and_bc, _mm512_set1_epi32(0x00011000));
728
729
    /* Step 4: move 3-byte words into the continuous array. */
730
0
    const __m512i t1 = _mm512_shuffle_epi8(str,
731
0
      _mm512_set4lanes_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1));
732
0
    const __m512i s6 = _mm512_setr_epi32(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0, 0, 0, 0);
733
0
    const __m512i t2 = _mm512_permutexvar_epi32(s6, t1);
734
735
    /* Step 5: store the final result */
736
0
    _mm512_storeu_si512((__m512i *)o, t2);
737
738
0
    c += 64;
739
0
    o += 48;
740
0
    outl += 48;
741
0
    length -= 64;
742
0
  }
743
744
0
  if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
745
0
    zend_string_efree(result);
746
0
    return NULL;
747
0
  }
748
749
0
  ZSTR_LEN(result) = outl;
750
751
0
  return result;
752
0
}
753
#endif
754
755
#if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER)
756
# if defined(ZEND_INTRIN_AVX2_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
757
static __m256i php_base64_encode_avx2_reshuffle(__m256i in) __attribute__((target("avx2")));
758
static __m256i php_base64_encode_avx2_translate(__m256i in) __attribute__((target("avx2")));
759
# endif
760
static __m256i php_base64_encode_avx2_reshuffle(__m256i in)
{
  /* Splits 24 input bytes into 32 six-bit values, one per output byte.
   *
   * The input is expected shifted by 4 bytes so that each 128-bit lane
   * carries its own 12 payload bytes and an in-lane shuffle is enough.
   *
   * input, bytes MSB to LSB:
   * 0 0 0 0 x w v u t s r q p o n m
   * l k j i h g f e d c b a 0 0 0 0 */
  const __m256i spread = _mm256_set_epi8(
    10, 11,  9, 10,
     7,  8,  6,  7,
     4,  5,  3,  4,
     1,  2,  0,  1,

    14, 15, 13, 14,
    11, 12, 10, 11,
     8,  9,  7,  8,
     5,  6,  4,  5);

  /* Each 32-bit word now holds bytes (b1, b0, b2, b1), LSB first, of one
   * 3-byte input group. */
  const __m256i words = _mm256_shuffle_epi8(in, spread);

  /* First and third sextet: isolate them, then shift right through a
   * high-half multiply (>> 10 in the low 16 bits, >> 6 in the high 16). */
  const __m256i odd_sextets = _mm256_mulhi_epu16(
    _mm256_and_si256(words, _mm256_set1_epi32(0x0fc0fc00)),
    _mm256_set1_epi32(0x04000040));

  /* Second and fourth sextet: isolate them, then shift left through a
   * low-half multiply (<< 4 in the low 16 bits, << 8 in the high 16). */
  const __m256i even_sextets = _mm256_mullo_epi16(
    _mm256_and_si256(words, _mm256_set1_epi32(0x003f03f0)),
    _mm256_set1_epi32(0x01000010));

  /* output (upper case are MSB, lower case are LSB):
   * 00xxxxxx 00wwwwXX 00vvWWWW 00VVVVVV
   * 00uuuuuu 00ttttUU 00ssTTTT 00SSSSSS
   * 00rrrrrr 00qqqqRR 00ppQQQQ 00PPPPPP
   * 00oooooo 00nnnnOO 00mmNNNN 00MMMMMM
   * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
   * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
   * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
   * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */
  return _mm256_or_si256(odd_sextets, even_sextets);
}
798
799
static __m256i php_base64_encode_avx2_translate(__m256i in)
{
  /* Maps each byte value 0..63 to its Base64 alphabet character by adding
   * a per-range offset looked up from a 16-entry table (same table in both
   * 128-bit lanes):
   *   slot 0       +65   values  0..25 -> 'A'..'Z'
   *   slot 1       +71   values 26..51 -> 'a'..'z'
   *   slots 2..11   -4   values 52..61 -> '0'..'9'
   *   slot 12      -19   value  62     -> '+'
   *   slot 13      -16   value  63     -> '/' */
  const __m256i offsets = _mm256_setr_epi8(
      65, 71, -4, -4, -4, -4, -4, -4,
      -4, -4, -4, -4, -19, -16, 0, 0,
      65, 71, -4, -4, -4, -4, -4, -4,
      -4, -4, -4, -4, -19, -16, 0, 0);

  /* 0xFF (-1) for every value above 25, 0x00 otherwise. */
  const __m256i above_upper = _mm256_cmpgt_epi8(in, _mm256_set1_epi8(25));

  /* Saturating subtract: 0 for values up to 51, 1..12 for 52..63. */
  const __m256i reduced = _mm256_subs_epu8(in, _mm256_set1_epi8(51));

  /* Subtracting -1 bumps every slot except the one for 'A'..'Z'. */
  const __m256i slot = _mm256_sub_epi8(reduced, above_upper);

  return _mm256_add_epi8(in, _mm256_shuffle_epi8(offsets, slot));
}
818
#endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER */
819
820
#if defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER)
821
822
# if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
823
static __m128i php_base64_encode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3")));
824
static __m128i php_base64_encode_ssse3_translate(__m128i in) __attribute__((target("ssse3")));
825
# endif
826
827
static __m128i php_base64_encode_ssse3_reshuffle(__m128i in)
{
  /* Splits the low 12 input bytes into 16 six-bit values, one per output
   * byte.
   *
   * input, bytes MSB to LSB:
   * 0 0 0 0 l k j i h g f e d c b a */
  const __m128i spread = _mm_set_epi8(
        10, 11,  9, 10,
        7,  8,  6,  7,
        4,  5,  3,  4,
        1,  2,  0,  1);

  /* Each 32-bit word now holds bytes (b1, b0, b2, b1), LSB first, of one
   * 3-byte input group. */
  const __m128i words = _mm_shuffle_epi8(in, spread);

  /* First and third sextet: mask, then shift right via a high-half
   * multiply (>> 10 in the low 16 bits, >> 6 in the high 16). */
  const __m128i odd_sextets = _mm_mulhi_epu16(
    _mm_and_si128(words, _mm_set1_epi32(0x0fc0fc00)),
    _mm_set1_epi32(0x04000040));

  /* Second and fourth sextet: mask, then shift left via a low-half
   * multiply (<< 4 in the low 16 bits, << 8 in the high 16). */
  const __m128i even_sextets = _mm_mullo_epi16(
    _mm_and_si128(words, _mm_set1_epi32(0x003f03f0)),
    _mm_set1_epi32(0x01000010));

  /* output (upper case are MSB, lower case are LSB):
   * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
   * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
   * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
   * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */
  return _mm_or_si128(odd_sextets, even_sextets);
}
854
855
static __m128i php_base64_encode_ssse3_translate(__m128i in)
{
  /* Translate values 0..63 to the Base64 alphabet. There are five sets:
   * #  From      To         Abs    Index  Characters
   * 0  [0..25]   [65..90]   +65        0  ABCDEFGHIJKLMNOPQRSTUVWXYZ
   * 1  [26..51]  [97..122]  +71        1  abcdefghijklmnopqrstuvwxyz
   * 2  [52..61]  [48..57]    -4  [2..11]  0123456789
   * 3  [62]      [43]       -19       12  +
   * 4  [63]      [47]       -16       13  / */
  const __m128i offsets = _mm_setr_epi8(
      65,  71, -4, -4,
      -4,  -4, -4, -4,
      -4,  -4, -4, -4,
      -19, -16,  0,  0
      );

  /* 0xFF (-1) for sets #1..#4, 0x00 for set #0. */
  const __m128i not_set0 = _mm_cmpgt_epi8(in, _mm_set1_epi8(25));

  /* Saturating subtract gives the right index for set #0 and an index
   * that is one too small for every other set. */
  const __m128i reduced = _mm_subs_epu8(in, _mm_set1_epi8(51));

  /* Subtracting the -1 mask adds the missing 1 for sets #1..#4. */
  const __m128i slot = _mm_sub_epi8(reduced, not_set0);

  /* Apply the per-set offset to the input values. */
  return _mm_add_epi8(in, _mm_shuffle_epi8(offsets, slot));
}
886
887
/* SSSE3 bulk encoder body. Expands in a scope that provides the input
 * cursor `c`, the output cursor `o` and the remaining byte count `length`.
 * Each pass loads 16 bytes (hence the 16-byte minimum), encodes the first
 * 12 of them into 16 output characters and advances all three variables.
 * Whatever is left afterwards is the caller's responsibility. */
#define PHP_BASE64_ENCODE_SSSE3_LOOP                                      \
  for (; length >= 16; c += 12, o += 16, length -= 12) {                  \
    const __m128i sextets = php_base64_encode_ssse3_reshuffle(            \
        _mm_loadu_si128((__m128i *)c));                                   \
                                                                          \
    _mm_storeu_si128((__m128i *)o,                                        \
        php_base64_encode_ssse3_translate(sextets));                      \
  }
900
901
#endif /* ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
902
903
#if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER)
904
# if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_SSSE3_NATIVE)
905
PHPAPI zend_string *php_base64_encode_ex(const unsigned char *str, size_t length, zend_long flags)
906
# elif defined(ZEND_INTRIN_AVX2_RESOLVER)
907
zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length, zend_long flags)
908
# else /* ZEND_INTRIN_SSSE3_RESOLVER */
909
zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length, zend_long flags)
910
# endif
911
0
{
912
0
  const unsigned char *c = str;
913
0
  unsigned char *o;
914
0
  zend_string *result;
915
916
0
  result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
917
0
  o = (unsigned char *)ZSTR_VAL(result);
918
0
# if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER)
919
0
  if (length > 31) {
920
0
    __m256i s = _mm256_loadu_si256((__m256i *)c);
921
922
0
    s = _mm256_permutevar8x32_epi32(s, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6));
923
924
0
    for (;;) {
925
0
      s = php_base64_encode_avx2_reshuffle(s);
926
927
0
      s = php_base64_encode_avx2_translate(s);
928
929
0
      _mm256_storeu_si256((__m256i *)o, s);
930
0
      c += 24;
931
0
      o += 32;
932
0
      length -= 24;
933
0
      if (length < 28) {
934
0
        break;
935
0
      }
936
0
      s = _mm256_loadu_si256((__m256i *)(c - 4));
937
0
    }
938
0
  }
939
# else
940
  PHP_BASE64_ENCODE_SSSE3_LOOP;
941
# endif
942
943
0
  o = php_base64_encode_impl(c, length, o, flags);
944
945
0
  ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result));
946
947
0
  return result;
948
0
}
949
950
# if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(ZEND_INTRIN_AVX2_RESOLVER)
951
/* SSSE3 Base64 encoder used when both the AVX2 and the SSSE3 resolver
 * variants are built. Encodes in bulk with PHP_BASE64_ENCODE_SSSE3_LOOP and
 * hands the remaining tail, together with flags, to
 * php_base64_encode_impl(). Returns a newly allocated zend_string. */
zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length, zend_long flags)
{
  /* Four output characters for every (possibly partial) 3-byte group. */
  zend_string *result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
  unsigned char *o = (unsigned char *)ZSTR_VAL(result);
  const unsigned char *c = str;

  /* The macro advances c and o and shrinks length. */
  PHP_BASE64_ENCODE_SSSE3_LOOP;

  o = php_base64_encode_impl(c, length, o, flags);

  ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result));

  return result;
}
968
# endif
969
#endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
970
971
/* }}} */
972
973
#if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER)
974
# if defined(ZEND_INTRIN_AVX2_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
975
static __m256i php_base64_decode_avx2_reshuffle(__m256i in) __attribute__((target("avx2")));
976
# endif
977
978
static __m256i php_base64_decode_avx2_reshuffle(__m256i in)
{
  /* Packs 32 six-bit values (one per byte) into 24 contiguous bytes placed
   * in the low part of the result. */

  /* Byte pairs: even byte * 64 + odd byte -> one 12-bit value per 16 bits. */
  const __m256i pairs = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140));

  /* 16-bit pairs: low * 4096 + high -> one 24-bit value per 32 bits. */
  const __m256i quads = _mm256_madd_epi16(pairs, _mm256_set1_epi32(0x00011000));

  /* Within each 128-bit lane, reverse the three payload bytes of every
   * 32-bit word and pack them together; the top 4 bytes are zeroed. */
  const __m256i lane_pack = _mm256_setr_epi8(
        2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
        2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1);
  const __m256i packed = _mm256_shuffle_epi8(quads, lane_pack);

  /* Close the gap between the lanes: elements 0..2 and 4..6 become the
   * first six 32-bit elements of the result. */
  return _mm256_permutevar8x32_epi32(packed, _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1));
}
992
#endif
993
994
#if defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER)
995
# if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
996
static __m128i php_base64_decode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3")));
997
# endif
998
999
static __m128i php_base64_decode_ssse3_reshuffle(__m128i in)
{
  /* Packs 16 six-bit values (one per byte) into 12 contiguous bytes in the
   * low part of the result; the top 4 bytes come out as zero. */

  /* Byte pairs: even byte * 64 + odd byte.
   * 0000kkkk LLllllll 0000JJJJ JJjjKKKK
   * 0000hhhh IIiiiiii 0000GGGG GGggHHHH
   * 0000eeee FFffffff 0000DDDD DDddEEEE
   * 0000bbbb CCcccccc 0000AAAA AAaaBBBB */
  const __m128i pairs = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140));

  /* 16-bit pairs: low * 4096 + high.
   * 00000000 JJJJJJjj KKKKkkkk LLllllll
   * 00000000 GGGGGGgg HHHHhhhh IIiiiiii
   * 00000000 DDDDDDdd EEEEeeee FFffffff
   * 00000000 AAAAAAaa BBBBbbbb CCcccccc */
  const __m128i quads = _mm_madd_epi16(pairs, _mm_set1_epi32(0x00011000));

  const __m128i pack = _mm_setr_epi8(
     2,  1,  0,
     6,  5,  4,
    10,  9,  8,
    14, 13, 12,
    -1, -1, -1, -1);

  /* Byte-reverse each 24-bit group and pack the groups together.
   * 00000000 00000000 00000000 00000000
   * LLllllll KKKKkkkk JJJJJJjj IIiiiiii
   * HHHHhhhh GGGGGGgg FFffffff EEEEeeee
   * DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa */
  return _mm_shuffle_epi8(quads, pack);
}
1026
1027
/* SSSE3 bulk decoder body. Expands in a scope that provides the input cursor
 * `c`, the output cursor `o`, the output byte counter `outl` and the
 * remaining input byte count `length`.
 *
 * While more than 15 + 6 + 2 input bytes remain, one pass classifies 16
 * input characters through two nibble-indexed tables. If any character is
 * flagged the loop stops without consuming the chunk, leaving it to the code
 * that follows the macro. Otherwise the chunk becomes 12 output bytes; the
 * store writes 16 bytes, of which only the first 12 are accounted for. */
#define PHP_BASE64_DECODE_SSSE3_LOOP                                        \
  while (length > 15 + 6 + 2) {                                             \
    const __m128i chunk = _mm_loadu_si128((__m128i *)c);                    \
    const __m128i nibble_mask = _mm_set1_epi8(0x2f);                        \
    const __m128i class_lo = _mm_setr_epi8(                                 \
        0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,                     \
        0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);                    \
    const __m128i class_hi = _mm_setr_epi8(                                 \
        0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,                     \
        0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);                    \
    const __m128i offsets = _mm_setr_epi8(                                  \
        0,  16,  19,   4, -65, -65, -71, -71,                               \
        0,   0,   0,   0,   0,   0,   0,   0);                              \
    const __m128i hi_nib = _mm_and_si128(                                   \
        _mm_srli_epi32(chunk, 4), nibble_mask);                             \
    const __m128i lo_nib = _mm_and_si128(chunk, nibble_mask);               \
    const __m128i hi_bits = _mm_shuffle_epi8(class_hi, hi_nib);             \
    const __m128i lo_bits = _mm_shuffle_epi8(class_lo, lo_nib);             \
    __m128i roll, decoded;                                                  \
                                                                            \
    /* A common bit in both table entries flags the character. */           \
    if (UNEXPECTED(                                                         \
      _mm_movemask_epi8(                                                    \
        _mm_cmpgt_epi8(                                                     \
          _mm_and_si128(lo_bits, hi_bits), _mm_set1_epi8(0))))) {           \
      break;                                                                \
    }                                                                       \
                                                                            \
    /* 0x2f selects the offset one slot below its high nibble's slot. */    \
    roll = _mm_shuffle_epi8(                                                \
        offsets,                                                            \
        _mm_add_epi8(                                                       \
          _mm_cmpeq_epi8(chunk, _mm_set1_epi8(0x2f)), hi_nib));             \
                                                                            \
    decoded = php_base64_decode_ssse3_reshuffle(                            \
        _mm_add_epi8(chunk, roll));                                         \
    _mm_storeu_si128((__m128i *)o, decoded);                                \
                                                                            \
    c += 16;                                                                \
    o += 12;                                                                \
    outl += 12;                                                             \
    length -= 16;                                                           \
  }
1073
1074
#endif
1075
1076
#if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER)
1077
# if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_SSSE3_NATIVE)
1078
PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict)
1079
# elif defined(ZEND_INTRIN_AVX2_RESOLVER)
1080
zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, bool strict)
1081
# else
1082
zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, bool strict)
1083
# endif
1084
0
{
1085
0
  const unsigned char *c = str;
1086
0
  unsigned char *o;
1087
0
  size_t outl = 0;
1088
0
  zend_string *result;
1089
1090
0
  result = zend_string_alloc(length, 0);
1091
0
  o = (unsigned char *)ZSTR_VAL(result);
1092
1093
  /* See: "Faster Base64 Encoding and Decoding using AVX2 Instructions"
1094
  * https://arxiv.org/pdf/1704.00605.pdf */
1095
0
# if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER)
1096
0
  while (length > 31 + 11 + 2) {
1097
0
    __m256i lut_lo, lut_hi, lut_roll;
1098
0
    __m256i hi_nibbles, lo_nibbles, hi, lo;
1099
0
    __m256i str = _mm256_loadu_si256((__m256i *)c);
1100
1101
0
    lut_lo = _mm256_setr_epi8(
1102
0
        0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
1103
0
        0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A,
1104
0
        0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
1105
0
        0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
1106
1107
0
    lut_hi = _mm256_setr_epi8(
1108
0
        0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
1109
0
        0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
1110
0
        0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
1111
0
        0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
1112
1113
0
    lut_roll = _mm256_setr_epi8(
1114
0
        0,  16,  19,   4, -65, -65, -71, -71,
1115
0
        0,   0,   0,   0,   0,   0,   0,   0,
1116
0
        0,  16,  19,   4, -65, -65, -71, -71,
1117
0
        0,   0,   0,   0,   0,   0,   0,   0);
1118
1119
0
    hi_nibbles  = _mm256_and_si256(_mm256_srli_epi32(str, 4), _mm256_set1_epi8(0x2f));
1120
0
    lo_nibbles  = _mm256_and_si256(str, _mm256_set1_epi8(0x2f));
1121
0
    hi          = _mm256_shuffle_epi8(lut_hi, hi_nibbles);
1122
0
    lo          = _mm256_shuffle_epi8(lut_lo, lo_nibbles);
1123
1124
0
    if (!_mm256_testz_si256(lo, hi)) {
1125
0
      break;
1126
0
    } else {
1127
0
      __m256i eq_2f, roll;
1128
0
      eq_2f = _mm256_cmpeq_epi8(str, _mm256_set1_epi8(0x2f));
1129
0
      roll  = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2f, hi_nibbles));
1130
1131
1132
0
      str = _mm256_add_epi8(str, roll);
1133
1134
0
      str = php_base64_decode_avx2_reshuffle(str);
1135
1136
0
      _mm256_storeu_si256((__m256i *)o, str);
1137
1138
0
      c += 32;
1139
0
      o += 24;
1140
0
      outl += 24;
1141
0
      length -= 32;
1142
0
    }
1143
0
  }
1144
# else
1145
  PHP_BASE64_DECODE_SSSE3_LOOP;
1146
# endif
1147
1148
0
  if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
1149
0
    zend_string_efree(result);
1150
0
    return NULL;
1151
0
  }
1152
1153
0
  ZSTR_LEN(result) = outl;
1154
1155
0
  return result;
1156
0
}
1157
1158
# if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(ZEND_INTRIN_AVX2_RESOLVER)
1159
/* SSSE3 Base64 decoder used when both the AVX2 and the SSSE3 resolver
 * variants are built. Decodes in bulk with PHP_BASE64_DECODE_SSSE3_LOOP and
 * lets php_base64_decode_impl() handle whatever input remains, receiving
 * strict unchanged. Returns a newly allocated zend_string, or NULL (after
 * freeing the buffer) when php_base64_decode_impl() fails. */
zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, bool strict)
{
  zend_string *result = zend_string_alloc(length, 0);
  unsigned char *o = (unsigned char *)ZSTR_VAL(result);
  const unsigned char *c = str;
  size_t outl = 0;

  /* The macro advances c and o, grows outl and shrinks length. */
  PHP_BASE64_DECODE_SSSE3_LOOP;

  if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
    zend_string_efree(result);
    return NULL;
  }

  ZSTR_LEN(result) = outl;

  return result;
}
1180
# endif
1181
#endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
1182
1183
#if !defined(ZEND_INTRIN_AVX2_NATIVE) && !defined(ZEND_INTRIN_SSSE3_NATIVE)
1184
#if defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_RESOLVER)
1185
zend_string *php_base64_encode_default(const unsigned char *str, size_t length, zend_long flags)
1186
#else
1187
PHPAPI zend_string *php_base64_encode_ex(const unsigned char *str, size_t length, zend_long flags)
1188
#endif
1189
0
{
1190
0
  unsigned char *p;
1191
0
  zend_string *result;
1192
1193
0
  result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
1194
0
  p = (unsigned char *)ZSTR_VAL(result);
1195
1196
0
  p = php_base64_encode_impl(str, length, p, flags);
1197
1198
0
  ZSTR_LEN(result) = (p - (unsigned char *)ZSTR_VAL(result));
1199
1200
0
  return result;
1201
0
}
1202
#endif
1203
1204
#if !defined(ZEND_INTRIN_AVX2_NATIVE) && !defined(ZEND_INTRIN_SSSE3_NATIVE)
1205
#if defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_RESOLVER)
1206
zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, bool strict)
1207
#else
1208
PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict)
1209
#endif
1210
0
{
1211
0
  zend_string *result;
1212
0
  size_t outl = 0;
1213
1214
0
  result = zend_string_alloc(length, 0);
1215
1216
0
  if (!php_base64_decode_impl(str, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
1217
0
    zend_string_efree(result);
1218
0
    return NULL;
1219
0
  }
1220
1221
0
  ZSTR_LEN(result) = outl;
1222
1223
0
  return result;
1224
0
}
1225
#endif
1226
/* }}} */
1227
1228
/* {{{ Encodes string using MIME base64 algorithm */
1229
PHP_FUNCTION(base64_encode)
1230
0
{
1231
0
  char *str;
1232
0
  size_t str_len;
1233
0
  zend_string *result;
1234
1235
0
  ZEND_PARSE_PARAMETERS_START(1, 1)
1236
0
    Z_PARAM_STRING(str, str_len)
1237
0
  ZEND_PARSE_PARAMETERS_END();
1238
1239
0
  result = php_base64_encode((unsigned char*)str, str_len);
1240
0
  RETURN_STR(result);
1241
0
}
1242
/* }}} */
1243
1244
/* {{{ Decodes string using MIME base64 algorithm */
1245
PHP_FUNCTION(base64_decode)
1246
0
{
1247
0
  char *str;
1248
0
  bool strict = 0;
1249
0
  size_t str_len;
1250
0
  zend_string *result;
1251
1252
0
  ZEND_PARSE_PARAMETERS_START(1, 2)
1253
0
    Z_PARAM_STRING(str, str_len)
1254
0
    Z_PARAM_OPTIONAL
1255
0
    Z_PARAM_BOOL(strict)
1256
0
  ZEND_PARSE_PARAMETERS_END();
1257
1258
0
  result = php_base64_decode_ex((unsigned char*)str, str_len, strict);
1259
0
  if (result != NULL) {
1260
0
    RETURN_STR(result);
1261
0
  } else {
1262
0
    RETURN_FALSE;
1263
0
  }
1264
0
}
1265
/* }}} */