Coverage Report

Created: 2025-06-13 06:43

/src/php-src/ext/standard/base64.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
   +----------------------------------------------------------------------+
3
   | Copyright (c) The PHP Group                                          |
4
   +----------------------------------------------------------------------+
5
   | This source file is subject to version 3.01 of the PHP license,      |
6
   | that is bundled with this package in the file LICENSE, and is        |
7
   | available through the world-wide-web at the following url:           |
8
   | https://www.php.net/license/3_01.txt                                 |
9
   | If you did not receive a copy of the PHP license and are unable to   |
10
   | obtain it through the world-wide-web, please send a note to          |
11
   | license@php.net so we can mail you a copy immediately.               |
12
   +----------------------------------------------------------------------+
13
   | Author: Jim Winstead <jimw@php.net>                                  |
14
   |         Xinchen Hui <laruence@php.net>                               |
15
   +----------------------------------------------------------------------+
16
 */
17
18
#include <string.h>
19
20
#include "php.h"
21
#include "base64.h"
22
23
/* {{{ base64 tables */
24
/* Encoding alphabet: the scalar encoder emits base64_table[v] for each 6-bit
 * value v (0..63). The initializer also carries one trailing '\0' element
 * after the 64 alphabet characters. */
static const char base64_table[] = {
25
  'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
26
  'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z',
27
  'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
28
  'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
29
  '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/', '\0'
30
};
31
32
/* Padding character appended by the encoder and counted by the decoder. */
static const char base64_pad = '=';
33
34
/* Decoding table indexed by input byte value (0..255).
 *   0..63 : the 6-bit value of an alphabet character
 *   -1    : whitespace (entries 9, 10, 13 and 32: TAB, LF, CR, SPACE)
 *   -2    : any other byte that is not part of the alphabet
 * The entry for '=' (61) is -2 as well; the scalar decoder tests for the
 * padding character before it consults this table. */
static const short base64_reverse_table[256] = {
35
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2, -2, -1, -2, -2,
36
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
37
  -1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 62, -2, -2, -2, 63,
38
  52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -2, -2, -2, -2, -2, -2,
39
  -2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
40
  15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -2, -2, -2, -2, -2,
41
  -2, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
42
  41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -2, -2, -2, -2, -2,
43
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
44
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
45
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
46
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
47
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
48
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
49
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
50
  -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2
51
};
52
/* }}} */
53
54
#if defined(__aarch64__) || defined(_M_ARM64)
55
#include <arm_neon.h>
56
57
/* Translate 16 six-bit values (one per byte lane) into Base64 characters.
 *
 * Each lane is first reduced to a small index that identifies which alphabet
 * range the value falls in; that index selects a per-range offset from
 * shift_LUT, and the offset is added to the original value.
 *
 * input     - lanes holding the 6-bit values to translate
 * shift_LUT - 32-byte table of per-range offsets, indexed by the reduced value
 * returns   - input + shift_LUT[index], lane by lane */
static zend_always_inline uint8x16_t encode_toascii(const uint8x16_t input, const uint8x16x2_t shift_LUT)
58
{
59
  /* reduce  0..51 -> 0
60
            52..61 -> 1 .. 10
61
                62 -> 11
62
                63 -> 12 */
63
  uint8x16_t result = vqsubq_u8(input, vdupq_n_u8(51));
64
  /* distinguish between ranges 0..25 and 26..51:
65
     0 .. 25 -> becomes 13 (lanes where 26 > input get 13 OR-ed in)
66
     26 .. 51 -> remains 0 */
67
  const uint8x16_t less = vcgtq_u8(vdupq_n_u8(26), input);
68
  result = vorrq_u8(result, vandq_u8(less, vdupq_n_u8(13)));
69
  /* read shift */
70
  result = vqtbl2q_u8(shift_LUT, result);
71
  return vaddq_u8(result, input);
72
}
73
74
static zend_always_inline unsigned char *neon_base64_encode(const unsigned char *in, size_t inl, unsigned char *out, size_t *left)
75
{
76
  const uint8_t shift_LUT_[32] = {'a' - 26, '0' - 52, '0' - 52, '0' - 52,
77
          '0' - 52, '0' - 52, '0' - 52, '0' - 52,
78
          '0' - 52, '0' - 52, '0' - 52, '+' - 62,
79
          '/' - 63, 'A',      0,        0,
80
          'a' - 26, '0' - 52, '0' - 52, '0' - 52,
81
          '0' - 52, '0' - 52, '0' - 52, '0' - 52,
82
          '0' - 52, '0' - 52, '0' - 52, '+' - 62,
83
          '/' - 63, 'A',      0,        0};
84
  const uint8x16x2_t shift_LUT = *((const uint8x16x2_t *)shift_LUT_);
85
  do {
86
    /* [ccdddddd | bbbbcccc | aaaaaabb]
87
        x.val[2] | x.val[1] | x.val[0] */
88
    const uint8x16x3_t x = vld3q_u8((const uint8_t *)(in));
89
90
    /* [00aa_aaaa] */
91
    const uint8x16_t field_a = vshrq_n_u8(x.val[0], 2);
92
93
    const uint8x16_t field_b =             /* [00bb_bbbb] */
94
        vbslq_u8(vdupq_n_u8(0x30),         /* [0011_0000] */
95
                 vshlq_n_u8(x.val[0], 4),  /* [aabb_0000] */
96
                 vshrq_n_u8(x.val[1], 4)); /* [0000_bbbb] */
97
98
    const uint8x16_t field_c =             /* [00cc_cccc] */
99
        vbslq_u8(vdupq_n_u8(0x3c),         /* [0011_1100] */
100
                 vshlq_n_u8(x.val[1], 2),  /* [bbcc_cc00] */
101
                 vshrq_n_u8(x.val[2], 6)); /* [0000_00cc] */
102
103
    /* [00dd_dddd] */
104
    const uint8x16_t field_d = vandq_u8(x.val[2], vdupq_n_u8(0x3f));
105
106
    uint8x16x4_t result;
107
    result.val[0] = encode_toascii(field_a, shift_LUT);
108
    result.val[1] = encode_toascii(field_b, shift_LUT);
109
    result.val[2] = encode_toascii(field_c, shift_LUT);
110
    result.val[3] = encode_toascii(field_d, shift_LUT);
111
112
    vst4q_u8((uint8_t *)out, result);
113
    out += 64;
114
    in += 16 * 3;
115
    inl -= 16 * 3;
116
  } while (inl >= 16 * 3);
117
118
  *left = inl;
119
  return out;
120
}
121
#endif /* defined(__aarch64__) || defined(_M_ARM64) */
122
123
static zend_always_inline unsigned char *php_base64_encode_impl(const unsigned char *in, size_t inl, unsigned char *out, zend_long flags) /* {{{ */
{
  /* Scalar encoder shared by every SIMD front end. Encodes inl bytes from in
     into out, writes a terminating '\0' and returns a pointer to that
     terminator. Padding is appended unless PHP_BASE64_NO_PADDING is set. */
  const int with_padding = (flags & PHP_BASE64_NO_PADDING) == 0;

#if defined(__aarch64__) || defined(_M_ARM64)
  if (inl >= 16 * 3) {
    /* let NEON consume every whole 48-byte group first */
    size_t remaining = 0;
    out = neon_base64_encode(in, inl, out, &remaining);
    in += inl - remaining;
    inl = remaining;
  }
#endif

  /* whole 3-byte groups: 24 input bits become four 6-bit symbols */
  for (; inl >= 3; in += 3, inl -= 3) {
    const unsigned char b0 = in[0];
    const unsigned char b1 = in[1];
    const unsigned char b2 = in[2];

    out[0] = base64_table[b0 >> 2];
    out[1] = base64_table[((b0 & 0x03) << 4) | (b1 >> 4)];
    out[2] = base64_table[((b1 & 0x0f) << 2) | (b2 >> 6)];
    out[3] = base64_table[b2 & 0x3f];
    out += 4;
  }

  /* trailing one or two bytes */
  if (inl == 2) {
    *out++ = base64_table[in[0] >> 2];
    *out++ = base64_table[((in[0] & 0x03) << 4) | (in[1] >> 4)];
    *out++ = base64_table[(in[1] & 0x0f) << 2];
    if (with_padding) {
      *out++ = base64_pad;
    }
  } else if (inl == 1) {
    *out++ = base64_table[in[0] >> 2];
    *out++ = base64_table[(in[0] & 0x03) << 4];
    if (with_padding) {
      *out++ = base64_pad;
      *out++ = base64_pad;
    }
  }

  *out = '\0';

  return out;
}
166
/* }}} */
167
168
#if defined(__aarch64__) || defined(_M_ARM64)
169
/* Translate 16 Base64 characters (one per byte lane) to their 6-bit values and
 * flag lanes that fail the alphabet check.
 *
 * input     - the ASCII bytes to translate
 * error     - out: a lane is all-ones where (maskLUT[low nibble] &
 *             bitposLUT[high nibble]) is zero, all-zero otherwise
 * shiftLUT  - per-high-nibble value added to the input byte
 * maskLUT   - per-low-nibble bit set used by the check
 * bitposLUT - per-high-nibble single-bit selector used by the check
 * returns   - input + shift, lane by lane (only meaningful for lanes whose
 *             error lane is zero) */
static zend_always_inline uint8x16_t decode_fromascii(const uint8x16_t input, uint8x16_t *error, const uint8x16x2_t shiftLUT, const uint8x16x2_t maskLUT, const uint8x16x2_t bitposLUT) {
170
  const uint8x16_t higher_nibble = vshrq_n_u8(input, 4);
171
  const uint8x16_t lower_nibble = vandq_u8(input, vdupq_n_u8(0x0f));
172
  const uint8x16_t sh = vqtbl2q_u8(shiftLUT, higher_nibble);
173
  /* 0x2f is '/': it gets a fixed shift of 16 (0x2f + 16 = 63) instead of the
     shift selected by its high nibble */
  const uint8x16_t eq_2f = vceqq_u8(input, vdupq_n_u8(0x2f));
174
  const uint8x16_t shift = vbslq_u8(eq_2f, vdupq_n_u8(16), sh);
175
  const uint8x16_t M = vqtbl2q_u8(maskLUT, lower_nibble);
176
  const uint8x16_t bit = vqtbl2q_u8(bitposLUT, higher_nibble);
177
  *error = vceqq_u8(vandq_u8(M, bit), vdupq_n_u8(0));
178
  return vaddq_u8(input, shift);
179
}
180
181
/* Decode whole 64-character groups of Base64 input with NEON.
 *
 * Each iteration reads 64 characters, translates them with decode_fromascii()
 * and, if every character passed the alphabet check, packs them into 48
 * output bytes. The loop stops at the first group that contains a byte
 * rejected by the check (padding, whitespace and invalid bytes included) so
 * that the scalar decoder can handle the rest.
 *
 * in   - input characters; the body runs once before the length is tested,
 *        so the caller must pass inl >= 64
 * inl  - number of input characters available
 * out  - destination; 48 bytes are written per 64 characters consumed
 * left - receives the number of input characters not consumed
 * returns the number of bytes written to out */
static zend_always_inline size_t neon_base64_decode(const unsigned char *in, size_t inl, unsigned char *out, size_t *left) {
  unsigned char *out_orig = out;
  /* Each 16-entry table is stored twice to fill the 32-byte operand of the
     two-register table lookup. */
  const uint8_t shiftLUT_[32] = {
    0,   0,  19,   4, (uint8_t)-65, (uint8_t)-65, (uint8_t)-71, (uint8_t)-71,
    0,   0,   0,   0,   0,   0,   0,   0,
    0,   0,  19,   4, (uint8_t)-65, (uint8_t)-65, (uint8_t)-71, (uint8_t)-71,
    0,   0,   0,   0,   0,   0,   0,   0};
  const uint8_t maskLUT_[32] = {
    /* 0        : 0b1010_1000*/ 0xa8,
    /* 1 .. 9   : 0b1111_1000*/ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
    /* 10       : 0b1111_0000*/ 0xf0,
    /* 11       : 0b0101_0100*/ 0x54,
    /* 12 .. 14 : 0b0101_0000*/ 0x50, 0x50, 0x50,
    /* 15       : 0b0101_0100*/ 0x54,

    /* 0        : 0b1010_1000*/ 0xa8,
    /* 1 .. 9   : 0b1111_1000*/ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
    /* 10       : 0b1111_0000*/ 0xf0,
    /* 11       : 0b0101_0100*/ 0x54,
    /* 12 .. 14 : 0b0101_0000*/ 0x50, 0x50, 0x50,
    /* 15       : 0b0101_0100*/ 0x54
  };
  const uint8_t bitposLUT_[32] = {
    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,

    0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
  };
  /* Load the tables with byte loads instead of dereferencing casted pointers:
     the byte arrays are not guaranteed to satisfy the alignment of
     uint8x16x2_t, and the cast also type-puns the arrays. */
  uint8x16x2_t shiftLUT;
  uint8x16x2_t maskLUT;
  uint8x16x2_t bitposLUT;
  shiftLUT.val[0] = vld1q_u8(shiftLUT_);
  shiftLUT.val[1] = vld1q_u8(shiftLUT_ + 16);
  maskLUT.val[0] = vld1q_u8(maskLUT_);
  maskLUT.val[1] = vld1q_u8(maskLUT_ + 16);
  bitposLUT.val[0] = vld1q_u8(bitposLUT_);
  bitposLUT.val[1] = vld1q_u8(bitposLUT_ + 16);

  do {
    /* de-interleaving load: val[k] holds the k-th character of each quartet */
    const uint8x16x4_t x = vld4q_u8((const unsigned char *)in);
    uint8x16_t error_a;
    uint8x16_t error_b;
    uint8x16_t error_c;
    uint8x16_t error_d;
    uint8x16_t field_a = decode_fromascii(x.val[0], &error_a, shiftLUT, maskLUT, bitposLUT);
    uint8x16_t field_b = decode_fromascii(x.val[1], &error_b, shiftLUT, maskLUT, bitposLUT);
    uint8x16_t field_c = decode_fromascii(x.val[2], &error_c, shiftLUT, maskLUT, bitposLUT);
    uint8x16_t field_d = decode_fromascii(x.val[3], &error_d, shiftLUT, maskLUT, bitposLUT);

    const uint8x16_t err = vorrq_u8(vorrq_u8(error_a, error_b), vorrq_u8(error_c, error_d));
    union {uint8_t mem[16]; uint64_t dw[2]; } error;
    vst1q_u8(error.mem, err);

    /* Check that the input only contains bytes belonging to the alphabet of
       Base64. If there are errors, decode the rest of the string with the
       scalar decoder. */
    if (error.dw[0] | error.dw[1])
      break;

    uint8x16x3_t result;
    result.val[0] = vorrq_u8(vshrq_n_u8(field_b, 4), vshlq_n_u8(field_a, 2));
    result.val[1] = vorrq_u8(vshrq_n_u8(field_c, 2), vshlq_n_u8(field_b, 4));
    result.val[2] = vorrq_u8(field_d, vshlq_n_u8(field_c, 6));

    vst3q_u8((unsigned char *)out, result);
    out += 16 * 3;
    in += 16 * 4;
    inl -= 16 * 4;
  } while (inl >= 16 * 4);
  *left = inl;
  return (size_t)(out - out_orig);
}
248
#endif /* defined(__aarch64__) || defined(_M_ARM64) */
249
250
static zend_always_inline int php_base64_decode_impl(const unsigned char *in, size_t inl, unsigned char *out, size_t *outl, bool strict) /* {{{ */
251
0
{
252
0
  int ch;
253
0
  size_t i = 0, padding = 0, j = *outl;
254
255
#if defined(__aarch64__) || defined(_M_ARM64)
256
  if (inl >= 16 * 4) {
257
    size_t left = 0;
258
    j += neon_base64_decode(in, inl, out, &left);
259
    i = inl - left;
260
    in += i;
261
    inl = left;
262
  }
263
#endif
264
265
  /* run through the whole string, converting as we go */
266
0
  while (inl-- > 0) {
267
0
    ch = *in++;
268
0
    if (ch == base64_pad) {
269
0
      padding++;
270
0
      continue;
271
0
    }
272
273
0
    ch = base64_reverse_table[ch];
274
0
    if (!strict) {
275
      /* skip unknown characters and whitespace */
276
0
      if (ch < 0) {
277
0
        continue;
278
0
      }
279
0
    } else {
280
      /* skip whitespace */
281
0
      if (ch == -1) {
282
0
        continue;
283
0
      }
284
      /* fail on bad characters or if any data follows padding */
285
0
      if (ch == -2 || padding) {
286
0
        goto fail;
287
0
      }
288
0
    }
289
290
0
    switch (i % 4) {
291
0
      case 0:
292
0
        out[j] = ch << 2;
293
0
        break;
294
0
      case 1:
295
0
        out[j++] |= ch >> 4;
296
0
        out[j] = (ch & 0x0f) << 4;
297
0
        break;
298
0
      case 2:
299
0
        out[j++] |= ch >>2;
300
0
        out[j] = (ch & 0x03) << 6;
301
0
        break;
302
0
      case 3:
303
0
        out[j++] |= ch;
304
0
        break;
305
0
    }
306
0
    i++;
307
0
  }
308
309
  /* fail if the input is truncated (only one char in last group) */
310
0
  if (strict && i % 4 == 1) {
311
0
    goto fail;
312
0
  }
313
314
  /* fail if the padding length is wrong (not VV==, VVV=), but accept zero padding
315
   * RFC 4648: "In some circumstances, the use of padding [--] is not required" */
316
0
  if (strict && padding && (padding > 2 || (i + padding) % 4 != 0)) {
317
0
    goto fail;
318
0
  }
319
320
0
  *outl = j;
321
0
  out[j] = '\0';
322
323
0
  return 1;
324
325
0
fail:
326
0
  return 0;
327
0
}
328
/* }}} */
329
330
/* {{{ php_base64_encode */
331
332
#ifdef ZEND_INTRIN_AVX2_NATIVE
333
# undef ZEND_INTRIN_SSSE3_NATIVE
334
# undef ZEND_INTRIN_SSSE3_RESOLVER
335
# undef ZEND_INTRIN_SSSE3_FUNC_PROTO
336
# undef ZEND_INTRIN_SSSE3_FUNC_PTR
337
#elif defined(ZEND_INTRIN_AVX2_FUNC_PROTO) && defined(ZEND_INTRIN_SSSE3_NATIVE)
338
# undef ZEND_INTRIN_SSSE3_NATIVE
339
# undef ZEND_INTRIN_SSSE3_RESOLVER
340
# define ZEND_INTRIN_SSSE3_RESOLVER 1
341
# define ZEND_INTRIN_SSSE3_FUNC_PROTO 1
342
# undef ZEND_INTRIN_SSSE3_FUNC_DECL
343
# ifdef HAVE_FUNC_ATTRIBUTE_TARGET
344
#  define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
345
# else
346
#  define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func
347
# endif
348
#elif defined(ZEND_INTRIN_AVX2_FUNC_PTR) && defined(ZEND_INTRIN_SSSE3_NATIVE)
349
# undef ZEND_INTRIN_SSSE3_NATIVE
350
# undef ZEND_INTRIN_SSSE3_RESOLVER
351
# define ZEND_INTRIN_SSSE3_RESOLVER 1
352
# define ZEND_INTRIN_SSSE3_FUNC_PTR 1
353
# undef ZEND_INTRIN_SSSE3_FUNC_DECL
354
# ifdef HAVE_FUNC_ATTRIBUTE_TARGET
355
#  define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func __attribute__((target("ssse3")))
356
# else
357
#  define ZEND_INTRIN_SSSE3_FUNC_DECL(func) ZEND_API func
358
# endif
359
#endif
360
361
/* Only enable avx512 resolver if avx2 use resolver also */
362
#if defined(ZEND_INTRIN_AVX2_FUNC_PROTO) && defined(ZEND_INTRIN_AVX512_FUNC_PROTO)
363
#define BASE64_INTRIN_AVX512_FUNC_PROTO 1
364
#endif
365
#if defined(ZEND_INTRIN_AVX2_FUNC_PTR) && defined(ZEND_INTRIN_AVX512_FUNC_PTR)
366
#define BASE64_INTRIN_AVX512_FUNC_PTR 1
367
#endif
368
#if defined(ZEND_INTRIN_AVX2_FUNC_PROTO) && defined(ZEND_INTRIN_AVX512_VBMI_FUNC_PROTO)
369
#define BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO 1
370
#endif
371
#if defined(ZEND_INTRIN_AVX2_FUNC_PTR) && defined(ZEND_INTRIN_AVX512_VBMI_FUNC_PTR)
372
#define BASE64_INTRIN_AVX512_VBMI_FUNC_PTR 1
373
#endif
374
375
#ifdef ZEND_INTRIN_AVX2_NATIVE
376
# include <immintrin.h>
377
#elif defined(ZEND_INTRIN_SSSE3_NATIVE)
378
# include <tmmintrin.h>
379
#elif defined(ZEND_INTRIN_SSSE3_RESOLVER) || defined(ZEND_INTRIN_AVX2_RESOLVER)
380
# ifdef ZEND_INTRIN_AVX2_RESOLVER
381
#  include <immintrin.h>
382
# else
383
#  include <tmmintrin.h>
384
# endif /* (ZEND_INTRIN_SSSE3_RESOLVER || ZEND_INTRIN_AVX2_RESOLVER) */
385
# include "Zend/zend_cpuinfo.h"
386
387
# if defined(BASE64_INTRIN_AVX512_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_FUNC_PTR)
388
ZEND_INTRIN_AVX512_FUNC_DECL(zend_string *php_base64_encode_avx512(const unsigned char *str, size_t length, zend_long flags));
389
ZEND_INTRIN_AVX512_FUNC_DECL(zend_string *php_base64_decode_ex_avx512(const unsigned char *str, size_t length, bool strict));
390
# endif
391
# if defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PTR)
392
ZEND_INTRIN_AVX512_VBMI_FUNC_DECL(zend_string *php_base64_encode_avx512_vbmi(const unsigned char *str, size_t length, zend_long flags));
393
ZEND_INTRIN_AVX512_VBMI_FUNC_DECL(zend_string *php_base64_decode_ex_avx512_vbmi(const unsigned char *str, size_t length, bool strict));
394
# endif
395
396
# ifdef ZEND_INTRIN_AVX2_RESOLVER
397
ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length, zend_long flags));
398
ZEND_INTRIN_AVX2_FUNC_DECL(zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, bool strict));
399
# endif
400
401
# ifdef ZEND_INTRIN_SSSE3_RESOLVER
402
ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length, zend_long flags));
403
ZEND_INTRIN_SSSE3_FUNC_DECL(zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, bool strict));
404
# endif
405
406
zend_string *php_base64_encode_default(const unsigned char *str, size_t length, zend_long flags);
407
zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, bool strict);
408
409
# if (defined(ZEND_INTRIN_AVX2_FUNC_PROTO) || defined(ZEND_INTRIN_SSSE3_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO))
410
PHPAPI zend_string *php_base64_encode_ex(const unsigned char *str, size_t length, zend_long flags) __attribute__((ifunc("resolve_base64_encode")));
411
PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict) __attribute__((ifunc("resolve_base64_decode")));
412
413
typedef zend_string *(*base64_encode_func_t)(const unsigned char *, size_t, zend_long flags);
414
typedef zend_string *(*base64_decode_func_t)(const unsigned char *, size_t, bool);
415
416
ZEND_NO_SANITIZE_ADDRESS
417
ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
418
16
static base64_encode_func_t resolve_base64_encode(void) {
419
16
# ifdef BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO
420
16
  if (zend_cpu_supports_avx512_vbmi()) {
421
0
    return php_base64_encode_avx512_vbmi;
422
0
  } else
423
16
# endif
424
16
# ifdef BASE64_INTRIN_AVX512_FUNC_PROTO
425
16
  if (zend_cpu_supports_avx512()) {
426
0
    return php_base64_encode_avx512;
427
0
  } else
428
16
# endif
429
16
# ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
430
16
  if (zend_cpu_supports_avx2()) {
431
16
    return php_base64_encode_avx2;
432
16
  } else
433
0
# endif
434
0
#ifdef ZEND_INTRIN_SSSE3_FUNC_PROTO
435
0
  if (zend_cpu_supports_ssse3()) {
436
0
    return php_base64_encode_ssse3;
437
0
  }
438
0
#endif
439
0
  return php_base64_encode_default;
440
16
}
441
442
ZEND_NO_SANITIZE_ADDRESS
443
ZEND_ATTRIBUTE_UNUSED /* clang mistakenly warns about this */
444
16
static base64_decode_func_t resolve_base64_decode(void) {
445
16
# ifdef BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO
446
16
  if (zend_cpu_supports_avx512_vbmi()) {
447
0
    return php_base64_decode_ex_avx512_vbmi;
448
0
  } else
449
16
# endif
450
16
# ifdef BASE64_INTRIN_AVX512_FUNC_PROTO
451
16
  if (zend_cpu_supports_avx512()) {
452
0
    return php_base64_decode_ex_avx512;
453
0
  } else
454
16
# endif
455
16
# ifdef ZEND_INTRIN_AVX2_FUNC_PROTO
456
16
  if (zend_cpu_supports_avx2()) {
457
16
    return php_base64_decode_ex_avx2;
458
16
  } else
459
0
# endif
460
0
#ifdef ZEND_INTRIN_SSSE3_FUNC_PROTO
461
0
  if (zend_cpu_supports_ssse3()) {
462
0
    return php_base64_decode_ex_ssse3;
463
0
  }
464
0
#endif
465
0
  return php_base64_decode_ex_default;
466
16
}
467
# else /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */
468
469
/* Function-pointer dispatch, used when the ifunc-based resolvers are not
 * compiled. Both pointers start out NULL and are assigned by
 * PHP_MINIT_FUNCTION(base64_intrin); the wrappers below call through them
 * without a NULL check, so they must not be used before that has run. */
PHPAPI zend_string *(*php_base64_encode_ptr)(const unsigned char *str, size_t length, zend_long flags) = NULL;
470
PHPAPI zend_string *(*php_base64_decode_ex_ptr)(const unsigned char *str, size_t length, bool strict) = NULL;
471
472
/* Public encode entry point: forwards to the selected implementation. */
PHPAPI zend_string *php_base64_encode_ex(const unsigned char *str, size_t length, zend_long flags) {
473
  return php_base64_encode_ptr(str, length, flags);
474
}
475
/* Public decode entry point: forwards to the selected implementation. */
PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict) {
476
  return php_base64_decode_ex_ptr(str, length, strict);
477
}
478
479
/* Selects the encode/decode implementation pair and stores it in
 * php_base64_encode_ptr / php_base64_decode_ex_ptr.
 *
 * The candidates are tried in the order AVX-512 VBMI, AVX-512, AVX2, SSSE3;
 * each test is only compiled when its *_FUNC_PTR macro is defined. The
 * trailing `else` of every compiled test chains into the next one, so the
 * final braced block (the default implementation) runs only when none of the
 * compiled tests succeeded. Always returns SUCCESS. */
PHP_MINIT_FUNCTION(base64_intrin)
480
{
481
# ifdef BASE64_INTRIN_AVX512_VBMI_FUNC_PTR
482
  if (zend_cpu_supports_avx512_vbmi()) {
483
    php_base64_encode_ptr = php_base64_encode_avx512_vbmi;
484
    php_base64_decode_ex_ptr = php_base64_decode_ex_avx512_vbmi;
485
  } else
486
# endif
487
# ifdef BASE64_INTRIN_AVX512_FUNC_PTR
488
  if (zend_cpu_supports_avx512()) {
489
    php_base64_encode_ptr = php_base64_encode_avx512;
490
    php_base64_decode_ex_ptr = php_base64_decode_ex_avx512;
491
  } else
492
# endif
493
# ifdef ZEND_INTRIN_AVX2_FUNC_PTR
494
  if (zend_cpu_supports_avx2()) {
495
    php_base64_encode_ptr = php_base64_encode_avx2;
496
    php_base64_decode_ex_ptr = php_base64_decode_ex_avx2;
497
  } else
498
# endif
499
#ifdef ZEND_INTRIN_SSSE3_FUNC_PTR
500
  if (zend_cpu_supports_ssse3()) {
501
    php_base64_encode_ptr = php_base64_encode_ssse3;
502
    php_base64_decode_ex_ptr = php_base64_decode_ex_ssse3;
503
  } else
504
#endif
505
  /* fallback; the braces keep this block valid whether or not any `else` precedes it */
  {
506
    php_base64_encode_ptr = php_base64_encode_default;
507
    php_base64_decode_ex_ptr = php_base64_decode_ex_default;
508
  }
509
  return SUCCESS;
510
}
511
# endif /* (ZEND_INTRIN_AVX2_FUNC_PROTO || ZEND_INTRIN_SSSE3_FUNC_PROTO) */
512
#endif /* ZEND_INTRIN_AVX2_NATIVE */
513
514
#if defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_VBMI_FUNC_PTR)
515
/* Base64-encode `length` bytes of `str` using AVX-512 VBMI byte permutes.
 *
 * The result string is allocated for ((length + 2) / 3) * 4 characters. The
 * vector loop consumes 48 input bytes and emits 64 characters per iteration;
 * whatever remains is finished by php_base64_encode_impl(), which also
 * honours `flags`. The final length is derived from the returned end pointer.
 *
 * Note: inside the loop a local `str` (__m512i) shadows the `str` parameter. */
zend_string *php_base64_encode_avx512_vbmi(const unsigned char *str, size_t length, zend_long flags)
516
0
{
517
0
  const unsigned char *c = str;
518
0
  unsigned char *o;
519
0
  zend_string *result;
520
521
0
  result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
522
0
  o = (unsigned char *)ZSTR_VAL(result);
523
524
0
  const __m512i shuffle_splitting = _mm512_setr_epi32(
525
0
    0x01020001, 0x04050304, 0x07080607, 0x0a0b090a, 0x0d0e0c0d, 0x10110f10,
526
0
    0x13141213, 0x16171516, 0x191a1819, 0x1c1d1b1c, 0x1f201e1f, 0x22232122,
527
0
    0x25262425, 0x28292728, 0x2b2c2a2b, 0x2e2f2d2e);
528
0
  const __m512i multi_shifts = _mm512_set1_epi64(0x3036242a1016040a);
529
0
  const char *ascii_lookup_tbl = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
530
0
  const __m512i ascii_lookup = _mm512_loadu_si512((__m512i *)ascii_lookup_tbl);
531
532
  /* Each iteration loads a full 64-byte vector although only 48 bytes are
     consumed, hence the requirement of at least 64 remaining bytes. */
0
  while (length > 63) {
533
    /* Step 1: load input data */
534
0
    __m512i str = _mm512_loadu_si512((const __m512i *)c);
535
536
    /* Step 2: splitting 24-bit words into 32-bit lanes */
537
0
    str = _mm512_permutexvar_epi8(shuffle_splitting, str);
538
539
    /* Step 3: moving 6-bit word to separate bytes */
540
0
    str = _mm512_multishift_epi64_epi8(multi_shifts, str);
541
542
    /* Step 4: conversion to ASCII */
543
0
    str = _mm512_permutexvar_epi8(str, ascii_lookup);
544
545
    /* Step 5: store the final result */
546
0
    _mm512_storeu_si512((__m512i *)o, str);
547
0
    c += 48;
548
0
    o += 64;
549
0
    length -= 48;
550
0
  }
551
552
  /* scalar tail: encodes the remaining bytes and writes the terminator */
0
  o = php_base64_encode_impl(c, length, o, flags);
553
554
0
  ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result));
555
556
0
  return result;
557
0
}
558
559
/* Base64-decode `length` characters of `str` using AVX-512 VBMI byte permutes.
 *
 * The result string is allocated with room for `length` bytes. The vector
 * loop turns 64 input characters into 48 output bytes per iteration and stops
 * at the first block for which the error mask is non-zero; the rest of the
 * input (including that block) is handed to php_base64_decode_impl(), which
 * continues writing at offset `outl` and applies the `strict` rules.
 *
 * Returns the decoded string, or NULL (after releasing the buffer) when the
 * scalar decoder reports failure. */
zend_string *php_base64_decode_ex_avx512_vbmi(const unsigned char *str, size_t length, bool strict)
560
0
{
561
0
  const unsigned char *c = str;
562
0
  unsigned char *o;
563
0
  size_t outl = 0;
564
0
  zend_string *result;
565
566
0
  result = zend_string_alloc(length, 0);
567
0
  o = (unsigned char *)ZSTR_VAL(result);
568
569
  /* Two halves of the character-to-value table used by the two-source permute
     below. Entries holding 0x80 have their top bit set, which is what the
     error mask in step 2 picks up. */
0
  const __m512i lookup_0 = _mm512_setr_epi32(
570
0
    0x80808080, 0x80808080, 0x80808080, 0x80808080, 0x80808080, 0x80808080,
571
0
    0x80808080, 0x80808080, 0x80808080, 0x80808080, 0x3e808080, 0x3f808080,
572
0
    0x37363534, 0x3b3a3938, 0x80803d3c, 0x80808080);
573
0
  const __m512i lookup_1 = _mm512_setr_epi32(
574
0
    0x02010080, 0x06050403, 0x0a090807, 0x0e0d0c0b, 0x1211100f, 0x16151413,
575
0
    0x80191817, 0x80808080, 0x1c1b1a80, 0x201f1e1d, 0x24232221, 0x28272625,
576
0
    0x2c2b2a29, 0x302f2e2d, 0x80333231, 0x80808080);
577
578
0
  const __m512i merge_mask1 = _mm512_set1_epi32(0x01400140);
579
0
  const __m512i merge_mask2 = _mm512_set1_epi32(0x00011000);
580
581
0
  const __m512i continuous_mask = _mm512_setr_epi32(
582
0
    0x06000102, 0x090a0405, 0x0c0d0e08, 0x16101112, 0x191a1415, 0x1c1d1e18,
583
0
    0x26202122, 0x292a2425, 0x2c2d2e28, 0x36303132, 0x393a3435, 0x3c3d3e38,
584
0
    0x00000000, 0x00000000, 0x00000000, 0x00000000);
585
586
  /* Strictly more than 64 characters must remain, so at least one character
     is always left for the scalar decoder. */
0
  while (length > 64) {
587
    /* Step 1: load input data */
588
0
    const __m512i input = _mm512_loadu_si512((__m512i *)c);
589
590
    /* Step 2: translation into 6-bit values(saved on bytes) from ASCII and error detection */
591
0
    __m512i str = _mm512_permutex2var_epi8(lookup_0, input, lookup_1);
592
0
    const uint64_t mask = _mm512_movepi8_mask(_mm512_or_epi64(str, input)); /* convert MSBs to the mask */
593
0
    if (mask) {
594
0
      break;
595
0
    }
596
597
    /* Step 3: pack four fields within 32-bit words into 24-bit words. */
598
0
    const __m512i merge_ab_and_bc = _mm512_maddubs_epi16(str, merge_mask1);
599
0
    str = _mm512_madd_epi16(merge_ab_and_bc, merge_mask2);
600
601
    /* Step 4: move 3-byte words into the continuous array. */
602
0
    str = _mm512_permutexvar_epi8(continuous_mask, str);
603
604
    /* Step 5: store the final result */
    /* a full 64-byte vector is stored but the output only advances by 48 */
605
0
    _mm512_storeu_si512((__m512i *)o, str);
606
607
0
    c += 64;
608
0
    o += 48;
609
0
    outl += 48;
610
0
    length -= 64;
611
0
  }
612
613
  /* scalar tail: continues at offset outl in the same buffer */
0
  if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
614
0
    zend_string_efree(result);
615
0
    return NULL;
616
0
  }
617
618
0
  ZSTR_LEN(result) = outl;
619
620
0
  return result;
621
0
}
622
#endif
623
624
#if defined(BASE64_INTRIN_AVX512_FUNC_PROTO) || defined(BASE64_INTRIN_AVX512_FUNC_PTR)
625
/* Base64-encode `length` bytes of `str` using AVX-512 (no VBMI required).
 *
 * The result string is allocated for ((length + 2) / 3) * 4 characters. The
 * vector loop consumes 48 input bytes and emits 64 characters per iteration;
 * whatever remains is finished by php_base64_encode_impl(), which also
 * honours `flags`. The final length is derived from the returned end pointer.
 *
 * Note: inside the loop the locals `str` and `result` (__m512i) shadow the
 * `str` parameter and the outer `result` string. */
zend_string *php_base64_encode_avx512(const unsigned char *str, size_t length, zend_long flags)
626
0
{
627
0
  const unsigned char *c = str;
628
0
  unsigned char *o;
629
0
  zend_string *result;
630
631
0
  result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
632
0
  o = (unsigned char *)ZSTR_VAL(result);
633
634
  /* Each iteration loads a full 64-byte vector although only 48 bytes are
     consumed, hence the requirement of at least 64 remaining bytes. */
0
  while (length > 63) {
635
    /* Step 1: load input data */
636
    /* [????|????|????|????|PPPO|OONN|NMMM|LLLK|KKJJ|JIII|HHHG|GGFF|FEEE|DDDC|CCBB|BAAA] */
637
0
    __m512i str = _mm512_loadu_si512((const __m512i *)c);
638
639
    /* Step 2: splitting 24-bit words into 32-bit lanes */
640
    /* [0000|PPPO|OONN|NMMM|0000|LLLK|KKJJ|JIII|0000|HHHG|GGFF|FEEE|0000|DDDC|CCBB|BAAA] */
641
0
    str = _mm512_permutexvar_epi32(
642
0
      _mm512_set_epi32(-1, 11, 10, 9, -1, 8, 7, 6, -1, 5, 4, 3, -1, 2, 1, 0), str);
643
    /* [D1 D2 D0 D1|C1 C2 C0 C1|B1 B2 B0 B1|A1 A2 A0 A1] x 4 */
644
0
    str = _mm512_shuffle_epi8(str, _mm512_set4_epi32(0x0a0b090a, 0x07080607, 0x04050304, 0x01020001));
645
646
    /* Step 3: moving 6-bit word to separate bytes */
647
    /* in:  [bbbbcccc|ccdddddd|aaaaaabb|bbbbcccc] */
648
    /* t0:  [0000cccc|cc000000|aaaaaa00|00000000] */
649
0
    const __m512i t0 = _mm512_and_si512(str, _mm512_set1_epi32(0x0fc0fc00));
650
    /* t1:  [00000000|00cccccc|00000000|00aaaaaa] */
651
0
    const __m512i t1 = _mm512_srlv_epi16(t0, _mm512_set1_epi32(0x0006000a));
652
    /* t2:  [ccdddddd|00000000|aabbbbbb|cccc0000] */
653
0
    const __m512i t2 = _mm512_sllv_epi16(str, _mm512_set1_epi32(0x00080004));
654
    /* str: [00dddddd|00cccccc|00bbbbbb|00aaaaaa] */
655
0
    str = _mm512_ternarylogic_epi32(_mm512_set1_epi32(0x3f003f00), t2, t1, 0xca);
656
657
    /* Step 4: conversion to ASCII */
    /* Saturating subtract maps 0..51 to 0 and 52..63 to 1..12; lanes whose
       value is below 26 are then replaced by 13. The resulting index selects
       a per-range offset from `lut`, which is added to the 6-bit value. */
658
0
    __m512i result = _mm512_subs_epu8(str, _mm512_set1_epi8(51));
659
0
    const __mmask64 less = _mm512_cmpgt_epi8_mask(_mm512_set1_epi8(26), str);
660
0
    result = _mm512_mask_mov_epi8(result, less, _mm512_set1_epi8(13));
661
0
    const __m512i lut = _mm512_set4_epi32(0x000041f0, 0xedfcfcfc, 0xfcfcfcfc, 0xfcfcfc47);
662
0
    result = _mm512_shuffle_epi8(lut, result);
663
0
    result = _mm512_add_epi8(result, str);
664
665
    /* Step 5: store the final result */
666
0
    _mm512_storeu_si512((__m512i *)o, result);
667
0
    c += 48;
668
0
    o += 64;
669
0
    length -= 48;
670
0
  }
671
672
  /* scalar tail: encodes the remaining bytes and writes the terminator */
0
  o = php_base64_encode_impl(c, length, o, flags);
673
674
0
  ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result));
675
676
0
  return result;
677
0
}
678
679
/* Pack four values, each truncated to 8 bits, into one 32-bit word with b0 in
 * the lowest byte and b3 in the highest. Every argument and the whole
 * expansion are parenthesized so the macro behaves like a single operand in
 * any surrounding expression and accepts compound argument expressions. */
#define build_dword(b0, b1, b2, b3) \
  (((uint32_t)(uint8_t)(b0) << 0) | ((uint32_t)(uint8_t)(b1) << 8) | \
   ((uint32_t)(uint8_t)(b2) << 16) | ((uint32_t)(uint8_t)(b3) << 24))

/* Build a 512-bit vector from 16 byte values: the bytes are packed into four
 * 32-bit words (b0 lowest) which are passed to _mm512_setr4_epi32. */
#define _mm512_set4lanes_epi8(b0, b1, b2, b3, b4, b5, b6, b7, b8, b9, b10, b11, b12, b13, b14, b15) \
  _mm512_setr4_epi32(build_dword(b0, b1, b2, b3), build_dword(b4, b5, b6, b7), \
                     build_dword(b8, b9, b10, b11), build_dword(b12, b13, b14, b15))
686
687
/* Base64-decode `length` characters of `str` using AVX-512 (no VBMI required).
 *
 * The result string is allocated with room for `length` bytes. The vector
 * loop turns 64 input characters into 48 output bytes per iteration and stops
 * at the first block in which some byte fails the nibble-table check; the
 * rest of the input (including that block) is handed to
 * php_base64_decode_impl(), which continues writing at offset `outl` and
 * applies the `strict` rules.
 *
 * Returns the decoded string, or NULL (after releasing the buffer) when the
 * scalar decoder reports failure.
 *
 * Note: inside the loop a local `str` (__m512i) shadows the `str` parameter. */
zend_string *php_base64_decode_ex_avx512(const unsigned char *str, size_t length, bool strict)
688
0
{
689
0
  const unsigned char *c = str;
690
0
  unsigned char *o;
691
0
  size_t outl = 0;
692
0
  zend_string *result;
693
694
0
  result = zend_string_alloc(length, 0);
695
0
  o = (unsigned char *)ZSTR_VAL(result);
696
697
  /* Strictly more than 64 characters must remain, so at least one character
     is always left for the scalar decoder. */
0
  while (length > 64) {
698
    /* Step 1: load input data */
699
0
    __m512i str = _mm512_loadu_si512((__m512i *)c);
700
701
    /* Step 2: translation into 6-bit values(saved on bytes) from ASCII and error detection */
702
0
    const __m512i higher_nibble = _mm512_and_si512(_mm512_srli_epi32(str, 4), _mm512_set1_epi8(0x0f));
703
0
    const __m512i lower_nibble = _mm512_and_si512(str, _mm512_set1_epi8(0x0f));
704
0
    const __m512i shiftLUT = _mm512_set4lanes_epi8(
705
0
        0, 0, 19, 4, -65, -65, -71, -71, 0, 0, 0, 0, 0, 0, 0, 0);
706
0
    const __m512i maskLUT = _mm512_set4lanes_epi8(
707
0
        /* 0        : 0b1010_1000*/ 0xa8,
708
0
        /* 1 .. 9   : 0b1111_1000*/ 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8, 0xf8,
709
0
        /* 10       : 0b1111_0000*/ 0xf0,
710
0
        /* 11       : 0b0101_0100*/ 0x54,
711
0
        /* 12 .. 14 : 0b0101_0000*/ 0x50, 0x50, 0x50,
712
0
        /* 15       : 0b0101_0100*/ 0x54);
713
0
    const __m512i bitposLUT = _mm512_set4lanes_epi8(
714
0
        0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80,
715
0
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
716
0
    const __m512i M = _mm512_shuffle_epi8(maskLUT, lower_nibble);
717
0
    const __m512i bit = _mm512_shuffle_epi8(bitposLUT, higher_nibble);
    /* a byte passes when (M & bit) is non-zero; all 64 mask bits must be set */
718
0
    const uint64_t match = _mm512_test_epi8_mask(M, bit);
719
0
    if (match != (uint64_t)-1) {
720
0
      break;
721
0
    }
722
0
    const __m512i sh = _mm512_shuffle_epi8(shiftLUT, higher_nibble);
    /* 0x2f is '/': it gets a fixed shift of 16 instead of the table value */
723
0
    const __mmask64 eq_2f = _mm512_cmpeq_epi8_mask(str, _mm512_set1_epi8(0x2f));
724
0
    const __m512i shift = _mm512_mask_mov_epi8(sh, eq_2f, _mm512_set1_epi8(16));
725
0
    str = _mm512_add_epi8(str, shift);
726
727
    /* Step 3: pack four fields within 32-bit words into 24-bit words. */
728
0
    const __m512i merge_ab_and_bc = _mm512_maddubs_epi16(str, _mm512_set1_epi32(0x01400140));
729
0
    str = _mm512_madd_epi16(merge_ab_and_bc, _mm512_set1_epi32(0x00011000));
730
731
    /* Step 4: move 3-byte words into the continuous array. */
732
0
    const __m512i t1 = _mm512_shuffle_epi8(str,
733
0
      _mm512_set4lanes_epi8(2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1));
734
0
    const __m512i s6 = _mm512_setr_epi32(0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 0, 0, 0, 0);
735
0
    const __m512i t2 = _mm512_permutexvar_epi32(s6, t1);
736
737
    /* Step 5: store the final result */
    /* a full 64-byte vector is stored but the output only advances by 48 */
738
0
    _mm512_storeu_si512((__m512i *)o, t2);
739
740
0
    c += 64;
741
0
    o += 48;
742
0
    outl += 48;
743
0
    length -= 64;
744
0
  }
745
746
  /* scalar tail: continues at offset outl in the same buffer */
0
  if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
747
0
    zend_string_efree(result);
748
0
    return NULL;
749
0
  }
750
751
0
  ZSTR_LEN(result) = outl;
752
753
0
  return result;
754
0
}
755
#endif
756
757
#if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER)
758
# if defined(ZEND_INTRIN_AVX2_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
759
static __m256i php_base64_encode_avx2_reshuffle(__m256i in) __attribute__((target("avx2")));
760
static __m256i php_base64_encode_avx2_translate(__m256i in) __attribute__((target("avx2")));
761
# endif
762
/* Split 24 input bytes (12 per 128-bit lane) into 32 six-bit fields, one
 * field per output byte.
 *
 * This works with input shifted by 4 bytes so that the two 128-bit lanes
 * can be processed independently. Input, bytes MSB to LSB:
 *   0 0 0 0 x w v u t s r q p o n m
 *   l k j i h g f e d c b a 0 0 0 0 */
static __m256i php_base64_encode_avx2_reshuffle(__m256i in)
{
  /* Byte selectors, listed from the most significant byte down. The low
   * lane's indices are 4 higher than the high lane's, which matches the
   * 4-byte shift of the input described above. */
  const __m256i byte_picks = _mm256_set_epi8(
    10, 11,  9, 10,
     7,  8,  6,  7,
     4,  5,  3,  4,
     1,  2,  0,  1,

    14, 15, 13, 14,
    11, 12, 10, 11,
     8,  9,  7,  8,
     5,  6,  4,  5);
  const __m256i spread = _mm256_shuffle_epi8(in, byte_picks);

  /* Fields selected by 0x0fc0fc00 are moved down: a high multiply by a
   * power of two acts as a per-16-bit right shift. */
  const __m256i down_src = _mm256_and_si256(spread, _mm256_set1_epi32(0x0fc0fc00));
  const __m256i moved_down = _mm256_mulhi_epu16(down_src, _mm256_set1_epi32(0x04000040));

  /* Fields selected by 0x003f03f0 are moved up: a low multiply by a power
   * of two acts as a per-16-bit left shift. */
  const __m256i up_src = _mm256_and_si256(spread, _mm256_set1_epi32(0x003f03f0));
  const __m256i moved_up = _mm256_mullo_epi16(up_src, _mm256_set1_epi32(0x01000010));

  /* Result (upper case are MSB, lower case are LSB):
   * 00xxxxxx 00wwwwXX 00vvWWWW 00VVVVVV
   * 00uuuuuu 00ttttUU 00ssTTTT 00SSSSSS
   * 00rrrrrr 00qqqqRR 00ppQQQQ 00PPPPPP
   * 00oooooo 00nnnnOO 00mmNNNN 00MMMMMM
   * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
   * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
   * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
   * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */
  return _mm256_or_si256(moved_down, moved_up);
}
800
801
/* Translate 32 six-bit values (one per byte, 0..63) to base64 characters by
 * adding a per-range offset looked up from a small table.
 *
 * The table is repeated for each 128-bit lane:
 *   index 0      +65   values  0..25 -> 'A'..'Z'
 *   index 1      +71   values 26..51 -> 'a'..'z'
 *   index 2..11   -4   values 52..61 -> '0'..'9'
 *   index 12     -19   value  62     -> '+'
 *   index 13     -16   value  63     -> '/' */
static __m256i php_base64_encode_avx2_translate(__m256i in)
{
  const __m256i offsets = _mm256_setr_epi8(
      65, 71, -4, -4, -4, -4, -4, -4,
      -4, -4, -4, -4, -19, -16, 0, 0,
      65, 71, -4, -4, -4, -4, -4, -4,
      -4, -4, -4, -4, -19, -16, 0, 0);

  /* Saturating subtract: 0 for values up to 51, then 1..12 for 52..63. */
  const __m256i reduced = _mm256_subs_epu8(in, _mm256_set1_epi8(51));

  /* 0xFF (-1) for every value above 25, 0x00 otherwise. */
  const __m256i above_25 = _mm256_cmpgt_epi8(in, _mm256_set1_epi8(25));

  /* Subtracting -1 bumps the index of every range except the first. */
  const __m256i slot = _mm256_sub_epi8(reduced, above_25);

  return _mm256_add_epi8(in, _mm256_shuffle_epi8(offsets, slot));
}
820
#endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER */
821
822
#if defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER)
823
824
# if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
825
static __m128i php_base64_encode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3")));
826
static __m128i php_base64_encode_ssse3_translate(__m128i in) __attribute__((target("ssse3")));
827
# endif
828
829
/* Split 12 input bytes into 16 six-bit fields, one field per output byte.
 *
 * Input, bytes MSB to LSB:
 *   0 0 0 0 l k j i h g f e d c b a */
static __m128i php_base64_encode_ssse3_reshuffle(__m128i in)
{
  /* Byte selectors, listed from the most significant byte down. */
  const __m128i byte_picks = _mm_set_epi8(
        10, 11,  9, 10,
        7,  8,  6,  7,
        4,  5,  3,  4,
        1,  2,  0,  1);
  const __m128i spread = _mm_shuffle_epi8(in, byte_picks);

  /* Fields selected by 0x0fc0fc00 are moved down: a high multiply by a
   * power of two acts as a per-16-bit right shift. */
  const __m128i down_src = _mm_and_si128(spread, _mm_set1_epi32(0x0fc0fc00));
  const __m128i moved_down = _mm_mulhi_epu16(down_src, _mm_set1_epi32(0x04000040));

  /* Fields selected by 0x003f03f0 are moved up: a low multiply by a power
   * of two acts as a per-16-bit left shift. */
  const __m128i up_src = _mm_and_si128(spread, _mm_set1_epi32(0x003f03f0));
  const __m128i moved_up = _mm_mullo_epi16(up_src, _mm_set1_epi32(0x01000010));

  /* output (upper case are MSB, lower case are LSB):
   * 00llllll 00kkkkLL 00jjKKKK 00JJJJJJ
   * 00iiiiii 00hhhhII 00ggHHHH 00GGGGGG
   * 00ffffff 00eeeeFF 00ddEEEE 00DDDDDD
   * 00cccccc 00bbbbCC 00aaBBBB 00AAAAAA */
  return _mm_or_si128(moved_down, moved_up);
}
856
857
/* Translate 16 six-bit values (one per byte, 0..63) to the Base64 alphabet.
 *
 * There are five ranges, each with its own offset stored in a lookup table:
 * #  From      To         Abs    Index  Characters
 * 0  [0..25]   [65..90]   +65        0  ABCDEFGHIJKLMNOPQRSTUVWXYZ
 * 1  [26..51]  [97..122]  +71        1  abcdefghijklmnopqrstuvwxyz
 * 2  [52..61]  [48..57]    -4  [2..11]  0123456789
 * 3  [62]      [43]       -19       12  +
 * 4  [63]      [47]       -16       13  / */
static __m128i php_base64_encode_ssse3_translate(__m128i in)
{
  const __m128i offsets = _mm_setr_epi8(
      65,  71, -4, -4,
      -4,  -4, -4, -4,
      -4,  -4, -4, -4,
      -19, -16,  0,  0
      );

  /* Saturating subtract gives the right index for range #0 and an index
   * that is one too small for every other range. */
  const __m128i reduced = _mm_subs_epu8(in, _mm_set1_epi8(51));

  /* 0xFF (-1) for ranges #1..#4, 0x00 for range #0. */
  const __m128i above_25 = _mm_cmpgt_epi8(in, _mm_set1_epi8(25));

  /* Subtracting -1 adds the missing 1 for ranges #1..#4. */
  const __m128i slot = _mm_sub_epi8(reduced, above_25);

  /* Add the looked-up offset to each input value. */
  return _mm_add_epi8(in, _mm_shuffle_epi8(offsets, slot));
}
888
889
/* Vectorized main loop of the SSSE3 base64 encoder.
 *
 * Expands in a scope that provides:
 *   c      - const unsigned char * read cursor
 *   o      - unsigned char * write cursor
 *   length - number of input bytes left at c
 *
 * Each iteration loads 16 bytes (hence the `length > 15` guard), encodes the
 * first 12 of them and stores 16 output characters. The remaining tail is
 * left for the caller to encode. The load goes through a const-qualified
 * pointer so the qualifier of `c` is not cast away. */
#define PHP_BASE64_ENCODE_SSSE3_LOOP \
  while (length > 15) { \
    __m128i s = _mm_loadu_si128((const __m128i *)c); \
    \
    s = php_base64_encode_ssse3_reshuffle(s); \
    \
    s = php_base64_encode_ssse3_translate(s); \
    \
    _mm_storeu_si128((__m128i *)o, s); \
    c += 12; \
    o += 16; \
    length -= 12; \
  }
902
903
#endif /* ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
904
905
#if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER)
906
# if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_SSSE3_NATIVE)
907
PHPAPI zend_string *php_base64_encode_ex(const unsigned char *str, size_t length, zend_long flags)
908
# elif defined(ZEND_INTRIN_AVX2_RESOLVER)
909
zend_string *php_base64_encode_avx2(const unsigned char *str, size_t length, zend_long flags)
910
# else /* ZEND_INTRIN_SSSE3_RESOLVER */
911
zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length, zend_long flags)
912
# endif
913
12
{
914
12
  const unsigned char *c = str;
915
12
  unsigned char *o;
916
12
  zend_string *result;
917
918
12
  result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
919
12
  o = (unsigned char *)ZSTR_VAL(result);
920
12
# if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER)
921
12
  if (length > 31) {
922
5
    __m256i s = _mm256_loadu_si256((__m256i *)c);
923
924
5
    s = _mm256_permutevar8x32_epi32(s, _mm256_setr_epi32(0, 0, 1, 2, 3, 4, 5, 6));
925
926
17
    for (;;) {
927
17
      s = php_base64_encode_avx2_reshuffle(s);
928
929
17
      s = php_base64_encode_avx2_translate(s);
930
931
17
      _mm256_storeu_si256((__m256i *)o, s);
932
17
      c += 24;
933
17
      o += 32;
934
17
      length -= 24;
935
17
      if (length < 28) {
936
5
        break;
937
5
      }
938
12
      s = _mm256_loadu_si256((__m256i *)(c - 4));
939
12
    }
940
5
  }
941
# else
942
  PHP_BASE64_ENCODE_SSSE3_LOOP;
943
# endif
944
945
12
  o = php_base64_encode_impl(c, length, o, flags);
946
947
12
  ZSTR_LEN(result) = (o - (unsigned char *)ZSTR_VAL(result));
948
949
12
  return result;
950
12
}
951
952
# if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(ZEND_INTRIN_AVX2_RESOLVER)
953
/* SSSE3 base64 encoder, compiled separately when both the SSSE3 and the AVX2
 * resolver variants are built.
 *
 * Encodes `length` bytes from `str` and returns a newly allocated
 * zend_string; `flags` is passed through to php_base64_encode_impl(). */
zend_string *php_base64_encode_ssse3(const unsigned char *str, size_t length, zend_long flags)
{
  zend_string *encoded = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
  /* `c`, `o` and `length` are the names the loop macro operates on. */
  const unsigned char *c = str;
  unsigned char *o = (unsigned char *)ZSTR_VAL(encoded);

  /* Vectorized part. */
  PHP_BASE64_ENCODE_SSSE3_LOOP;

  /* Remaining tail. */
  o = php_base64_encode_impl(c, length, o, flags);

  ZSTR_LEN(encoded) = (o - (unsigned char *)ZSTR_VAL(encoded));

  return encoded;
}
970
# endif
971
#endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
972
973
/* }}} */
974
975
#if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER)
976
# if defined(ZEND_INTRIN_AVX2_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
977
static __m256i php_base64_decode_avx2_reshuffle(__m256i in) __attribute__((target("avx2")));
978
# endif
979
980
/* Pack 32 six-bit values (one per input byte) into 24 contiguous bytes.
 *
 * Pairs of values are first merged into 16-bit words, then pairs of those
 * into 32-bit words. A per-lane byte shuffle then keeps three bytes of each
 * 32-bit word, and a cross-lane permute moves the two 12-byte halves next to
 * each other at the bottom of the register. */
static __m256i php_base64_decode_avx2_reshuffle(__m256i in)
{
  const __m256i pairs = _mm256_maddubs_epi16(in, _mm256_set1_epi32(0x01400140));
  const __m256i words = _mm256_madd_epi16(pairs, _mm256_set1_epi32(0x00011000));

  /* Same selector in both lanes; -1 zeroes the unused top four bytes. */
  const __m256i byte_picks = _mm256_setr_epi8(
        2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1,
        2, 1, 0, 6, 5, 4, 10, 9, 8, 14, 13, 12, -1, -1, -1, -1);
  const __m256i packed = _mm256_shuffle_epi8(words, byte_picks);

  /* Words 0..2 of the low lane followed by words 4..6 of the high lane. */
  const __m256i word_picks = _mm256_setr_epi32(0, 1, 2, 4, 5, 6, -1, -1);

  return _mm256_permutevar8x32_epi32(packed, word_picks);
}
994
#endif
995
996
#if defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER)
997
# if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(HAVE_FUNC_ATTRIBUTE_TARGET)
998
static __m128i php_base64_decode_ssse3_reshuffle(__m128i in) __attribute__((target("ssse3")));
999
# endif
1000
1001
/* Pack 16 six-bit values (one per input byte) into 12 contiguous bytes; the
 * top four bytes of the result are zero. */
static __m128i php_base64_decode_ssse3_reshuffle(__m128i in)
{
  /* Merge adjacent values into 16-bit words:
   * 0000kkkk LLllllll 0000JJJJ JJjjKKKK
   * 0000hhhh IIiiiiii 0000GGGG GGggHHHH
   * 0000eeee FFffffff 0000DDDD DDddEEEE
   * 0000bbbb CCcccccc 0000AAAA AAaaBBBB */
  const __m128i pairs = _mm_maddubs_epi16(in, _mm_set1_epi32(0x01400140));

  /* Merge adjacent 16-bit words into 32-bit words:
   * 00000000 JJJJJJjj KKKKkkkk LLllllll
   * 00000000 GGGGGGgg HHHHhhhh IIiiiiii
   * 00000000 DDDDDDdd EEEEeeee FFffffff
   * 00000000 AAAAAAaa BBBBbbbb CCcccccc */
  const __m128i words = _mm_madd_epi16(pairs, _mm_set1_epi32(0x00011000));

  const __m128i byte_picks = _mm_setr_epi8(
     2,  1,  0,
     6,  5,  4,
    10,  9,  8,
    14, 13, 12,
    -1, -1, -1, -1);

  /* Keep three bytes per word, in reversed order:
   * 00000000 00000000 00000000 00000000
   * LLllllll KKKKkkkk JJJJJJjj IIiiiiii
   * HHHHhhhh GGGGGGgg FFffffff EEEEeeee
   * DDDDDDdd CCcccccc BBBBbbbb AAAAAAaa */
  return _mm_shuffle_epi8(words, byte_picks);
}
1028
1029
/* Vectorized main loop of the SSSE3 base64 decoder.
 *
 * Expands in a scope that provides:
 *   c      - const unsigned char * read cursor
 *   o      - unsigned char * write cursor
 *   outl   - running count of decoded bytes
 *   length - number of input bytes left at c
 *
 * Each iteration loads 16 input characters. Every byte is classified by two
 * table lookups (one on each nibble); if the two results share a set bit for
 * any byte, the loop stops without consuming the chunk and the caller's
 * scalar decoder takes over from there. Otherwise the characters are turned
 * into 6-bit values by adding a per-range offset (with '/' = 0x2f handled
 * through the eq_2f adjustment), packed, and stored: 16 bytes are written
 * but the cursor advances by the 12 decoded bytes only.
 *
 * The load goes through a const-qualified pointer so the qualifier of `c`
 * is not cast away. */
#define PHP_BASE64_DECODE_SSSE3_LOOP \
  while (length > 15 + 6 + 2) { \
    __m128i lut_lo, lut_hi, lut_roll; \
    __m128i hi_nibbles, lo_nibbles, hi, lo; \
    __m128i s = _mm_loadu_si128((const __m128i *)c); \
    \
    lut_lo = _mm_setr_epi8( \
        0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, \
        0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A); \
    lut_hi = _mm_setr_epi8( \
        0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08, \
        0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10); \
    lut_roll = _mm_setr_epi8( \
        0,  16,  19,   4, -65, -65, -71, -71, \
        0,   0,   0,   0,   0,   0,   0,   0); \
    \
    hi_nibbles  = _mm_and_si128( \
            _mm_srli_epi32(s, 4), _mm_set1_epi8(0x2f)); \
    lo_nibbles  = _mm_and_si128(s, _mm_set1_epi8(0x2f)); \
    hi          = _mm_shuffle_epi8(lut_hi, hi_nibbles); \
    lo          = _mm_shuffle_epi8(lut_lo, lo_nibbles); \
    \
    \
    if (UNEXPECTED( \
      _mm_movemask_epi8( \
        _mm_cmpgt_epi8( \
          _mm_and_si128(lo, hi), _mm_set1_epi8(0))))) { \
      break; \
    } else { \
      __m128i eq_2f, roll; \
      \
      eq_2f = _mm_cmpeq_epi8(s, _mm_set1_epi8(0x2f)); \
      roll = _mm_shuffle_epi8( \
          lut_roll, _mm_add_epi8(eq_2f, hi_nibbles)); \
      \
      s = _mm_add_epi8(s, roll); \
      s = php_base64_decode_ssse3_reshuffle(s); \
      \
      _mm_storeu_si128((__m128i *)o, s); \
      \
      c += 16; \
      o += 12; \
      outl += 12; \
      length -= 16; \
    } \
  }
1075
1076
#endif
1077
1078
#if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_NATIVE) || defined(ZEND_INTRIN_SSSE3_RESOLVER)
1079
# if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_SSSE3_NATIVE)
1080
PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict)
1081
# elif defined(ZEND_INTRIN_AVX2_RESOLVER)
1082
zend_string *php_base64_decode_ex_avx2(const unsigned char *str, size_t length, bool strict)
1083
# else
1084
zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, bool strict)
1085
# endif
1086
0
{
1087
0
  const unsigned char *c = str;
1088
0
  unsigned char *o;
1089
0
  size_t outl = 0;
1090
0
  zend_string *result;
1091
1092
0
  result = zend_string_alloc(length, 0);
1093
0
  o = (unsigned char *)ZSTR_VAL(result);
1094
1095
  /* See: "Faster Base64 Encoding and Decoding using AVX2 Instructions"
1096
  * https://arxiv.org/pdf/1704.00605.pdf */
1097
0
# if defined(ZEND_INTRIN_AVX2_NATIVE) || defined(ZEND_INTRIN_AVX2_RESOLVER)
1098
0
  while (length > 31 + 11 + 2) {
1099
0
    __m256i lut_lo, lut_hi, lut_roll;
1100
0
    __m256i hi_nibbles, lo_nibbles, hi, lo;
1101
0
    __m256i str = _mm256_loadu_si256((__m256i *)c);
1102
1103
0
    lut_lo = _mm256_setr_epi8(
1104
0
        0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
1105
0
        0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A,
1106
0
        0x15, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, 0x11,
1107
0
        0x11, 0x11, 0x13, 0x1A, 0x1B, 0x1B, 0x1B, 0x1A);
1108
1109
0
    lut_hi = _mm256_setr_epi8(
1110
0
        0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
1111
0
        0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10,
1112
0
        0x10, 0x10, 0x01, 0x02, 0x04, 0x08, 0x04, 0x08,
1113
0
        0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10);
1114
1115
0
    lut_roll = _mm256_setr_epi8(
1116
0
        0,  16,  19,   4, -65, -65, -71, -71,
1117
0
        0,   0,   0,   0,   0,   0,   0,   0,
1118
0
        0,  16,  19,   4, -65, -65, -71, -71,
1119
0
        0,   0,   0,   0,   0,   0,   0,   0);
1120
1121
0
    hi_nibbles  = _mm256_and_si256(_mm256_srli_epi32(str, 4), _mm256_set1_epi8(0x2f));
1122
0
    lo_nibbles  = _mm256_and_si256(str, _mm256_set1_epi8(0x2f));
1123
0
    hi          = _mm256_shuffle_epi8(lut_hi, hi_nibbles);
1124
0
    lo          = _mm256_shuffle_epi8(lut_lo, lo_nibbles);
1125
1126
0
    if (!_mm256_testz_si256(lo, hi)) {
1127
0
      break;
1128
0
    } else {
1129
0
      __m256i eq_2f, roll;
1130
0
      eq_2f = _mm256_cmpeq_epi8(str, _mm256_set1_epi8(0x2f));
1131
0
      roll  = _mm256_shuffle_epi8(lut_roll, _mm256_add_epi8(eq_2f, hi_nibbles));
1132
1133
1134
0
      str = _mm256_add_epi8(str, roll);
1135
1136
0
      str = php_base64_decode_avx2_reshuffle(str);
1137
1138
0
      _mm256_storeu_si256((__m256i *)o, str);
1139
1140
0
      c += 32;
1141
0
      o += 24;
1142
0
      outl += 24;
1143
0
      length -= 32;
1144
0
    }
1145
0
  }
1146
# else
1147
  PHP_BASE64_DECODE_SSSE3_LOOP;
1148
# endif
1149
1150
0
  if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
1151
0
    zend_string_efree(result);
1152
0
    return NULL;
1153
0
  }
1154
1155
0
  ZSTR_LEN(result) = outl;
1156
1157
0
  return result;
1158
0
}
1159
1160
# if defined(ZEND_INTRIN_SSSE3_RESOLVER) && defined(ZEND_INTRIN_AVX2_RESOLVER)
1161
zend_string *php_base64_decode_ex_ssse3(const unsigned char *str, size_t length, bool strict)
1162
0
{
1163
0
  const unsigned char *c = str;
1164
0
  unsigned char *o;
1165
0
  size_t outl = 0;
1166
0
  zend_string *result;
1167
1168
0
  result = zend_string_alloc(length, 0);
1169
0
  o = (unsigned char *)ZSTR_VAL(result);
1170
1171
0
  PHP_BASE64_DECODE_SSSE3_LOOP;
1172
1173
0
  if (!php_base64_decode_impl(c, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
1174
0
    zend_string_efree(result);
1175
0
    return NULL;
1176
0
  }
1177
1178
0
  ZSTR_LEN(result) = outl;
1179
1180
0
  return result;
1181
0
}
1182
# endif
1183
#endif /* ZEND_INTRIN_AVX2_NATIVE || ZEND_INTRIN_AVX2_RESOLVER || ZEND_INTRIN_SSSE3_NATIVE || ZEND_INTRIN_SSSE3_RESOLVER */
1184
1185
#if !defined(ZEND_INTRIN_AVX2_NATIVE) && !defined(ZEND_INTRIN_SSSE3_NATIVE)
1186
#if defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_RESOLVER)
1187
zend_string *php_base64_encode_default(const unsigned char *str, size_t length, zend_long flags)
1188
#else
1189
PHPAPI zend_string *php_base64_encode_ex(const unsigned char *str, size_t length, zend_long flags)
1190
#endif
1191
0
{
1192
0
  unsigned char *p;
1193
0
  zend_string *result;
1194
1195
0
  result = zend_string_safe_alloc(((length + 2) / 3), 4 * sizeof(char), 0, 0);
1196
0
  p = (unsigned char *)ZSTR_VAL(result);
1197
1198
0
  p = php_base64_encode_impl(str, length, p, flags);
1199
1200
0
  ZSTR_LEN(result) = (p - (unsigned char *)ZSTR_VAL(result));
1201
1202
0
  return result;
1203
0
}
1204
#endif
1205
1206
#if !defined(ZEND_INTRIN_AVX2_NATIVE) && !defined(ZEND_INTRIN_SSSE3_NATIVE)
1207
#if defined(ZEND_INTRIN_AVX2_RESOLVER) || defined(ZEND_INTRIN_SSSE3_RESOLVER)
1208
zend_string *php_base64_decode_ex_default(const unsigned char *str, size_t length, bool strict)
1209
#else
1210
PHPAPI zend_string *php_base64_decode_ex(const unsigned char *str, size_t length, bool strict)
1211
#endif
1212
0
{
1213
0
  zend_string *result;
1214
0
  size_t outl = 0;
1215
1216
0
  result = zend_string_alloc(length, 0);
1217
1218
0
  if (!php_base64_decode_impl(str, length, (unsigned char*)ZSTR_VAL(result), &outl, strict)) {
1219
0
    zend_string_efree(result);
1220
0
    return NULL;
1221
0
  }
1222
1223
0
  ZSTR_LEN(result) = outl;
1224
1225
0
  return result;
1226
0
}
1227
#endif
1228
/* }}} */
1229
1230
/* {{{ Encodes string using MIME base64 algorithm */
1231
PHP_FUNCTION(base64_encode)
1232
12
{
1233
12
  char *str;
1234
12
  size_t str_len;
1235
12
  zend_string *result;
1236
1237
36
  ZEND_PARSE_PARAMETERS_START(1, 1)
1238
48
    Z_PARAM_STRING(str, str_len)
1239
12
  ZEND_PARSE_PARAMETERS_END();
1240
1241
12
  result = php_base64_encode((unsigned char*)str, str_len);
1242
12
  RETURN_STR(result);
1243
12
}
1244
/* }}} */
1245
1246
/* {{{ Decodes string using MIME base64 algorithm */
1247
PHP_FUNCTION(base64_decode)
1248
0
{
1249
0
  char *str;
1250
0
  bool strict = 0;
1251
0
  size_t str_len;
1252
0
  zend_string *result;
1253
1254
0
  ZEND_PARSE_PARAMETERS_START(1, 2)
1255
0
    Z_PARAM_STRING(str, str_len)
1256
0
    Z_PARAM_OPTIONAL
1257
0
    Z_PARAM_BOOL(strict)
1258
0
  ZEND_PARSE_PARAMETERS_END();
1259
1260
0
  result = php_base64_decode_ex((unsigned char*)str, str_len, strict);
1261
0
  if (result != NULL) {
1262
0
    RETURN_STR(result);
1263
0
  } else {
1264
0
    RETURN_FALSE;
1265
0
  }
1266
0
}
1267
/* }}} */