Coverage Report

Created: 2025-08-29 06:43

/src/simdutf/src/scalar/base64.h
Line
Count
Source (jump to first uncovered line)
1
#ifndef SIMDUTF_BASE64_H
2
#define SIMDUTF_BASE64_H
3
4
#include <algorithm>
5
#include <cstddef>
6
#include <cstdint>
7
#include <cstring>
8
#include <iostream>
9
10
namespace simdutf {
11
namespace scalar {
12
namespace {
13
namespace base64 {
14
15
// Reports whether `c` is one of the five ASCII white-space characters:
// space, horizontal tab, line feed, carriage return or form feed.
// This helper is not tuned for speed; inside decoding loops is_ignorable is
// normally the function to call.
template <class char_type> bool is_ascii_white_space(char_type c) {
  switch (c) {
  case ' ':
  case '\t':
  case '\n':
  case '\r':
  case '\f':
    return true;
  default:
    return false;
  }
}
20
21
0
// Reports whether `c` fits in a single byte, i.e. whether narrowing it to
// uint8_t and comparing back yields the same value. One-byte character types
// always qualify, whatever their value.
template <class char_type> bool is_eight_byte(char_type c) {
  return (sizeof(char_type) == 1) || (uint8_t(c) == c);
}
Unexecuted instantiation: simdutf.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_eight_byte<char>(char)
Unexecuted instantiation: simdutf.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_eight_byte<char16_t>(char16_t)
27
28
template <class char_type>
29
0
bool is_ignorable(char_type c, simdutf::base64_options options) {
30
0
  const uint8_t *to_base64 =
31
0
      (options & base64_default_or_url)
32
0
          ? tables::base64::to_base64_default_or_url_value
33
0
          : ((options & base64_url) ? tables::base64::to_base64_url_value
34
0
                                    : tables::base64::to_base64_value);
35
0
  const bool ignore_garbage =
36
0
      (options == base64_options::base64_url_accept_garbage) ||
37
0
      (options == base64_options::base64_default_accept_garbage) ||
38
0
      (options == base64_options::base64_default_or_url_accept_garbage);
39
0
  uint8_t code = to_base64[uint8_t(c)];
40
0
  if (is_eight_byte(c) && code <= 63) {
41
0
    return false;
42
0
  }
43
0
  if (is_eight_byte(c) && code == 64) {
44
0
    return true;
45
0
  }
46
0
  return ignore_garbage;
47
0
}
Unexecuted instantiation: simdutf.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_ignorable<char>(char, simdutf::base64_options)
Unexecuted instantiation: simdutf.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_ignorable<char16_t>(char16_t, simdutf::base64_options)
48
template <class char_type>
49
0
bool is_base64(char_type c, simdutf::base64_options options) {
50
0
  const uint8_t *to_base64 =
51
0
      (options & base64_default_or_url)
52
0
          ? tables::base64::to_base64_default_or_url_value
53
0
          : ((options & base64_url) ? tables::base64::to_base64_url_value
54
0
                                    : tables::base64::to_base64_value);
55
0
  uint8_t code = to_base64[uint8_t(c)];
56
0
  if (is_eight_byte(c) && code <= 63) {
57
0
    return true;
58
0
  }
59
0
  return false;
60
0
}
Unexecuted instantiation: simdutf.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_base64<char>(char, simdutf::base64_options)
Unexecuted instantiation: simdutf.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_base64<char16_t>(char16_t, simdutf::base64_options)
61
62
template <class char_type>
63
0
bool is_base64_or_padding(char_type c, simdutf::base64_options options) {
64
0
  const uint8_t *to_base64 =
65
0
      (options & base64_default_or_url)
66
0
          ? tables::base64::to_base64_default_or_url_value
67
0
          : ((options & base64_url) ? tables::base64::to_base64_url_value
68
0
                                    : tables::base64::to_base64_value);
69
0
  if (c == '=') {
70
0
    return true;
71
0
  }
72
0
  uint8_t code = to_base64[uint8_t(c)];
73
0
  if (is_eight_byte(c) && code <= 63) {
74
0
    return true;
75
0
  }
76
0
  return false;
77
0
}
Unexecuted instantiation: simdutf.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_base64_or_padding<char>(char, simdutf::base64_options)
Unexecuted instantiation: simdutf.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_base64_or_padding<char16_t>(char16_t, simdutf::base64_options)
78
79
template <class char_type>
80
bool is_ignorable_or_padding(char_type c, simdutf::base64_options options) {
81
  return is_ignorable(c, options) || c == '=';
82
}
83
84
// Result of find_end: where the payload of a base64 buffer stops and how much
// padding follows it. The member order matters because the struct is filled
// with brace initialization.
struct reduced_input {
  // Number of '=' padding characters found (typically 0, 1 or 2).
  size_t equalsigns;
  // Index of the first padding character, if there is one.
  size_t equallocation;
  // Length of the input once the padding has been removed.
  size_t srclen;
  // Length of the input with padding but without ignorable characters.
  size_t full_input_length;
};
91
92
// find the end of the base64 input buffer
93
// It returns the number of padding characters, the location of the first
94
// padding character if any, the length of the input buffer before padding
95
// and the length of the input buffer with padding. The input buffer is not
96
// modified. The function assumes that there are at most two padding characters.
97
template <class char_type>
98
reduced_input find_end(const char_type *src, size_t srclen,
99
0
                       simdutf::base64_options options) {
100
0
  const uint8_t *to_base64 =
101
0
      (options & base64_default_or_url)
102
0
          ? tables::base64::to_base64_default_or_url_value
103
0
          : ((options & base64_url) ? tables::base64::to_base64_url_value
104
0
                                    : tables::base64::to_base64_value);
105
0
  const bool ignore_garbage =
106
0
      (options == base64_options::base64_url_accept_garbage) ||
107
0
      (options == base64_options::base64_default_accept_garbage) ||
108
0
      (options == base64_options::base64_default_or_url_accept_garbage);
109
110
0
  size_t equalsigns = 0;
111
  // We intentionally include trailing spaces in the full input length.
112
  // See https://github.com/simdutf/simdutf/issues/824
113
0
  size_t full_input_length = srclen;
114
  // skip trailing spaces
115
0
  while (!ignore_garbage && srclen > 0 &&
116
0
         scalar::base64::is_eight_byte(src[srclen - 1]) &&
117
0
         to_base64[uint8_t(src[srclen - 1])] == 64) {
118
0
    srclen--;
119
0
  }
120
0
  size_t equallocation =
121
0
      srclen; // location of the first padding character if any
122
0
  if (ignore_garbage) {
123
    // Technically, we don't need to find the first padding character, we can
124
    // just change our algorithms, but it adds substantial complexity.
125
0
    auto it = simdutf::find(src, src + srclen, '=');
126
0
    if (it != src + srclen) {
127
0
      equallocation = it - src;
128
0
      equalsigns = 1;
129
0
      srclen = equallocation;
130
0
      full_input_length = equallocation + 1;
131
0
    }
132
0
    return {equalsigns, equallocation, srclen, full_input_length};
133
0
  }
134
0
  if (!ignore_garbage && srclen > 0 && src[srclen - 1] == '=') {
135
    // This is the last '=' sign.
136
0
    equallocation = srclen - 1;
137
0
    srclen--;
138
0
    equalsigns = 1;
139
    // skip trailing spaces
140
0
    while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) &&
141
0
           to_base64[uint8_t(src[srclen - 1])] == 64) {
142
0
      srclen--;
143
0
    }
144
0
    if (srclen > 0 && src[srclen - 1] == '=') {
145
      // This is the second '=' sign.
146
0
      equallocation = srclen - 1;
147
0
      srclen--;
148
0
      equalsigns = 2;
149
0
    }
150
0
  }
151
0
  return {equalsigns, equallocation, srclen, full_input_length};
152
0
}
Unexecuted instantiation: simdutf.cpp:simdutf::scalar::(anonymous namespace)::base64::reduced_input simdutf::scalar::(anonymous namespace)::base64::find_end<char>(char const*, unsigned long, simdutf::base64_options)
Unexecuted instantiation: simdutf.cpp:simdutf::scalar::(anonymous namespace)::base64::reduced_input simdutf::scalar::(anonymous namespace)::base64::find_end<char16_t>(char16_t const*, unsigned long, simdutf::base64_options)
153
154
// Returns true upon success. The destination buffer must be large enough.
155
// This functions assumes that the padding (=) has been removed.
156
// if check_capacity is true, it will check that the destination buffer is
157
// large enough. If it is not, it will return OUTPUT_BUFFER_TOO_SMALL.
158
template <bool check_capacity, class char_type>
159
full_result base64_tail_decode_impl(
160
    char *dst, size_t outlen, const char_type *src, size_t length,
161
    size_t padding_characters, // number of padding characters
162
                               // '=', typically 0, 1, 2.
163
0
    base64_options options, last_chunk_handling_options last_chunk_options) {
164
0
  char *dstend = dst + outlen;
165
0
  (void)dstend;
166
  // This looks like 10 branches, but we expect the compiler to resolve this to
167
  // two branches (easily predicted):
168
0
  const uint8_t *to_base64 =
169
0
      (options & base64_default_or_url)
170
0
          ? tables::base64::to_base64_default_or_url_value
171
0
          : ((options & base64_url) ? tables::base64::to_base64_url_value
172
0
                                    : tables::base64::to_base64_value);
173
0
  const uint32_t *d0 =
174
0
      (options & base64_default_or_url)
175
0
          ? tables::base64::base64_default_or_url::d0
176
0
          : ((options & base64_url) ? tables::base64::base64_url::d0
177
0
                                    : tables::base64::base64_default::d0);
178
0
  const uint32_t *d1 =
179
0
      (options & base64_default_or_url)
180
0
          ? tables::base64::base64_default_or_url::d1
181
0
          : ((options & base64_url) ? tables::base64::base64_url::d1
182
0
                                    : tables::base64::base64_default::d1);
183
0
  const uint32_t *d2 =
184
0
      (options & base64_default_or_url)
185
0
          ? tables::base64::base64_default_or_url::d2
186
0
          : ((options & base64_url) ? tables::base64::base64_url::d2
187
0
                                    : tables::base64::base64_default::d2);
188
0
  const uint32_t *d3 =
189
0
      (options & base64_default_or_url)
190
0
          ? tables::base64::base64_default_or_url::d3
191
0
          : ((options & base64_url) ? tables::base64::base64_url::d3
192
0
                                    : tables::base64::base64_default::d3);
193
0
  const bool ignore_garbage =
194
0
      (options == base64_options::base64_url_accept_garbage) ||
195
0
      (options == base64_options::base64_default_accept_garbage) ||
196
0
      (options == base64_options::base64_default_or_url_accept_garbage);
197
198
0
  const char_type *srcend = src + length;
199
0
  const char_type *srcinit = src;
200
0
  const char *dstinit = dst;
201
202
0
  uint32_t x;
203
0
  size_t idx;
204
0
  uint8_t buffer[4];
205
0
  while (true) {
206
0
    while (src + 4 <= srcend && is_eight_byte(src[0]) &&
207
0
           is_eight_byte(src[1]) && is_eight_byte(src[2]) &&
208
0
           is_eight_byte(src[3]) &&
209
0
           (x = d0[uint8_t(src[0])] | d1[uint8_t(src[1])] |
210
0
                d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) {
211
0
      if (match_system(endianness::BIG)) {
212
0
        x = scalar::u32_swap_bytes(x);
213
0
      }
214
0
      if (check_capacity && dstend - dst < 3) {
215
0
        return {OUTPUT_BUFFER_TOO_SMALL, size_t(src - srcinit),
216
0
                size_t(dst - dstinit)};
217
0
      }
218
0
      std::memcpy(dst, &x, 3); // optimization opportunity: copy 4 bytes
219
0
      dst += 3;
220
0
      src += 4;
221
0
    }
222
0
    const char_type *srccur = src;
223
0
    idx = 0;
224
    // we need at least four characters.
225
0
#ifdef __clang__
226
    // If possible, we read four characters at a time. (It is an optimization.)
227
0
    if (ignore_garbage && src + 4 <= srcend) {
228
0
      char_type c0 = src[0];
229
0
      char_type c1 = src[1];
230
0
      char_type c2 = src[2];
231
0
      char_type c3 = src[3];
232
233
0
      uint8_t code0 = to_base64[uint8_t(c0)];
234
0
      uint8_t code1 = to_base64[uint8_t(c1)];
235
0
      uint8_t code2 = to_base64[uint8_t(c2)];
236
0
      uint8_t code3 = to_base64[uint8_t(c3)];
237
238
0
      buffer[idx] = code0;
239
0
      idx += (is_eight_byte(c0) && code0 <= 63);
240
0
      buffer[idx] = code1;
241
0
      idx += (is_eight_byte(c1) && code1 <= 63);
242
0
      buffer[idx] = code2;
243
0
      idx += (is_eight_byte(c2) && code2 <= 63);
244
0
      buffer[idx] = code3;
245
0
      idx += (is_eight_byte(c3) && code3 <= 63);
246
0
      src += 4;
247
0
    }
248
0
#endif
249
0
    while ((idx < 4) && (src < srcend)) {
250
0
      char_type c = *src;
251
252
0
      uint8_t code = to_base64[uint8_t(c)];
253
0
      buffer[idx] = uint8_t(code);
254
0
      if (is_eight_byte(c) && code <= 63) {
255
0
        idx++;
256
0
      } else if (!ignore_garbage &&
257
0
                 (code > 64 || !scalar::base64::is_eight_byte(c))) {
258
0
        return {INVALID_BASE64_CHARACTER, size_t(src - srcinit),
259
0
                size_t(dst - dstinit)};
260
0
      } else {
261
        // We have a space or a newline or garbage. We ignore it.
262
0
      }
263
0
      src++;
264
0
    }
265
0
    if (idx != 4) {
266
0
      simdutf_log_assert(idx < 4, "idx should be less than 4");
267
      // We never should have that the number of base64 characters + the
268
      // number of padding characters is more than 4.
269
0
      if (!ignore_garbage && (idx + padding_characters > 4)) {
270
0
        return {INVALID_BASE64_CHARACTER, size_t(src - srcinit),
271
0
                size_t(dst - dstinit), true};
272
0
      }
273
274
      // The idea here is that in loose mode,
275
      // if there is padding at all, it must be used
276
      // to form 4-wise chunk. However, in loose mode,
277
      // we do accept no padding at all.
278
0
      if (!ignore_garbage &&
279
0
          last_chunk_options == last_chunk_handling_options::loose &&
280
0
          (idx >= 2) && padding_characters > 0 &&
281
0
          ((idx + padding_characters) & 3) != 0) {
282
0
        return {INVALID_BASE64_CHARACTER, size_t(src - srcinit),
283
0
                size_t(dst - dstinit), true};
284
0
      } else
285
286
        // The idea here is that in strict mode, we do not want to accept
287
        // incomplete base64 chunks. So if the chunk was otherwise valid, we
288
        // return BASE64_INPUT_REMAINDER.
289
0
        if (!ignore_garbage &&
290
0
            last_chunk_options == last_chunk_handling_options::strict &&
291
0
            (idx >= 2) && ((idx + padding_characters) & 3) != 0) {
292
          // The partial chunk was at src - idx
293
0
          return {BASE64_INPUT_REMAINDER, size_t(src - srcinit),
294
0
                  size_t(dst - dstinit), true};
295
0
        } else
296
          // If there is a partial chunk with insufficient padding, with
297
          // stop_before_partial, we need to just ignore it. In "only full"
298
          // mode, skip the minute there are padding characters.
299
0
          if ((last_chunk_options ==
300
0
                   last_chunk_handling_options::stop_before_partial &&
301
0
               (padding_characters + idx < 4) && (idx != 0) &&
302
0
               (idx >= 2 || padding_characters == 0)) ||
303
0
              (last_chunk_options ==
304
0
                   last_chunk_handling_options::only_full_chunks &&
305
0
               (idx >= 2 || padding_characters == 0))) {
306
            // partial means that we are *not* going to consume the read
307
            // characters. We need to rewind the src pointer.
308
0
            src = srccur;
309
0
            return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)};
310
0
          } else {
311
0
            if (idx == 2) {
312
0
              uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) +
313
0
                                (uint32_t(buffer[1]) << 2 * 6);
314
0
              if (!ignore_garbage &&
315
0
                  (last_chunk_options == last_chunk_handling_options::strict) &&
316
0
                  (triple & 0xffff)) {
317
0
                return {BASE64_EXTRA_BITS, size_t(src - srcinit),
318
0
                        size_t(dst - dstinit)};
319
0
              }
320
0
              if (check_capacity && dstend - dst < 1) {
321
0
                return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit),
322
0
                        size_t(dst - dstinit)};
323
0
              }
324
0
              if (match_system(endianness::BIG)) {
325
0
                triple <<= 8;
326
0
                std::memcpy(dst, &triple, 1);
327
0
              } else {
328
0
                triple = scalar::u32_swap_bytes(triple);
329
0
                triple >>= 8;
330
0
                std::memcpy(dst, &triple, 1);
331
0
              }
332
0
              dst += 1;
333
0
            } else if (idx == 3) {
334
0
              uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) +
335
0
                                (uint32_t(buffer[1]) << 2 * 6) +
336
0
                                (uint32_t(buffer[2]) << 1 * 6);
337
0
              if (!ignore_garbage &&
338
0
                  (last_chunk_options == last_chunk_handling_options::strict) &&
339
0
                  (triple & 0xff)) {
340
0
                return {BASE64_EXTRA_BITS, size_t(src - srcinit),
341
0
                        size_t(dst - dstinit)};
342
0
              }
343
0
              if (check_capacity && dstend - dst < 2) {
344
0
                return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit),
345
0
                        size_t(dst - dstinit)};
346
0
              }
347
0
              if (match_system(endianness::BIG)) {
348
0
                triple <<= 8;
349
0
                std::memcpy(dst, &triple, 2);
350
0
              } else {
351
0
                triple = scalar::u32_swap_bytes(triple);
352
0
                triple >>= 8;
353
0
                std::memcpy(dst, &triple, 2);
354
0
              }
355
0
              dst += 2;
356
0
            } else if (!ignore_garbage && idx == 1 &&
357
0
                       (!is_partial(last_chunk_options) ||
358
0
                        (is_partial(last_chunk_options) &&
359
0
                         padding_characters > 0))) {
360
0
              return {BASE64_INPUT_REMAINDER, size_t(src - srcinit),
361
0
                      size_t(dst - dstinit)};
362
0
            } else if (!ignore_garbage && idx == 0 && padding_characters > 0) {
363
0
              return {INVALID_BASE64_CHARACTER, size_t(src - srcinit),
364
0
                      size_t(dst - dstinit), true};
365
0
            }
366
0
            return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)};
367
0
          }
368
0
    }
369
0
    if (check_capacity && dstend - dst < 3) {
370
0
      return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit),
371
0
              size_t(dst - dstinit)};
372
0
    }
373
0
    uint32_t triple =
374
0
        (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6) +
375
0
        (uint32_t(buffer[2]) << 1 * 6) + (uint32_t(buffer[3]) << 0 * 6);
376
0
    if (match_system(endianness::BIG)) {
377
0
      triple <<= 8;
378
0
      std::memcpy(dst, &triple, 3);
379
0
    } else {
380
0
      triple = scalar::u32_swap_bytes(triple);
381
0
      triple >>= 8;
382
0
      std::memcpy(dst, &triple, 3);
383
0
    }
384
0
    dst += 3;
385
0
  }
386
0
}
Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_tail_decode_impl<true, char>(char*, unsigned long, char const*, unsigned long, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options)
Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_tail_decode_impl<true, char16_t>(char*, unsigned long, char16_t const*, unsigned long, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options)
Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_tail_decode_impl<false, char>(char*, unsigned long, char const*, unsigned long, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options)
Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_tail_decode_impl<false, char16_t>(char*, unsigned long, char16_t const*, unsigned long, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options)
387
388
template <class char_type>
389
full_result
390
base64_tail_decode(char *dst, const char_type *src, size_t length,
391
                   size_t padding_characters, // number of padding characters
392
                                              // '=', typically 0, 1, 2.
393
                   base64_options options,
394
0
                   last_chunk_handling_options last_chunk_options) {
395
0
  return base64_tail_decode_impl<false>(dst, 0, src, length, padding_characters,
396
0
                                        options, last_chunk_options);
397
0
}
Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_tail_decode<char>(char*, char const*, unsigned long, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options)
Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_tail_decode<char16_t>(char*, char16_t const*, unsigned long, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options)
398
399
// like base64_tail_decode, but it will not write past the end of the output
400
// buffer. The outlen parameter is modified to reflect the number of bytes
401
// written. This functions assumes that the padding (=) has been removed.
402
//
403
template <class char_type>
404
full_result base64_tail_decode_safe(
405
    char *dst, size_t outlen, const char_type *src, size_t length,
406
    size_t padding_characters, // number of padding characters
407
                               // '=', typically 0, 1, 2.
408
0
    base64_options options, last_chunk_handling_options last_chunk_options) {
409
0
  return base64_tail_decode_impl<true>(dst, outlen, src, length,
410
0
                                       padding_characters, options,
411
0
                                       last_chunk_options);
412
0
}
Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_tail_decode_safe<char>(char*, unsigned long, char const*, unsigned long, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options)
Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_tail_decode_safe<char16_t>(char*, unsigned long, char16_t const*, unsigned long, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options)
413
414
// Adjusts a result produced by the tail decoder so that its counters are
// relative to the whole input:
//  - `previous_input` / `previous_output` are added to the counters;
//  - when the result carries a padding error, the input position is replaced
//    by `equallocation`;
//  - on success, the input position becomes `full_input_length` unless a
//    partial mode is active and the output length is a multiple of three.
inline full_result
patch_tail_result(full_result r, size_t previous_input, size_t previous_output,
                  size_t equallocation, size_t full_input_length,
                  last_chunk_handling_options last_chunk_options) {
  r.output_count += previous_output;
  if (r.padding_error) {
    r.input_count = equallocation;
  } else {
    r.input_count += previous_input;
  }

  // A success outside the partial modes means the whole input buffer was
  // consumed; in a partial mode that only holds when the output did not stop
  // on a three-byte boundary.
  const bool consumed_everything =
      r.error == error_code::SUCCESS &&
      (!is_partial(last_chunk_options) || r.output_count % 3 != 0);
  if (consumed_everything) {
    r.input_count = full_input_length;
  }
  return r;
}
435
436
// Returns the number of bytes written. The destination buffer must be large
437
// enough. It will add padding (=) if needed.
438
size_t tail_encode_base64(char *dst, const char *src, size_t srclen,
439
0
                          base64_options options) {
440
  // By default, we use padding if we are not using the URL variant.
441
  // This is check with ((options & base64_url) == 0) which returns true if we
442
  // are not using the URL variant. However, we also allow 'inversion' of the
443
  // convention with the base64_reverse_padding option. If the
444
  // base64_reverse_padding option is set, we use padding if we are using the
445
  // URL variant, and we omit it if we are not using the URL variant. This is
446
  // checked with
447
  // ((options & base64_reverse_padding) == base64_reverse_padding).
448
0
  bool use_padding =
449
0
      ((options & base64_url) == 0) ^
450
0
      ((options & base64_reverse_padding) == base64_reverse_padding);
451
  // This looks like 3 branches, but we expect the compiler to resolve this to
452
  // a single branch:
453
0
  const char *e0 = (options & base64_url) ? tables::base64::base64_url::e0
454
0
                                          : tables::base64::base64_default::e0;
455
0
  const char *e1 = (options & base64_url) ? tables::base64::base64_url::e1
456
0
                                          : tables::base64::base64_default::e1;
457
0
  const char *e2 = (options & base64_url) ? tables::base64::base64_url::e2
458
0
                                          : tables::base64::base64_default::e2;
459
0
  char *out = dst;
460
0
  size_t i = 0;
461
0
  uint8_t t1, t2, t3;
462
0
  for (; i + 2 < srclen; i += 3) {
463
0
    t1 = uint8_t(src[i]);
464
0
    t2 = uint8_t(src[i + 1]);
465
0
    t3 = uint8_t(src[i + 2]);
466
0
    *out++ = e0[t1];
467
0
    *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
468
0
    *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)];
469
0
    *out++ = e2[t3];
470
0
  }
471
0
  switch (srclen - i) {
472
0
  case 0:
473
0
    break;
474
0
  case 1:
475
0
    t1 = uint8_t(src[i]);
476
0
    *out++ = e0[t1];
477
0
    *out++ = e1[(t1 & 0x03) << 4];
478
0
    if (use_padding) {
479
0
      *out++ = '=';
480
0
      *out++ = '=';
481
0
    }
482
0
    break;
483
0
  default: /* case 2 */
484
0
    t1 = uint8_t(src[i]);
485
0
    t2 = uint8_t(src[i + 1]);
486
0
    *out++ = e0[t1];
487
0
    *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
488
0
    *out++ = e2[(t2 & 0x0F) << 2];
489
0
    if (use_padding) {
490
0
      *out++ = '=';
491
0
    }
492
0
  }
493
0
  return (size_t)(out - dst);
494
0
}
495
496
template <class char_type>
497
simdutf_warn_unused size_t maximal_binary_length_from_base64(
498
0
    const char_type *input, size_t length) noexcept {
499
  // We follow https://infra.spec.whatwg.org/#forgiving-base64-decode
500
0
  size_t padding = 0;
501
0
  if (length > 0) {
502
0
    if (input[length - 1] == '=') {
503
0
      padding++;
504
0
      if (length > 1 && input[length - 2] == '=') {
505
0
        padding++;
506
0
      }
507
0
    }
508
0
  }
509
0
  size_t actual_length = length - padding;
510
0
  if (actual_length % 4 <= 1) {
511
0
    return actual_length / 4 * 3;
512
0
  }
513
  // if we have a valid input, then the remainder must be 2 or 3 adding one or
514
  // two extra bytes.
515
0
  return actual_length / 4 * 3 + (actual_length % 4) - 1;
516
0
}
Unexecuted instantiation: simdutf.cpp:unsigned long simdutf::scalar::(anonymous namespace)::base64::maximal_binary_length_from_base64<char>(char const*, unsigned long)
Unexecuted instantiation: simdutf.cpp:unsigned long simdutf::scalar::(anonymous namespace)::base64::maximal_binary_length_from_base64<char16_t>(char16_t const*, unsigned long)
517
518
template <typename char_type>
519
simdutf_warn_unused full_result base64_to_binary_details_impl(
520
    const char_type *input, size_t length, char *output, base64_options options,
521
0
    last_chunk_handling_options last_chunk_options) noexcept {
522
0
  const bool ignore_garbage =
523
0
      (options == base64_options::base64_url_accept_garbage) ||
524
0
      (options == base64_options::base64_default_accept_garbage) ||
525
0
      (options == base64_options::base64_default_or_url_accept_garbage);
526
0
  auto ri = simdutf::scalar::base64::find_end(input, length, options);
527
0
  size_t equallocation = ri.equallocation;
528
0
  size_t equalsigns = ri.equalsigns;
529
0
  length = ri.srclen;
530
0
  size_t full_input_length = ri.full_input_length;
531
0
  if (length == 0) {
532
0
    if (!ignore_garbage && equalsigns > 0) {
533
0
      return {INVALID_BASE64_CHARACTER, equallocation, 0};
534
0
    }
535
0
    return {SUCCESS, full_input_length, 0};
536
0
  }
537
0
  full_result r = scalar::base64::base64_tail_decode(
538
0
      output, input, length, equalsigns, options, last_chunk_options);
539
0
  r = scalar::base64::patch_tail_result(r, 0, 0, equallocation,
540
0
                                        full_input_length, last_chunk_options);
541
0
  if (!is_partial(last_chunk_options) && r.error == error_code::SUCCESS &&
542
0
      equalsigns > 0 && !ignore_garbage) {
543
    // additional checks
544
0
    if ((r.output_count % 3 == 0) ||
545
0
        ((r.output_count % 3) + 1 + equalsigns != 4)) {
546
0
      return {INVALID_BASE64_CHARACTER, equallocation, r.output_count};
547
0
    }
548
0
  }
549
  // When is_partial(last_chunk_options) is true, we must either end with
550
  // the end of the stream (beyond whitespace) or right after a non-ignorable
551
  // character or at the very beginning of the stream.
552
  // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
553
0
  if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS &&
554
0
      r.input_count < full_input_length) {
555
    // First check if we can extend the input to the end of the stream
556
0
    while (r.input_count < full_input_length &&
557
0
           base64_ignorable(*(input + r.input_count), options)) {
558
0
      r.input_count++;
559
0
    }
560
    // If we are still not at the end of the stream, then we must backtrack
561
    // to the last non-ignorable character.
562
0
    if (r.input_count < full_input_length) {
563
0
      while (r.input_count > 0 &&
564
0
             base64_ignorable(*(input + r.input_count - 1), options)) {
565
0
        r.input_count--;
566
0
      }
567
0
    }
568
0
  }
569
0
  return r;
570
0
}
Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_to_binary_details_impl<char>(char const*, unsigned long, char*, simdutf::base64_options, simdutf::last_chunk_handling_options)
Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_to_binary_details_impl<char16_t>(char16_t const*, unsigned long, char*, simdutf::base64_options, simdutf::last_chunk_handling_options)
571
572
template <typename char_type>
573
simdutf_warn_unused full_result base64_to_binary_details_safe_impl(
574
    const char_type *input, size_t length, char *output, size_t outlen,
575
    base64_options options,
576
0
    last_chunk_handling_options last_chunk_options) noexcept {
577
0
  const bool ignore_garbage =
578
0
      (options == base64_options::base64_url_accept_garbage) ||
579
0
      (options == base64_options::base64_default_accept_garbage) ||
580
0
      (options == base64_options::base64_default_or_url_accept_garbage);
581
0
  auto ri = simdutf::scalar::base64::find_end(input, length, options);
582
0
  size_t equallocation = ri.equallocation;
583
0
  size_t equalsigns = ri.equalsigns;
584
0
  length = ri.srclen;
585
0
  size_t full_input_length = ri.full_input_length;
586
0
  if (length == 0) {
587
0
    if (!ignore_garbage && equalsigns > 0) {
588
0
      return {INVALID_BASE64_CHARACTER, equallocation, 0};
589
0
    }
590
0
    return {SUCCESS, full_input_length, 0};
591
0
  }
592
0
  full_result r = scalar::base64::base64_tail_decode_safe(
593
0
      output, outlen, input, length, equalsigns, options, last_chunk_options);
594
0
  r = scalar::base64::patch_tail_result(r, 0, 0, equallocation,
595
0
                                        full_input_length, last_chunk_options);
596
0
  if (!is_partial(last_chunk_options) && r.error == error_code::SUCCESS &&
597
0
      equalsigns > 0 && !ignore_garbage) {
598
    // additional checks
599
0
    if ((r.output_count % 3 == 0) ||
600
0
        ((r.output_count % 3) + 1 + equalsigns != 4)) {
601
0
      return {INVALID_BASE64_CHARACTER, equallocation, r.output_count};
602
0
    }
603
0
  }
604
605
  // When is_partial(last_chunk_options) is true, we must either end with
606
  // the end of the stream (beyond whitespace) or right after a non-ignorable
607
  // character or at the very beginning of the stream.
608
  // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
609
0
  if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS &&
610
0
      r.input_count < full_input_length) {
611
    // First check if we can extend the input to the end of the stream
612
0
    while (r.input_count < full_input_length &&
613
0
           base64_ignorable(*(input + r.input_count), options)) {
614
0
      r.input_count++;
615
0
    }
616
    // If we are still not at the end of the stream, then we must backtrack
617
    // to the last non-ignorable character.
618
0
    if (r.input_count < full_input_length) {
619
0
      while (r.input_count > 0 &&
620
0
             base64_ignorable(*(input + r.input_count - 1), options)) {
621
0
        r.input_count--;
622
0
      }
623
0
    }
624
0
  }
625
0
  return r;
626
0
}
Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_to_binary_details_safe_impl<char>(char const*, unsigned long, char*, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options)
Unexecuted instantiation: simdutf.cpp:simdutf::full_result simdutf::scalar::(anonymous namespace)::base64::base64_to_binary_details_safe_impl<char16_t>(char16_t const*, unsigned long, char*, unsigned long, simdutf::base64_options, simdutf::last_chunk_handling_options)
627
628
// Returns the number of base64 characters needed to encode `length` bytes of
// binary data under the given options.
//
// Padding policy: by default the standard alphabet is padded and the URL
// alphabet is not. The base64_reverse_padding option inverts that convention,
// so padding is emitted exactly when "not URL variant" and "reverse padding
// requested" disagree.
simdutf_warn_unused size_t
base64_length_from_binary(size_t length, base64_options options) noexcept {
  const bool standard_alphabet = ((options & base64_url) == 0);
  const bool padding_reversed =
      ((options & base64_reverse_padding) == base64_reverse_padding);
  const bool use_padding = (standard_alphabet != padding_reversed);

  if (use_padding) {
    // We use padding to make the length a multiple of 4: every started
    // 3-byte group produces exactly 4 characters.
    return (length + 2) / 3 * 4;
  }

  // Without padding, each complete 3-byte group yields 4 characters, and a
  // trailing group of 1 or 2 bytes yields 2 or 3 characters respectively.
  const size_t complete_groups = length / 3;
  const size_t leftover_bytes = length % 3;
  const size_t leftover_chars = (leftover_bytes == 0) ? 0 : leftover_bytes + 1;
  return complete_groups * 4 + leftover_chars;
}
647
648
// Return the length of the prefix that contains count base64 characters.
649
// Thus, if count is 3, the function returns the length of the prefix
650
// that contains 3 base64 characters.
651
// The function returns (size_t)-1 if there are not enough base64 characters in
652
// the input.
653
template <typename char_type>
654
simdutf_warn_unused size_t prefix_length(size_t count,
655
                                         simdutf::base64_options options,
656
                                         const char_type *input,
657
                                         size_t length) noexcept {
658
  size_t i = 0;
659
  while (i < length && is_ignorable(input[i], options)) {
660
    i++;
661
  }
662
  if (count == 0) {
663
    return i; // duh!
664
  }
665
  for (; i < length; i++) {
666
    if (is_ignorable(input[i], options)) {
667
      continue;
668
    }
669
    // We have a base64 character or a padding character.
670
    count--;
671
    if (count == 0) {
672
      return i + 1;
673
    }
674
  }
675
  simdutf_log_assert(false, "You never get here");
676
677
  return -1; // should never happen
678
}
679
680
} // namespace base64
681
} // unnamed namespace
682
} // namespace scalar
683
} // namespace simdutf
684
685
#endif