Coverage Report

Created: 2026-01-09 06:57

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/work/include/simdutf/scalar/base64.h
Line
Count
Source
1
#ifndef SIMDUTF_BASE64_H
2
#define SIMDUTF_BASE64_H
3
4
#include <algorithm>
5
#include <cstddef>
6
#include <cstdint>
7
#include <cstring>
8
#include <iostream>
9
10
namespace simdutf {
11
namespace scalar {
12
namespace {
13
namespace base64 {
14
15
// Tells whether `c` is one of the five ASCII white-space characters:
// space, horizontal tab, line feed, carriage return or form feed.
// (Vertical tab is deliberately not part of the set.)
//
// This function is not expected to be fast. Do not use in long loops.
// In most instances you should be using is_ignorable.
template <class char_type> bool is_ascii_white_space(char_type c) {
  return c == ' ' || c == '\t' || c == '\n' || c == '\r' || c == '\f';
}
20
21
0
// Returns true when `c` survives a round trip through uint8_t unchanged,
// i.e. when casting it to uint8_t (as done before every table lookup in this
// file) does not lose information. Character types that are one byte wide
// always qualify, so the comparison is skipped for them.
template <class char_type> simdutf_constexpr23 bool is_eight_byte(char_type c) {
  if simdutf_constexpr (sizeof(char_type) == 1) {
    return true;
  }
  return uint8_t(c) == c;
}
Unexecuted instantiation: base64.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_eight_byte<char>(char)
Unexecuted instantiation: base64.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_eight_byte<char16_t>(char16_t)
27
28
template <class char_type>
29
simdutf_constexpr23 bool is_ignorable(char_type c,
30
0
                                      simdutf::base64_options options) {
31
0
  const uint8_t *to_base64 =
32
0
      (options & base64_default_or_url)
33
0
          ? tables::base64::to_base64_default_or_url_value
34
0
          : ((options & base64_url) ? tables::base64::to_base64_url_value
35
0
                                    : tables::base64::to_base64_value);
36
0
  const bool ignore_garbage =
37
0
      (options == base64_options::base64_url_accept_garbage) ||
38
0
      (options == base64_options::base64_default_accept_garbage) ||
39
0
      (options == base64_options::base64_default_or_url_accept_garbage);
40
0
  uint8_t code = to_base64[uint8_t(c)];
41
0
  if (is_eight_byte(c) && code <= 63) {
42
0
    return false;
43
0
  }
44
0
  if (is_eight_byte(c) && code == 64) {
45
0
    return true;
46
0
  }
47
0
  return ignore_garbage;
48
0
}
Unexecuted instantiation: base64.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_ignorable<char>(char, simdutf::base64_options)
Unexecuted instantiation: base64.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_ignorable<char16_t>(char16_t, simdutf::base64_options)
49
template <class char_type>
50
simdutf_constexpr23 bool is_base64(char_type c,
51
0
                                   simdutf::base64_options options) {
52
0
  const uint8_t *to_base64 =
53
0
      (options & base64_default_or_url)
54
0
          ? tables::base64::to_base64_default_or_url_value
55
0
          : ((options & base64_url) ? tables::base64::to_base64_url_value
56
0
                                    : tables::base64::to_base64_value);
57
0
  uint8_t code = to_base64[uint8_t(c)];
58
0
  if (is_eight_byte(c) && code <= 63) {
59
0
    return true;
60
0
  }
61
0
  return false;
62
0
}
Unexecuted instantiation: base64.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_base64<char>(char, simdutf::base64_options)
Unexecuted instantiation: base64.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_base64<char16_t>(char16_t, simdutf::base64_options)
63
64
template <class char_type>
65
simdutf_constexpr23 bool is_base64_or_padding(char_type c,
66
0
                                              simdutf::base64_options options) {
67
0
  const uint8_t *to_base64 =
68
0
      (options & base64_default_or_url)
69
0
          ? tables::base64::to_base64_default_or_url_value
70
0
          : ((options & base64_url) ? tables::base64::to_base64_url_value
71
0
                                    : tables::base64::to_base64_value);
72
0
  if (c == '=') {
73
0
    return true;
74
0
  }
75
0
  uint8_t code = to_base64[uint8_t(c)];
76
0
  if (is_eight_byte(c) && code <= 63) {
77
0
    return true;
78
0
  }
79
0
  return false;
80
0
}
Unexecuted instantiation: base64.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_base64_or_padding<char>(char, simdutf::base64_options)
Unexecuted instantiation: base64.cpp:bool simdutf::scalar::(anonymous namespace)::base64::is_base64_or_padding<char16_t>(char16_t, simdutf::base64_options)
81
82
template <class char_type>
83
bool is_ignorable_or_padding(char_type c, simdutf::base64_options options) {
84
  return is_ignorable(c, options) || c == '=';
85
}
86
87
// Result of find_end: describes where the payload of a base64 input stops and
// how much trailing padding follows it. Kept as a plain aggregate so that it
// can be brace-initialized in field order.
struct reduced_input {
  size_t equalsigns;    // number of padding characters '=', typically 0, 1, 2.
  size_t equallocation; // location of the first padding character if any
  size_t srclen;        // length of the input buffer before padding
  size_t full_input_length; // length of the input buffer with padding but
                            // without ignorable characters
};
94
95
// find the end of the base64 input buffer
96
// It returns the number of padding characters, the location of the first
97
// padding character if any, the length of the input buffer before padding
98
// and the length of the input buffer with padding. The input buffer is not
99
// modified. The function assumes that there are at most two padding characters.
100
template <class char_type>
101
simdutf_constexpr23 reduced_input find_end(const char_type *src, size_t srclen,
102
                                           simdutf::base64_options options) {
103
  const uint8_t *to_base64 =
104
      (options & base64_default_or_url)
105
          ? tables::base64::to_base64_default_or_url_value
106
          : ((options & base64_url) ? tables::base64::to_base64_url_value
107
                                    : tables::base64::to_base64_value);
108
  const bool ignore_garbage =
109
      (options == base64_options::base64_url_accept_garbage) ||
110
      (options == base64_options::base64_default_accept_garbage) ||
111
      (options == base64_options::base64_default_or_url_accept_garbage);
112
113
  size_t equalsigns = 0;
114
  // We intentionally include trailing spaces in the full input length.
115
  // See https://github.com/simdutf/simdutf/issues/824
116
  size_t full_input_length = srclen;
117
  // skip trailing spaces
118
  while (!ignore_garbage && srclen > 0 &&
119
         scalar::base64::is_eight_byte(src[srclen - 1]) &&
120
         to_base64[uint8_t(src[srclen - 1])] == 64) {
121
    srclen--;
122
  }
123
  size_t equallocation =
124
      srclen; // location of the first padding character if any
125
  if (ignore_garbage) {
126
    // Technically, we don't need to find the first padding character, we can
127
    // just change our algorithms, but it adds substantial complexity.
128
    auto it = simdutf::find(src, src + srclen, '=');
129
    if (it != src + srclen) {
130
      equallocation = it - src;
131
      equalsigns = 1;
132
      srclen = equallocation;
133
      full_input_length = equallocation + 1;
134
    }
135
    return {equalsigns, equallocation, srclen, full_input_length};
136
  }
137
  if (!ignore_garbage && srclen > 0 && src[srclen - 1] == '=') {
138
    // This is the last '=' sign.
139
    equallocation = srclen - 1;
140
    srclen--;
141
    equalsigns = 1;
142
    // skip trailing spaces
143
    while (srclen > 0 && scalar::base64::is_eight_byte(src[srclen - 1]) &&
144
           to_base64[uint8_t(src[srclen - 1])] == 64) {
145
      srclen--;
146
    }
147
    if (srclen > 0 && src[srclen - 1] == '=') {
148
      // This is the second '=' sign.
149
      equallocation = srclen - 1;
150
      srclen--;
151
      equalsigns = 2;
152
    }
153
  }
154
  return {equalsigns, equallocation, srclen, full_input_length};
155
}
156
157
// Returns true upon success. The destination buffer must be large enough.
158
// This functions assumes that the padding (=) has been removed.
159
// if check_capacity is true, it will check that the destination buffer is
160
// large enough. If it is not, it will return OUTPUT_BUFFER_TOO_SMALL.
161
template <bool check_capacity, class char_type>
162
simdutf_constexpr23 full_result base64_tail_decode_impl(
163
    char *dst, size_t outlen, const char_type *src, size_t length,
164
    size_t padding_characters, // number of padding characters
165
                               // '=', typically 0, 1, 2.
166
    base64_options options, last_chunk_handling_options last_chunk_options) {
167
  char *dstend = dst + outlen;
168
  (void)dstend;
169
  // This looks like 10 branches, but we expect the compiler to resolve this to
170
  // two branches (easily predicted):
171
  const uint8_t *to_base64 =
172
      (options & base64_default_or_url)
173
          ? tables::base64::to_base64_default_or_url_value
174
          : ((options & base64_url) ? tables::base64::to_base64_url_value
175
                                    : tables::base64::to_base64_value);
176
  const uint32_t *d0 =
177
      (options & base64_default_or_url)
178
          ? tables::base64::base64_default_or_url::d0
179
          : ((options & base64_url) ? tables::base64::base64_url::d0
180
                                    : tables::base64::base64_default::d0);
181
  const uint32_t *d1 =
182
      (options & base64_default_or_url)
183
          ? tables::base64::base64_default_or_url::d1
184
          : ((options & base64_url) ? tables::base64::base64_url::d1
185
                                    : tables::base64::base64_default::d1);
186
  const uint32_t *d2 =
187
      (options & base64_default_or_url)
188
          ? tables::base64::base64_default_or_url::d2
189
          : ((options & base64_url) ? tables::base64::base64_url::d2
190
                                    : tables::base64::base64_default::d2);
191
  const uint32_t *d3 =
192
      (options & base64_default_or_url)
193
          ? tables::base64::base64_default_or_url::d3
194
          : ((options & base64_url) ? tables::base64::base64_url::d3
195
                                    : tables::base64::base64_default::d3);
196
  const bool ignore_garbage =
197
      (options == base64_options::base64_url_accept_garbage) ||
198
      (options == base64_options::base64_default_accept_garbage) ||
199
      (options == base64_options::base64_default_or_url_accept_garbage);
200
201
  const char_type *srcend = src + length;
202
  const char_type *srcinit = src;
203
  const char *dstinit = dst;
204
205
  uint32_t x;
206
  size_t idx;
207
  uint8_t buffer[4];
208
  while (true) {
209
    while (srcend - src >= 4 && is_eight_byte(src[0]) &&
210
           is_eight_byte(src[1]) && is_eight_byte(src[2]) &&
211
           is_eight_byte(src[3]) &&
212
           (x = d0[uint8_t(src[0])] | d1[uint8_t(src[1])] |
213
                d2[uint8_t(src[2])] | d3[uint8_t(src[3])]) < 0x01FFFFFF) {
214
      if (check_capacity && dstend - dst < 3) {
215
        return {OUTPUT_BUFFER_TOO_SMALL, size_t(src - srcinit),
216
                size_t(dst - dstinit)};
217
      }
218
      *dst++ = static_cast<char>(x & 0xFF);
219
      *dst++ = static_cast<char>((x >> 8) & 0xFF);
220
      *dst++ = static_cast<char>((x >> 16) & 0xFF);
221
      src += 4;
222
    }
223
    const char_type *srccur = src;
224
    idx = 0;
225
    // we need at least four characters.
226
#ifdef __clang__
227
    // If possible, we read four characters at a time. (It is an optimization.)
228
    if (ignore_garbage && src + 4 <= srcend) {
229
      char_type c0 = src[0];
230
      char_type c1 = src[1];
231
      char_type c2 = src[2];
232
      char_type c3 = src[3];
233
234
      uint8_t code0 = to_base64[uint8_t(c0)];
235
      uint8_t code1 = to_base64[uint8_t(c1)];
236
      uint8_t code2 = to_base64[uint8_t(c2)];
237
      uint8_t code3 = to_base64[uint8_t(c3)];
238
239
      buffer[idx] = code0;
240
      idx += (is_eight_byte(c0) && code0 <= 63);
241
      buffer[idx] = code1;
242
      idx += (is_eight_byte(c1) && code1 <= 63);
243
      buffer[idx] = code2;
244
      idx += (is_eight_byte(c2) && code2 <= 63);
245
      buffer[idx] = code3;
246
      idx += (is_eight_byte(c3) && code3 <= 63);
247
      src += 4;
248
    }
249
#endif
250
    while ((idx < 4) && (src < srcend)) {
251
      char_type c = *src;
252
253
      uint8_t code = to_base64[uint8_t(c)];
254
      buffer[idx] = uint8_t(code);
255
      if (is_eight_byte(c) && code <= 63) {
256
        idx++;
257
      } else if (!ignore_garbage &&
258
                 (code > 64 || !scalar::base64::is_eight_byte(c))) {
259
        return {INVALID_BASE64_CHARACTER, size_t(src - srcinit),
260
                size_t(dst - dstinit)};
261
      } else {
262
        // We have a space or a newline or garbage. We ignore it.
263
      }
264
      src++;
265
    }
266
    if (idx != 4) {
267
      simdutf_log_assert(idx < 4, "idx should be less than 4");
268
      // We never should have that the number of base64 characters + the
269
      // number of padding characters is more than 4.
270
      if (!ignore_garbage && (idx + padding_characters > 4)) {
271
        return {INVALID_BASE64_CHARACTER, size_t(src - srcinit),
272
                size_t(dst - dstinit), true};
273
      }
274
275
      // The idea here is that in loose mode,
276
      // if there is padding at all, it must be used
277
      // to form 4-wise chunk. However, in loose mode,
278
      // we do accept no padding at all.
279
      if (!ignore_garbage &&
280
          last_chunk_options == last_chunk_handling_options::loose &&
281
          (idx >= 2) && padding_characters > 0 &&
282
          ((idx + padding_characters) & 3) != 0) {
283
        return {INVALID_BASE64_CHARACTER, size_t(src - srcinit),
284
                size_t(dst - dstinit), true};
285
      } else
286
287
        // The idea here is that in strict mode, we do not want to accept
288
        // incomplete base64 chunks. So if the chunk was otherwise valid, we
289
        // return BASE64_INPUT_REMAINDER.
290
        if (!ignore_garbage &&
291
            last_chunk_options == last_chunk_handling_options::strict &&
292
            (idx >= 2) && ((idx + padding_characters) & 3) != 0) {
293
          // The partial chunk was at src - idx
294
          return {BASE64_INPUT_REMAINDER, size_t(src - srcinit),
295
                  size_t(dst - dstinit), true};
296
        } else
297
          // If there is a partial chunk with insufficient padding, with
298
          // stop_before_partial, we need to just ignore it. In "only full"
299
          // mode, skip the minute there are padding characters.
300
          if ((last_chunk_options ==
301
                   last_chunk_handling_options::stop_before_partial &&
302
               (padding_characters + idx < 4) && (idx != 0) &&
303
               (idx >= 2 || padding_characters == 0)) ||
304
              (last_chunk_options ==
305
                   last_chunk_handling_options::only_full_chunks &&
306
               (idx >= 2 || padding_characters == 0))) {
307
            // partial means that we are *not* going to consume the read
308
            // characters. We need to rewind the src pointer.
309
            src = srccur;
310
            return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)};
311
          } else {
312
            if (idx == 2) {
313
              uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) +
314
                                (uint32_t(buffer[1]) << 2 * 6);
315
              if (!ignore_garbage &&
316
                  (last_chunk_options == last_chunk_handling_options::strict) &&
317
                  (triple & 0xffff)) {
318
                return {BASE64_EXTRA_BITS, size_t(src - srcinit),
319
                        size_t(dst - dstinit)};
320
              }
321
              if (check_capacity && dstend - dst < 1) {
322
                return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit),
323
                        size_t(dst - dstinit)};
324
              }
325
              *dst++ = static_cast<char>((triple >> 16) & 0xFF);
326
            } else if (idx == 3) {
327
              uint32_t triple = (uint32_t(buffer[0]) << 3 * 6) +
328
                                (uint32_t(buffer[1]) << 2 * 6) +
329
                                (uint32_t(buffer[2]) << 1 * 6);
330
              if (!ignore_garbage &&
331
                  (last_chunk_options == last_chunk_handling_options::strict) &&
332
                  (triple & 0xff)) {
333
                return {BASE64_EXTRA_BITS, size_t(src - srcinit),
334
                        size_t(dst - dstinit)};
335
              }
336
              if (check_capacity && dstend - dst < 2) {
337
                return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit),
338
                        size_t(dst - dstinit)};
339
              }
340
              *dst++ = static_cast<char>((triple >> 16) & 0xFF);
341
              *dst++ = static_cast<char>((triple >> 8) & 0xFF);
342
            } else if (!ignore_garbage && idx == 1 &&
343
                       (!is_partial(last_chunk_options) ||
344
                        (is_partial(last_chunk_options) &&
345
                         padding_characters > 0))) {
346
              return {BASE64_INPUT_REMAINDER, size_t(src - srcinit),
347
                      size_t(dst - dstinit)};
348
            } else if (!ignore_garbage && idx == 0 && padding_characters > 0) {
349
              return {INVALID_BASE64_CHARACTER, size_t(src - srcinit),
350
                      size_t(dst - dstinit), true};
351
            }
352
            return {SUCCESS, size_t(src - srcinit), size_t(dst - dstinit)};
353
          }
354
    }
355
    if (check_capacity && dstend - dst < 3) {
356
      return {OUTPUT_BUFFER_TOO_SMALL, size_t(srccur - srcinit),
357
              size_t(dst - dstinit)};
358
    }
359
    uint32_t triple =
360
        (uint32_t(buffer[0]) << 3 * 6) + (uint32_t(buffer[1]) << 2 * 6) +
361
        (uint32_t(buffer[2]) << 1 * 6) + (uint32_t(buffer[3]) << 0 * 6);
362
    *dst++ = static_cast<char>((triple >> 16) & 0xFF);
363
    *dst++ = static_cast<char>((triple >> 8) & 0xFF);
364
    *dst++ = static_cast<char>(triple & 0xFF);
365
  }
366
}
367
368
template <class char_type>
369
simdutf_constexpr23 full_result base64_tail_decode(
370
    char *dst, const char_type *src, size_t length,
371
    size_t padding_characters, // number of padding characters
372
                               // '=', typically 0, 1, 2.
373
    base64_options options, last_chunk_handling_options last_chunk_options) {
374
  return base64_tail_decode_impl<false>(dst, 0, src, length, padding_characters,
375
                                        options, last_chunk_options);
376
}
377
378
// like base64_tail_decode, but it will not write past the end of the output
379
// buffer. The outlen parameter is modified to reflect the number of bytes
380
// written. This functions assumes that the padding (=) has been removed.
381
//
382
template <class char_type>
383
simdutf_constexpr23 full_result base64_tail_decode_safe(
384
    char *dst, size_t outlen, const char_type *src, size_t length,
385
    size_t padding_characters, // number of padding characters
386
                               // '=', typically 0, 1, 2.
387
    base64_options options, last_chunk_handling_options last_chunk_options) {
388
  return base64_tail_decode_impl<true>(dst, outlen, src, length,
389
                                       padding_characters, options,
390
                                       last_chunk_options);
391
}
392
393
// Converts a result produced by a tail decoder, whose counts are relative to
// the tail, into a result expressed for the whole input:
//  - `previous_input` / `previous_output` are added to the counts;
//  - on a padding error the input position is set to `equallocation`;
//  - on success the input position is set to `full_input_length`, unless a
//    partial mode is in use and the output count is a multiple of three, in
//    which case the position computed above is kept.
inline simdutf_constexpr23 full_result
patch_tail_result(full_result r, size_t previous_input, size_t previous_output,
                  size_t equallocation, size_t full_input_length,
                  last_chunk_handling_options last_chunk_options) {
  r.input_count += previous_input;
  r.output_count += previous_output;
  if (r.padding_error) {
    r.input_count = equallocation;
  }

  if (r.error == error_code::SUCCESS) {
    // A success when we are not in a partial mode means that we have consumed
    // the whole input buffer. The same holds in a partial mode when the
    // output count is not a multiple of three.
    if (!is_partial(last_chunk_options) || r.output_count % 3 != 0) {
      r.input_count = full_input_length;
    }
  }
  return r;
}
414
415
// Returns the number of bytes written. The destination buffer must be large
416
// enough. It will add padding (=) if needed.
417
template <bool use_lines = false>
418
simdutf_constexpr23 size_t tail_encode_base64_impl(
419
    char *dst, const char *src, size_t srclen, base64_options options,
420
0
    size_t line_length = simdutf::default_line_length, size_t line_offset = 0) {
421
0
  if simdutf_constexpr (use_lines) {
422
0
    // sanitize line_length and starting_line_offset.
423
0
    // line_length must be greater than 3.
424
0
    if (line_length < 4) {
425
0
      line_length = 4;
426
0
    }
427
0
    simdutf_log_assert(line_offset <= line_length,
428
0
                       "line_offset should be less than line_length");
429
0
  }
430
0
  // By default, we use padding if we are not using the URL variant.
431
0
  // This is check with ((options & base64_url) == 0) which returns true if we
432
0
  // are not using the URL variant. However, we also allow 'inversion' of the
433
0
  // convention with the base64_reverse_padding option. If the
434
0
  // base64_reverse_padding option is set, we use padding if we are using the
435
0
  // URL variant, and we omit it if we are not using the URL variant. This is
436
0
  // checked with
437
0
  // ((options & base64_reverse_padding) == base64_reverse_padding).
438
0
  bool use_padding =
439
0
      ((options & base64_url) == 0) ^
440
0
      ((options & base64_reverse_padding) == base64_reverse_padding);
441
0
  // This looks like 3 branches, but we expect the compiler to resolve this to
442
0
  // a single branch:
443
0
  const char *e0 = (options & base64_url) ? tables::base64::base64_url::e0
444
0
                                          : tables::base64::base64_default::e0;
445
0
  const char *e1 = (options & base64_url) ? tables::base64::base64_url::e1
446
0
                                          : tables::base64::base64_default::e1;
447
0
  const char *e2 = (options & base64_url) ? tables::base64::base64_url::e2
448
0
                                          : tables::base64::base64_default::e2;
449
0
  char *out = dst;
450
0
  size_t i = 0;
451
0
  uint8_t t1, t2, t3;
452
0
  for (; i + 2 < srclen; i += 3) {
453
0
    t1 = uint8_t(src[i]);
454
0
    t2 = uint8_t(src[i + 1]);
455
0
    t3 = uint8_t(src[i + 2]);
456
0
    if simdutf_constexpr (use_lines) {
457
0
      if (line_offset + 3 >= line_length) {
458
0
        if (line_offset == line_length) {
459
0
          *out++ = '\n';
460
0
          *out++ = e0[t1];
461
0
          *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
462
0
          *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)];
463
0
          *out++ = e2[t3];
464
0
          line_offset = 4;
465
0
        } else if (line_offset + 1 == line_length) {
466
0
          *out++ = e0[t1];
467
0
          *out++ = '\n';
468
0
          *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
469
0
          *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)];
470
0
          *out++ = e2[t3];
471
0
          line_offset = 3;
472
0
        } else if (line_offset + 2 == line_length) {
473
0
          *out++ = e0[t1];
474
0
          *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
475
0
          *out++ = '\n';
476
0
          *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)];
477
0
          *out++ = e2[t3];
478
0
          line_offset = 2;
479
0
        } else if (line_offset + 3 == line_length) {
480
0
          *out++ = e0[t1];
481
0
          *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
482
0
          *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)];
483
0
          *out++ = '\n';
484
0
          *out++ = e2[t3];
485
0
          line_offset = 1;
486
0
        }
487
0
      } else {
488
0
        *out++ = e0[t1];
489
0
        *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
490
0
        *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)];
491
0
        *out++ = e2[t3];
492
0
        line_offset += 4;
493
0
      }
494
0
    } else {
495
0
      *out++ = e0[t1];
496
0
      *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
497
0
      *out++ = e1[((t2 & 0x0F) << 2) | ((t3 >> 6) & 0x03)];
498
0
      *out++ = e2[t3];
499
0
    }
500
0
  }
501
0
  switch (srclen - i) {
502
0
  case 0:
503
0
    break;
504
0
  case 1:
505
0
    t1 = uint8_t(src[i]);
506
0
    if simdutf_constexpr (use_lines) {
507
0
      if (use_padding) {
508
0
        if (line_offset + 3 >= line_length) {
509
0
          if (line_offset == line_length) {
510
0
            *out++ = '\n';
511
0
            *out++ = e0[t1];
512
0
            *out++ = e1[(t1 & 0x03) << 4];
513
0
            *out++ = '=';
514
0
            *out++ = '=';
515
0
          } else if (line_offset + 1 == line_length) {
516
0
            *out++ = e0[t1];
517
0
            *out++ = '\n';
518
0
            *out++ = e1[(t1 & 0x03) << 4];
519
0
            *out++ = '=';
520
0
            *out++ = '=';
521
0
          } else if (line_offset + 2 == line_length) {
522
0
            *out++ = e0[t1];
523
0
            *out++ = e1[(t1 & 0x03) << 4];
524
0
            *out++ = '\n';
525
0
            *out++ = '=';
526
0
            *out++ = '=';
527
0
          } else if (line_offset + 3 == line_length) {
528
0
            *out++ = e0[t1];
529
0
            *out++ = e1[(t1 & 0x03) << 4];
530
0
            *out++ = '=';
531
0
            *out++ = '\n';
532
0
            *out++ = '=';
533
0
          }
534
0
        } else {
535
0
          *out++ = e0[t1];
536
0
          *out++ = e1[(t1 & 0x03) << 4];
537
0
          *out++ = '=';
538
0
          *out++ = '=';
539
0
        }
540
0
      } else {
541
0
        if (line_offset + 2 >= line_length) {
542
0
          if (line_offset == line_length) {
543
0
            *out++ = '\n';
544
0
            *out++ = e0[uint8_t(src[i])];
545
0
            *out++ = e1[(uint8_t(src[i]) & 0x03) << 4];
546
0
          } else if (line_offset + 1 == line_length) {
547
0
            *out++ = e0[uint8_t(src[i])];
548
0
            *out++ = '\n';
549
0
            *out++ = e1[(uint8_t(src[i]) & 0x03) << 4];
550
0
          } else {
551
0
            *out++ = e0[uint8_t(src[i])];
552
0
            *out++ = e1[(uint8_t(src[i]) & 0x03) << 4];
553
0
            // *out++ = '\n'; ==> no newline at the end of the output
554
0
          }
555
0
        } else {
556
0
          *out++ = e0[uint8_t(src[i])];
557
0
          *out++ = e1[(uint8_t(src[i]) & 0x03) << 4];
558
0
        }
559
0
      }
560
0
    } else {
561
0
      *out++ = e0[t1];
562
0
      *out++ = e1[(t1 & 0x03) << 4];
563
0
      if (use_padding) {
564
0
        *out++ = '=';
565
0
        *out++ = '=';
566
0
      }
567
0
    }
568
0
    break;
569
0
  default: /* case 2 */
570
0
    t1 = uint8_t(src[i]);
571
0
    t2 = uint8_t(src[i + 1]);
572
0
    if simdutf_constexpr (use_lines) {
573
0
      if (use_padding) {
574
0
        if (line_offset + 3 >= line_length) {
575
0
          if (line_offset == line_length) {
576
0
            *out++ = '\n';
577
0
            *out++ = e0[t1];
578
0
            *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
579
0
            *out++ = e2[(t2 & 0x0F) << 2];
580
0
            *out++ = '=';
581
0
          } else if (line_offset + 1 == line_length) {
582
0
            *out++ = e0[t1];
583
0
            *out++ = '\n';
584
0
            *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
585
0
            *out++ = e2[(t2 & 0x0F) << 2];
586
0
            *out++ = '=';
587
0
          } else if (line_offset + 2 == line_length) {
588
0
            *out++ = e0[t1];
589
0
            *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
590
0
            *out++ = '\n';
591
0
            *out++ = e2[(t2 & 0x0F) << 2];
592
0
            *out++ = '=';
593
0
          } else if (line_offset + 3 == line_length) {
594
0
            *out++ = e0[t1];
595
0
            *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
596
0
            *out++ = e2[(t2 & 0x0F) << 2];
597
0
            *out++ = '\n';
598
0
            *out++ = '=';
599
0
          }
600
0
        } else {
601
0
          *out++ = e0[t1];
602
0
          *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
603
0
          *out++ = e2[(t2 & 0x0F) << 2];
604
0
          *out++ = '=';
605
0
        }
606
0
      } else {
607
0
        if (line_offset + 3 >= line_length) {
608
0
          if (line_offset == line_length) {
609
0
            *out++ = '\n';
610
0
            *out++ = e0[t1];
611
0
            *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
612
0
            *out++ = e2[(t2 & 0x0F) << 2];
613
0
          } else if (line_offset + 1 == line_length) {
614
0
            *out++ = e0[t1];
615
0
            *out++ = '\n';
616
0
            *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
617
0
            *out++ = e2[(t2 & 0x0F) << 2];
618
0
          } else if (line_offset + 2 == line_length) {
619
0
            *out++ = e0[t1];
620
0
            *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
621
0
            *out++ = '\n';
622
0
            *out++ = e2[(t2 & 0x0F) << 2];
623
0
          } else {
624
0
            *out++ = e0[t1];
625
0
            *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
626
0
            *out++ = e2[(t2 & 0x0F) << 2];
627
0
            // *out++ = '\n'; ==> no newline at the end of the output
628
0
          }
629
0
        } else {
630
0
          *out++ = e0[t1];
631
0
          *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
632
0
          *out++ = e2[(t2 & 0x0F) << 2];
633
0
        }
634
0
      }
635
0
    } else {
636
0
      *out++ = e0[t1];
637
0
      *out++ = e1[((t1 & 0x03) << 4) | ((t2 >> 4) & 0x0F)];
638
0
      *out++ = e2[(t2 & 0x0F) << 2];
639
0
      if (use_padding) {
640
0
        *out++ = '=';
641
0
      }
642
0
    }
643
0
  }
644
0
  return (size_t)(out - dst);
645
0
}
646
647
// Returns the number of bytes written. The destination buffer must be large
648
// enough. It will add padding (=) if needed.
649
inline simdutf_constexpr23 size_t tail_encode_base64(char *dst, const char *src,
650
                                                     size_t srclen,
651
0
                                                     base64_options options) {
652
0
  return tail_encode_base64_impl(dst, src, srclen, options);
653
0
}
654
655
template <class InputPtr>
656
simdutf_warn_unused simdutf_constexpr23 size_t
657
maximal_binary_length_from_base64(InputPtr input, size_t length) noexcept {
658
  // We process the padding characters ('=') at the end to make sure
659
  // that we return an exact result when the input has no ignorable characters
660
  // (e.g., spaces).
661
  size_t padding = 0;
662
  if (length > 0) {
663
    if (input[length - 1] == '=') {
664
      padding++;
665
      if (length > 1 && input[length - 2] == '=') {
666
        padding++;
667
      }
668
    }
669
  }
670
  // The input is not otherwise processed for ignorable characters or
671
  // validation, so that the function runs in constant time (very fast). In
672
  // practice, base64 inputs without ignorable characters are common and the
673
  // common case are line separated inputs with relatively long lines (e.g., 76
674
  // characters) which leads this function to a slight (1%) overestimation of
675
  // the output size.
676
  //
677
  // Of course, some inputs might contain an arbitrary number of spaces or
678
  // newlines, which would make this function return a very pessimistic output
679
  // size but systems that produce base64 outputs typically do not do that and
680
  // if they do, they do not care much about minimizing memory usage.
681
  //
682
  // In specialized applications, users may know that their input is line
683
  // separated, which can be checked very quickly by by iterating (e.g., over 76
684
  // character chunks, looking for the linefeed characters only). We could
685
  // provide a specialized function for that, but it is not clear that the added
686
  // complexity is worth it for us.
687
  //
688
  size_t actual_length = length - padding;
689
  if (actual_length % 4 <= 1) {
690
    return actual_length / 4 * 3;
691
  }
692
  // if we have a valid input, then the remainder must be 2 or 3 adding one or
693
  // two extra bytes.
694
  return actual_length / 4 * 3 + (actual_length % 4) - 1;
695
}
696
697
template <typename char_type>
698
simdutf_warn_unused simdutf_constexpr23 full_result
699
base64_to_binary_details_impl(
700
    const char_type *input, size_t length, char *output, base64_options options,
701
    last_chunk_handling_options last_chunk_options) noexcept {
702
  const bool ignore_garbage =
703
      (options == base64_options::base64_url_accept_garbage) ||
704
      (options == base64_options::base64_default_accept_garbage) ||
705
      (options == base64_options::base64_default_or_url_accept_garbage);
706
  auto ri = simdutf::scalar::base64::find_end(input, length, options);
707
  size_t equallocation = ri.equallocation;
708
  size_t equalsigns = ri.equalsigns;
709
  length = ri.srclen;
710
  size_t full_input_length = ri.full_input_length;
711
  if (length == 0) {
712
    if (!ignore_garbage && equalsigns > 0) {
713
      return {INVALID_BASE64_CHARACTER, equallocation, 0};
714
    }
715
    return {SUCCESS, full_input_length, 0};
716
  }
717
  full_result r = scalar::base64::base64_tail_decode(
718
      output, input, length, equalsigns, options, last_chunk_options);
719
  r = scalar::base64::patch_tail_result(r, 0, 0, equallocation,
720
                                        full_input_length, last_chunk_options);
721
  if (!is_partial(last_chunk_options) && r.error == error_code::SUCCESS &&
722
      equalsigns > 0 && !ignore_garbage) {
723
    // additional checks
724
    if ((r.output_count % 3 == 0) ||
725
        ((r.output_count % 3) + 1 + equalsigns != 4)) {
726
      return {INVALID_BASE64_CHARACTER, equallocation, r.output_count};
727
    }
728
  }
729
  // When is_partial(last_chunk_options) is true, we must either end with
730
  // the end of the stream (beyond whitespace) or right after a non-ignorable
731
  // character or at the very beginning of the stream.
732
  // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
733
  if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS &&
734
      r.input_count < full_input_length) {
735
    // First check if we can extend the input to the end of the stream
736
    while (r.input_count < full_input_length &&
737
           base64_ignorable(*(input + r.input_count), options)) {
738
      r.input_count++;
739
    }
740
    // If we are still not at the end of the stream, then we must backtrack
741
    // to the last non-ignorable character.
742
    if (r.input_count < full_input_length) {
743
      while (r.input_count > 0 &&
744
             base64_ignorable(*(input + r.input_count - 1), options)) {
745
        r.input_count--;
746
      }
747
    }
748
  }
749
  return r;
750
}
751
752
template <typename char_type>
753
simdutf_constexpr23 simdutf_warn_unused full_result
754
base64_to_binary_details_safe_impl(
755
    const char_type *input, size_t length, char *output, size_t outlen,
756
    base64_options options,
757
    last_chunk_handling_options last_chunk_options) noexcept {
758
  const bool ignore_garbage =
759
      (options == base64_options::base64_url_accept_garbage) ||
760
      (options == base64_options::base64_default_accept_garbage) ||
761
      (options == base64_options::base64_default_or_url_accept_garbage);
762
  auto ri = simdutf::scalar::base64::find_end(input, length, options);
763
  size_t equallocation = ri.equallocation;
764
  size_t equalsigns = ri.equalsigns;
765
  length = ri.srclen;
766
  size_t full_input_length = ri.full_input_length;
767
  if (length == 0) {
768
    if (!ignore_garbage && equalsigns > 0) {
769
      return {INVALID_BASE64_CHARACTER, equallocation, 0};
770
    }
771
    return {SUCCESS, full_input_length, 0};
772
  }
773
  full_result r = scalar::base64::base64_tail_decode_safe(
774
      output, outlen, input, length, equalsigns, options, last_chunk_options);
775
  r = scalar::base64::patch_tail_result(r, 0, 0, equallocation,
776
                                        full_input_length, last_chunk_options);
777
  if (!is_partial(last_chunk_options) && r.error == error_code::SUCCESS &&
778
      equalsigns > 0 && !ignore_garbage) {
779
    // additional checks
780
    if ((r.output_count % 3 == 0) ||
781
        ((r.output_count % 3) + 1 + equalsigns != 4)) {
782
      return {INVALID_BASE64_CHARACTER, equallocation, r.output_count};
783
    }
784
  }
785
786
  // When is_partial(last_chunk_options) is true, we must either end with
787
  // the end of the stream (beyond whitespace) or right after a non-ignorable
788
  // character or at the very beginning of the stream.
789
  // See https://tc39.es/proposal-arraybuffer-base64/spec/#sec-frombase64
790
  if (is_partial(last_chunk_options) && r.error == error_code::SUCCESS &&
791
      r.input_count < full_input_length) {
792
    // First check if we can extend the input to the end of the stream
793
    while (r.input_count < full_input_length &&
794
           base64_ignorable(*(input + r.input_count), options)) {
795
      r.input_count++;
796
    }
797
    // If we are still not at the end of the stream, then we must backtrack
798
    // to the last non-ignorable character.
799
    if (r.input_count < full_input_length) {
800
      while (r.input_count > 0 &&
801
             base64_ignorable(*(input + r.input_count - 1), options)) {
802
        r.input_count--;
803
      }
804
    }
805
  }
806
  return r;
807
}
808
809
// Returns the number of base64 characters needed to encode `length` bytes
// under `options`.
//
// Padding is used by default exactly when the URL variant is not selected,
// i.e. when (options & base64_url) == 0. The base64_reverse_padding option
// inverts that convention: with it set, the URL variant is padded and the
// non-URL variant is not. It is detected with
// (options & base64_reverse_padding) == base64_reverse_padding.
simdutf_warn_unused simdutf_constexpr23 size_t
base64_length_from_binary(size_t length, base64_options options) noexcept {
  const bool not_url_variant = (options & base64_url) == 0;
  const bool padding_reversed =
      (options & base64_reverse_padding) == base64_reverse_padding;
  // Exactly one of the two flags being true means that we pad.
  const bool use_padding = not_url_variant != padding_reversed;

  if (use_padding) {
    // With padding the length is rounded up to a multiple of 4.
    return (length + 2) / 3 * 4;
  }

  // Without padding, a trailing group of 1 or 2 bytes takes 2 or 3
  // characters respectively.
  const size_t remainder = length % 3;
  const size_t tail_chars = (remainder == 0) ? 0 : remainder + 1;
  return length / 3 * 4 + tail_chars;
}
828
829
// Returns the length of the base64 encoding of `length` bytes under
// `options` when the output is split into lines of at most `line_length`
// base64 characters, counting one separator character between consecutive
// lines (none after the last line).
//
// An empty input yields 0. A `line_length` below 4 is treated as 4.
simdutf_warn_unused simdutf_constexpr23 size_t
base64_length_from_binary_with_lines(size_t length, base64_options options,
                                     size_t line_length) noexcept {
  if (length == 0) {
    return 0;
  }
  size_t base64_length =
      scalar::base64::base64_length_from_binary(length, options);
  if (line_length < 4) {
    line_length = 4;
  }
  // Number of lines, i.e. ceil(base64_length / line_length). We deliberately
  // avoid the usual (a + b - 1) / b form: the sum wraps around when
  // line_length is very large (e.g. SIZE_MAX used to mean "no line breaks"),
  // which would produce zero lines and a result that is one too small.
  size_t lines = base64_length / line_length +
                 ((base64_length % line_length) != 0 ? 1 : 0);
  return base64_length + lines - 1;
}
844
845
// Return the length of the prefix that contains count base64 characters.
846
// Thus, if count is 3, the function returns the length of the prefix
847
// that contains 3 base64 characters.
848
// The function returns (size_t)-1 if there is not enough base64 characters in
849
// the input.
850
template <typename char_type>
851
simdutf_warn_unused size_t prefix_length(size_t count,
852
                                         simdutf::base64_options options,
853
                                         const char_type *input,
854
                                         size_t length) noexcept {
855
  size_t i = 0;
856
  while (i < length && is_ignorable(input[i], options)) {
857
    i++;
858
  }
859
  if (count == 0) {
860
    return i; // duh!
861
  }
862
  for (; i < length; i++) {
863
    if (is_ignorable(input[i], options)) {
864
      continue;
865
    }
866
    // We have a base64 character or a padding character.
867
    count--;
868
    if (count == 0) {
869
      return i + 1;
870
    }
871
  }
872
  simdutf_log_assert(false, "You never get here");
873
874
  return -1; // should never happen
875
}
876
877
} // namespace base64
878
} // unnamed namespace
879
} // namespace scalar
880
} // namespace simdutf
881
882
#endif