Coverage Report

Created: 2026-01-21 08:52

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/node/deps/nbytes/include/nbytes.h
Line
Count
Source
1
#ifndef NBYTES_H
2
#define NBYTES_H
3
#include <algorithm>
4
#include <cmath>
5
#include <cstddef>
6
#include <cstdint>
7
#include <cstdlib>
8
#include <cstring>
9
#include <string>
10
11
namespace nbytes {
12
13
#if NBYTES_DEVELOPMENT_CHECKS
// Development-only diagnostics. When NBYTES_DEVELOPMENT_CHECKS is non-zero the
// macros below print to std::cerr and abort the process on failure; otherwise
// NBYTES_FAIL, NBYTES_ASSERT_EQUAL and NBYTES_ASSERT_TRUE expand to nothing.
// NOTE(review): the stream-based macros need <iostream>, which is not among
// the includes at the top of this header -- the including translation unit
// has to provide it when the checks are enabled.

// Turns its argument into a string literal.
#define NBYTES_STR(x) #x

// Aborts the process when EXPR evaluates to false.
#define NBYTES_REQUIRE(EXPR) \
  {                          \
    if (!(EXPR)) {           \
      abort();               \
    }                        \
  }

// Prints MESSAGE to std::cerr and aborts the process.
#define NBYTES_FAIL(MESSAGE)                         \
  do {                                               \
    std::cerr << "FAIL: " << (MESSAGE) << std::endl; \
    abort();                                         \
  } while (0);

// Fails with MESSAGE when LHS and RHS compare unequal. Both operands are
// parenthesized so that compound expressions keep their meaning.
#define NBYTES_ASSERT_EQUAL(LHS, RHS, MESSAGE)                        \
  do {                                                                \
    if ((LHS) != (RHS)) {                                             \
      std::cerr << "Mismatch: '" << (LHS) << "' - '" << (RHS) << "'"  \
                << std::endl;                                         \
      NBYTES_FAIL(MESSAGE);                                           \
    }                                                                 \
  } while (0);

// Fails, reporting the source location and the stringified condition, when
// COND evaluates to false.
#define NBYTES_ASSERT_TRUE(COND)                                            \
  do {                                                                      \
    if (!(COND)) {                                                          \
      std::cerr << "Assert at line " << __LINE__ << " of file " << __FILE__ \
                << std::endl;                                               \
      NBYTES_FAIL(NBYTES_STR(COND));                                        \
    }                                                                       \
  } while (0);
#else
#define NBYTES_FAIL(MESSAGE)
#define NBYTES_ASSERT_EQUAL(LHS, RHS, MESSAGE)
#define NBYTES_ASSERT_TRUE(COND)
#endif
44
45
0
// Marks a code path that must never be executed. GNU-compatible compilers and
// MSVC receive an optimizer hint; any other compiler terminates the process,
// because returning from a [[noreturn]] function is undefined behavior.
[[noreturn]] inline void unreachable() {
#ifdef __GNUC__
  __builtin_unreachable();
#elif defined(_MSC_VER)
  __assume(false);
#else
  std::abort();
#endif
}
53
54
// The nbytes (short for "node bytes") is a set of utility helpers for
55
// working with bytes that are extracted from Node.js' internals. The
56
// motivation for extracting these into a separate library is to make it
57
// easier for other projects to implement functionality that is compatible
58
// with Node.js' implementation of various byte manipulation functions.
59
60
// Rounds `a` up to the nearest multiple of `b`. A value that already is a
// multiple of `b` is returned unchanged. `b` must not be zero.
template <typename T>
constexpr T RoundUp(T a, T b) {
  const auto remainder = a % b;
  if (remainder == 0) {
    return a;
  }
  return a + b - remainder;
}
65
66
// Align ptr to an `alignment`-bytes boundary.
67
template <typename T, typename U>
68
0
constexpr T *AlignUp(T *ptr, U alignment) {
69
0
  return reinterpret_cast<T *>(
70
0
      RoundUp(reinterpret_cast<uintptr_t>(ptr), alignment));
71
0
}
Unexecuted instantiation: char* nbytes::AlignUp<char, unsigned long>(char*, unsigned long)
Unexecuted instantiation: unsigned short* nbytes::AlignUp<unsigned short, unsigned long>(unsigned short*, unsigned long)
72
73
// Rounds the pointer `value` down to a multiple of `alignment` and returns it
// with its original type. The bit-mask technique used here requires
// `alignment` to be a power of two.
template <typename T, typename U>
inline T AlignDown(T value, U alignment) {
  // Build the mask at pointer width. Negating a narrower unsigned alignment
  // first would zero-extend the mask and wipe the upper address bits.
  const uintptr_t mask = ~(static_cast<uintptr_t>(alignment) - 1);
  return reinterpret_cast<T>(reinterpret_cast<uintptr_t>(value) & mask);
}
78
79
// Returns a * b. When development checks are enabled, the product is verified
// by dividing it by `a` again (skipped for a == 0) and a mismatch aborts.
template <typename T>
inline T MultiplyWithOverflowCheck(T a, T b) {
  auto ret = a * b;
  if (a == 0) {
    // Nothing to verify: the division below would be by zero.
    return ret;
  }
  NBYTES_ASSERT_TRUE(b == ret / a);
  return ret;
}
88
89
// Declarations only; the definitions are not part of this header.
// NOTE(review): judging by the names and signatures, both presumably copy
// `len` bytes from `src` to `dst` while forcing each byte into the ASCII
// range, ForceAsciiSlow being the unoptimized variant -- confirm against the
// implementation.
void ForceAsciiSlow(const char *src, char *dst, size_t len);
90
void ForceAscii(const char *src, char *dst, size_t len);
91
92
// ============================================================================
93
// Byte Swapping
94
95
// Swaps bytes in place. nbytes is the number of bytes to swap and must be a
96
// multiple of the word size (checked by function).
97
// Declarations only; the definitions are not part of this header.
// NOTE(review): the bool result presumably reports whether the length check
// passed, with the 16/32/64 suffix giving the word width in bits -- confirm
// against the implementation.
bool SwapBytes16(char *data, size_t nbytes);
98
bool SwapBytes32(char *data, size_t nbytes);
99
bool SwapBytes64(char *data, size_t nbytes);
100
101
// ============================================================================
102
// Base64 (legacy)
103
104
#ifdef _MSC_VER
105
#pragma warning(push)
106
// MSVC C4003: not enough actual parameters for macro 'identifier'
107
#pragma warning(disable : 4003)
108
#endif
109
110
// Base64 decode table indexed by input byte (defined outside this header).
// As used by the decoders below, an entry below 64 is the 6-bit value of a
// legal base64 character; any other entry marks a character that is skipped
// or that ends decoding.
extern const int8_t unbase64_table[256];
111
112
template <typename TypeName>
113
bool Base64DecodeGroupSlow(char *const dst, const size_t dstlen,
114
                           const TypeName *const src, const size_t srclen,
115
0
                           size_t *const i, size_t *const k) {
116
0
  uint8_t hi;
117
0
  uint8_t lo;
118
0
#define V(expr)                                                        \
119
0
  for (;;) {                                                           \
120
0
    const uint8_t c = static_cast<uint8_t>(src[*i]);                   \
121
0
    lo = unbase64_table[c];                                            \
122
0
    *i += 1;                                                           \
123
0
    if (lo < 64) break;                         /* Legal character. */ \
124
0
    if (c == '=' || *i >= srclen) return false; /* Stop decoding. */   \
125
0
  }                                                                    \
126
0
  expr;                                                                \
127
0
  if (*i >= srclen) return false;                                      \
128
0
  if (*k >= dstlen) return false;                                      \
129
0
  hi = lo;
130
0
  V(/* Nothing. */);
131
0
  V(dst[(*k)++] = ((hi & 0x3F) << 2) | ((lo & 0x30) >> 4));
132
0
  V(dst[(*k)++] = ((hi & 0x0F) << 4) | ((lo & 0x3C) >> 2));
133
0
  V(dst[(*k)++] = ((hi & 0x03) << 6) | ((lo & 0x3F) >> 0));
134
0
#undef V
135
0
  return true;  // Continue decoding.
136
0
}
Unexecuted instantiation: bool nbytes::Base64DecodeGroupSlow<char>(char*, unsigned long, char const*, unsigned long, unsigned long*, unsigned long*)
Unexecuted instantiation: bool nbytes::Base64DecodeGroupSlow<unsigned short>(char*, unsigned long, unsigned short const*, unsigned long, unsigned long*, unsigned long*)
137
138
// Output flavor understood by Base64EncodedSize.
enum class Base64Mode { NORMAL, URL };

// Returns the number of characters needed to encode `size` bytes.
//   NORMAL: rounded up to whole four-character groups, i.e. 4 * ceil(size / 3).
//   URL:    ceil(size * 4 / 3), no rounding to a full group.
// Integer arithmetic keeps the URL result exact for every size and usable in
// constant expressions.
inline constexpr size_t Base64EncodedSize(
    size_t size, Base64Mode mode = Base64Mode::NORMAL) {
  if (mode == Base64Mode::NORMAL) {
    return (size + 2) / 3 * 4;
  }
  // ceil(size * 4 / 3), split into whole three-byte groups plus the remainder
  // so the intermediate `size * 4` is never formed.
  return size / 3 * 4 + (size % 3 * 4 + 2) / 3;
}
146
147
// Estimates the decoded length of `size` base64 characters without looking at
// the data. Trailing padding is not taken into account, so the result can be
// one or two bytes larger than the real decoded length.
inline constexpr size_t Base64DecodedSizeFast(size_t size) {
  // A single character carries fewer than eight bits and cannot be decoded.
  if (size <= 1) {
    return 0;
  }
  const size_t whole_groups = size / 4;
  const size_t leftover_chars = size % 4;
  return whole_groups * 3 + (leftover_chars + 1) / 2;
}
152
153
0
// Reads four bytes starting at `p` as a big-endian 32-bit unsigned integer
// (p[0] is the most significant byte).
inline uint32_t ReadUint32BE(const unsigned char *p) {
  // Widen each byte to uint32_t before shifting. Shifting the promoted int
  // overflows into the sign bit for p[0] >= 0x80.
  return (static_cast<uint32_t>(p[0]) << 24U) |
         (static_cast<uint32_t>(p[1]) << 16U) |
         (static_cast<uint32_t>(p[2]) << 8U) | static_cast<uint32_t>(p[3]);
}
158
159
template <typename TypeName>
160
0
size_t Base64DecodedSize(const TypeName *src, size_t size) {
161
  // 1-byte input cannot be decoded
162
0
  if (size < 2) return 0;
163
164
0
  if (src[size - 1] == '=') {
165
0
    size--;
166
0
    if (src[size - 1] == '=') size--;
167
0
  }
168
0
  return Base64DecodedSizeFast(size);
169
0
}
Unexecuted instantiation: unsigned long nbytes::Base64DecodedSize<char>(char const*, unsigned long)
Unexecuted instantiation: unsigned long nbytes::Base64DecodedSize<unsigned short>(unsigned short const*, unsigned long)
170
171
template <typename TypeName>
172
size_t Base64DecodeFast(char *const dst, const size_t dstlen,
173
                        const TypeName *const src, const size_t srclen,
174
0
                        const size_t decoded_size) {
175
0
  const size_t available = dstlen < decoded_size ? dstlen : decoded_size;
176
0
  const size_t max_k = available / 3 * 3;
177
0
  size_t max_i = srclen / 4 * 4;
178
0
  size_t i = 0;
179
0
  size_t k = 0;
180
0
  while (i < max_i && k < max_k) {
181
0
    const unsigned char txt[] = {
182
0
        static_cast<unsigned char>(
183
0
            unbase64_table[static_cast<uint8_t>(src[i + 0])]),
184
0
        static_cast<unsigned char>(
185
0
            unbase64_table[static_cast<uint8_t>(src[i + 1])]),
186
0
        static_cast<unsigned char>(
187
0
            unbase64_table[static_cast<uint8_t>(src[i + 2])]),
188
0
        static_cast<unsigned char>(
189
0
            unbase64_table[static_cast<uint8_t>(src[i + 3])]),
190
0
    };
191
192
0
    const uint32_t v = ReadUint32BE(txt);
193
    // If MSB is set, input contains whitespace or is not valid base64.
194
0
    if (v & 0x80808080) {
195
0
      if (!Base64DecodeGroupSlow(dst, dstlen, src, srclen, &i, &k)) return k;
196
0
      max_i = i + (srclen - i) / 4 * 4;  // Align max_i again.
197
0
    } else {
198
0
      dst[k + 0] = ((v >> 22) & 0xFC) | ((v >> 20) & 0x03);
199
0
      dst[k + 1] = ((v >> 12) & 0xF0) | ((v >> 10) & 0x0F);
200
0
      dst[k + 2] = ((v >> 2) & 0xC0) | ((v >> 0) & 0x3F);
201
0
      i += 4;
202
0
      k += 3;
203
0
    }
204
0
  }
205
0
  if (i < srclen && k < dstlen) {
206
0
    Base64DecodeGroupSlow(dst, dstlen, src, srclen, &i, &k);
207
0
  }
208
0
  return k;
209
0
}
Unexecuted instantiation: unsigned long nbytes::Base64DecodeFast<char>(char*, unsigned long, char const*, unsigned long, unsigned long)
Unexecuted instantiation: unsigned long nbytes::Base64DecodeFast<unsigned short>(char*, unsigned long, unsigned short const*, unsigned long, unsigned long)
210
211
// Convenience entry point: estimates the decoded size of `src` with
// Base64DecodedSize and decodes into `dst` with Base64DecodeFast. Returns the
// number of bytes written to `dst`.
template <typename TypeName>
size_t Base64Decode(char *const dst, const size_t dstlen,
                    const TypeName *const src, const size_t srclen) {
  return Base64DecodeFast(dst, dstlen, src, srclen,
                          Base64DecodedSize(src, srclen));
}
Unexecuted instantiation: unsigned long nbytes::Base64Decode<char>(char*, unsigned long, char const*, unsigned long)
Unexecuted instantiation: unsigned long nbytes::Base64Decode<unsigned short>(char*, unsigned long, unsigned short const*, unsigned long)
217
218
#ifdef _MSC_VER
219
#pragma warning(pop)
220
#endif
221
222
// ============================================================================
223
// Hex (legacy)
224
225
// Hex decode table indexed by input byte (defined outside this header).
// HexDecode treats an entry of -1 (all bits set) as "not a hex digit" and any
// other entry as the digit's value.
extern const int8_t unhex_table[256];
226
227
template <typename TypeName>
228
static size_t HexDecode(char *buf, size_t len, const TypeName *src,
229
0
                        const size_t srcLen) {
230
0
  size_t i;
231
0
  for (i = 0; i < len && i * 2 + 1 < srcLen; ++i) {
232
0
    unsigned a = unhex_table[static_cast<uint8_t>(src[i * 2 + 0])];
233
0
    unsigned b = unhex_table[static_cast<uint8_t>(src[i * 2 + 1])];
234
0
    if (!~a || !~b) return i;
235
0
    buf[i] = (a << 4) | b;
236
0
  }
237
238
0
  return i;
239
0
}
Unexecuted instantiation: string_bytes.cc:unsigned long nbytes::HexDecode<char>(char*, unsigned long, char const*, unsigned long)
Unexecuted instantiation: string_bytes.cc:unsigned long nbytes::HexDecode<unsigned short>(char*, unsigned long, unsigned short const*, unsigned long)
240
241
// Declarations only; the definitions are not part of this header.
// NOTE(review): presumably the first overload writes the hex form of the
// `slen` bytes at `src` into `dst` (capacity `dlen`) and returns the number of
// characters written, while the second returns the same text as a
// std::string -- confirm against the implementation.
size_t HexEncode(const char *src, size_t slen, char *dst, size_t dlen);
242
243
std::string HexEncode(const char *src, size_t slen);
244
245
// ============================================================================
246
// StringSearch
247
248
namespace stringsearch {
249
250
template <typename T>
251
class Vector {
252
 public:
253
  Vector(T *data, size_t length, bool isForward)
254
0
      : start_(data), length_(length), is_forward_(isForward) {
255
0
    CHECK(length > 0 && data != nullptr);
256
0
  }
Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned short const>::Vector(unsigned short const*, unsigned long, bool)
Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned char const>::Vector(unsigned char const*, unsigned long, bool)
257
258
  // Returns the start of the memory range.
259
  // For vector v this is NOT necessarily &v[0], see forward().
260
0
  const T *start() const { return start_; }
Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned short const>::start() const
Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned char const>::start() const
261
262
  // Returns the length of the vector, in characters.
263
0
  size_t length() const { return length_; }
Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned short const>::length() const
Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned char const>::length() const
264
265
  // Returns true if the Vector is front-to-back, false if back-to-front.
266
  // In the latter case, v[0] corresponds to the *end* of the memory range.
267
0
  bool forward() const { return is_forward_; }
Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned short const>::forward() const
Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned char const>::forward() const
268
269
  // Access individual vector elements - checks bounds in debug mode.
270
0
  T &operator[](size_t index) const {
271
0
    NBYTES_ASSERT_TRUE(index < length_);
272
0
    return start_[is_forward_ ? index : (length_ - index - 1)];
273
0
  }
Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned short const>::operator[](unsigned long) const
Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned char const>::operator[](unsigned long) const
274
275
 private:
276
  T *start_;
277
  size_t length_;
278
  bool is_forward_;
279
};
280
281
//---------------------------------------------------------------------
282
// String Search object.
283
//---------------------------------------------------------------------
284
285
// Class holding constants and scratch tables that apply to all string search
// variants, independently of subject and pattern char size.
class StringSearchBase {
 protected:
  // Cap on the maximal shift in the Boyer-Moore implementation. By setting a
  // limit, we can fix the size of tables. For a needle longer than this limit,
  // search will not be optimal, since we only build tables for a suffix
  // of the string, but it is a safe approximation.
  static constexpr int kBMMaxShift = 250;

  // Reduce alphabet to this size.
  // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size
  // proportional to the input alphabet. We reduce the alphabet size by
  // equating input characters modulo a smaller alphabet size. This gives
  // a potentially less efficient searching, but is a safe approximation.
  // For needles using only characters in the same Unicode 256-code point page,
  // there is no search speed degradation.
  static constexpr int kLatin1AlphabetSize = 256;
  static constexpr int kUC16AlphabetSize = 256;

  // For patterns below this length, the skip length of Boyer-Moore is too short
  // to compensate for the algorithmic overhead compared to simple brute force.
  static constexpr int kBMMinPatternLength = 8;

  // Store for the BoyerMoore(Horspool) bad char shift table. Its length is
  // the (reduced) alphabet size.
  int bad_char_shift_table_[kUC16AlphabetSize];
  // Store for the BoyerMoore good suffix shift table.
  int good_suffix_shift_table_[kBMMaxShift + 1];
  // Table used temporarily while building the BoyerMoore good suffix
  // shift table.
  int suffix_table_[kBMMaxShift + 1];
};
318
319
template <typename Char>
320
class StringSearch : private StringSearchBase {
321
 public:
322
  typedef stringsearch::Vector<const Char> Vector;
323
324
0
  explicit StringSearch(Vector pattern) : pattern_(pattern), start_(0) {
325
0
    if (pattern.length() >= kBMMaxShift) {
326
0
      start_ = pattern.length() - kBMMaxShift;
327
0
    }
328
329
0
    size_t pattern_length = pattern_.length();
330
0
    NBYTES_ASSERT_TRUE(pattern_length > 0);
331
0
    if (pattern_length < kBMMinPatternLength) {
332
0
      if (pattern_length == 1) {
333
0
        strategy_ = SearchStrategy::kSingleChar;
334
0
        return;
335
0
      }
336
0
      strategy_ = SearchStrategy::kLinear;
337
0
      return;
338
0
    }
339
0
    strategy_ = SearchStrategy::kInitial;
340
0
  }
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::StringSearch(nbytes::stringsearch::Vector<unsigned short const>)
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::StringSearch(nbytes::stringsearch::Vector<unsigned char const>)
341
342
0
  size_t Search(Vector subject, size_t index) {
343
0
    switch (strategy_) {
344
0
      case kBoyerMooreHorspool:
345
0
        return BoyerMooreHorspoolSearch(subject, index);
346
0
      case kBoyerMoore:
347
0
        return BoyerMooreSearch(subject, index);
348
0
      case kInitial:
349
0
        return InitialSearch(subject, index);
350
0
      case kLinear:
351
0
        return LinearSearch(subject, index);
352
0
      case kSingleChar:
353
0
        return SingleCharSearch(subject, index);
354
0
    }
355
0
    unreachable();
356
0
  }
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::Search(nbytes::stringsearch::Vector<unsigned short const>, unsigned long)
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::Search(nbytes::stringsearch::Vector<unsigned char const>, unsigned long)
357
358
0
  static inline int AlphabetSize() {
359
0
    if (sizeof(Char) == 1) {
360
      // Latin1 needle.
361
0
      return kLatin1AlphabetSize;
362
0
    } else {
363
      // UC16 needle.
364
0
      return kUC16AlphabetSize;
365
0
    }
366
367
0
    static_assert(
368
0
        sizeof(Char) == sizeof(uint8_t) || sizeof(Char) == sizeof(uint16_t),
369
0
        "sizeof(Char) == sizeof(uint16_t) || sizeof(uint8_t)");
370
0
  }
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::AlphabetSize()
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::AlphabetSize()
371
372
 private:
373
  typedef size_t (StringSearch::*SearchFunction)(Vector, size_t);
374
  size_t SingleCharSearch(Vector subject, size_t start_index);
375
  size_t LinearSearch(Vector subject, size_t start_index);
376
  size_t InitialSearch(Vector subject, size_t start_index);
377
  size_t BoyerMooreHorspoolSearch(Vector subject, size_t start_index);
378
  size_t BoyerMooreSearch(Vector subject, size_t start_index);
379
380
  void PopulateBoyerMooreHorspoolTable();
381
382
  void PopulateBoyerMooreTable();
383
384
0
  static inline int CharOccurrence(int *bad_char_occurrence, Char char_code) {
385
0
    if (sizeof(Char) == 1) {
386
0
      return bad_char_occurrence[static_cast<int>(char_code)];
387
0
    }
388
    // Both pattern and subject are UC16. Reduce character to equivalence class.
389
0
    int equiv_class = char_code % kUC16AlphabetSize;
390
0
    return bad_char_occurrence[equiv_class];
391
0
  }
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::CharOccurrence(int*, unsigned short)
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::CharOccurrence(int*, unsigned char)
392
393
  enum SearchStrategy {
394
    kBoyerMooreHorspool,
395
    kBoyerMoore,
396
    kInitial,
397
    kLinear,
398
    kSingleChar,
399
  };
400
401
  // The pattern to search for.
402
  Vector pattern_;
403
  SearchStrategy strategy_;
404
  // Cache value of Max(0, pattern_length() - kBMMaxShift)
405
  size_t start_;
406
};
407
408
0
// Returns whichever of the two bytes of `character` has the larger value.
inline uint8_t GetHighestValueByte(uint16_t character) {
  const uint8_t low_byte = static_cast<uint8_t>(character & 0xFF);
  const uint8_t high_byte = static_cast<uint8_t>(character >> 8);
  return low_byte < high_byte ? high_byte : low_byte;
}
412
413
0
// One-byte overload: the character is its own highest-value byte.
inline uint8_t GetHighestValueByte(uint8_t character) {
  return character;
}
414
415
// Scans the `haystack_len` bytes at `haystack` from the back towards the
// front and returns a pointer to the last byte equal to `needle`, or nullptr
// when there is none. Delegates to memrchr(3) when _GNU_SOURCE is defined and
// uses a plain loop everywhere else (e.g. on Windows).
inline const void *MemrchrFill(const void *haystack, uint8_t needle,
                               size_t haystack_len) {
#ifdef _GNU_SOURCE
  return memrchr(haystack, needle, haystack_len);
#else
  const uint8_t *const bytes = static_cast<const uint8_t *>(haystack);
  size_t remaining = haystack_len;
  while (remaining != 0) {
    --remaining;
    if (bytes[remaining] == needle) {
      return bytes + remaining;
    }
  }
  return nullptr;
#endif
}
432
433
// Finds the first occurrence of *two-byte* character pattern[0] in the string
434
// `subject`. Does not check that the whole pattern matches.
435
template <typename Char>
436
inline size_t FindFirstCharacter(Vector<const Char> pattern,
437
0
                                 Vector<const Char> subject, size_t index) {
438
0
  const Char pattern_first_char = pattern[0];
439
0
  const size_t max_n = (subject.length() - pattern.length() + 1);
440
441
  // For speed, search for the more `rare` of the two bytes in pattern[0]
442
  // using memchr / memrchr (which are much faster than a simple for loop).
443
0
  const uint8_t search_byte = GetHighestValueByte(pattern_first_char);
444
0
  size_t pos = index;
445
0
  do {
446
0
    const size_t bytes_to_search = (max_n - pos) * sizeof(Char);
447
0
    const void *void_pos;
448
0
    if (subject.forward()) {
449
      // Assert that bytes_to_search won't overflow
450
0
      NBYTES_ASSERT_TRUE(pos <= max_n);
451
0
      NBYTES_ASSERT_TRUE(max_n - pos <= SIZE_MAX / sizeof(Char));
452
0
      void_pos = memchr(subject.start() + pos, search_byte, bytes_to_search);
453
0
    } else {
454
0
      NBYTES_ASSERT_TRUE(pos <= subject.length());
455
0
      NBYTES_ASSERT_TRUE(subject.length() - pos <= SIZE_MAX / sizeof(Char));
456
0
      void_pos = MemrchrFill(subject.start() + pattern.length() - 1,
457
0
                             search_byte, bytes_to_search);
458
0
    }
459
0
    const Char *char_pos = static_cast<const Char *>(void_pos);
460
0
    if (char_pos == nullptr) return subject.length();
461
462
    // Then, for each match, verify that the full two bytes match pattern[0].
463
0
    char_pos = AlignDown(char_pos, sizeof(Char));
464
0
    size_t raw_pos = static_cast<size_t>(char_pos - subject.start());
465
0
    pos = subject.forward() ? raw_pos : (subject.length() - raw_pos - 1);
466
0
    if (subject[pos] == pattern_first_char) {
467
      // Match found, hooray.
468
0
      return pos;
469
0
    }
470
    // Search byte matched, but the other byte of pattern[0] didn't. Keep going.
471
0
  } while (++pos < max_n);
472
473
0
  return subject.length();
474
0
}
475
476
// Finds the first occurrence of the byte pattern[0] in string `subject`.
477
// Does not verify that the whole pattern matches.
478
template <>
479
inline size_t FindFirstCharacter(Vector<const uint8_t> pattern,
480
0
                                 Vector<const uint8_t> subject, size_t index) {
481
0
  const uint8_t pattern_first_char = pattern[0];
482
0
  const size_t subj_len = subject.length();
483
0
  const size_t max_n = (subject.length() - pattern.length() + 1);
484
485
0
  const void *pos;
486
0
  if (subject.forward()) {
487
0
    pos = memchr(subject.start() + index, pattern_first_char, max_n - index);
488
0
  } else {
489
0
    pos = MemrchrFill(subject.start() + pattern.length() - 1,
490
0
                      pattern_first_char, max_n - index);
491
0
  }
492
0
  const uint8_t *char_pos = static_cast<const uint8_t *>(pos);
493
0
  if (char_pos == nullptr) {
494
0
    return subj_len;
495
0
  }
496
497
0
  size_t raw_pos = static_cast<size_t>(char_pos - subject.start());
498
0
  return subject.forward() ? raw_pos : (subj_len - raw_pos - 1);
499
0
}
500
501
//---------------------------------------------------------------------
502
// Single Character Pattern Search Strategy
503
//---------------------------------------------------------------------
504
505
template <typename Char>
506
0
size_t StringSearch<Char>::SingleCharSearch(Vector subject, size_t index) {
507
0
  NBYTES_ASSERT_TRUE(1 == pattern_.length());
508
0
  return FindFirstCharacter(pattern_, subject, index);
509
0
}
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::SingleCharSearch(nbytes::stringsearch::Vector<unsigned short const>, unsigned long)
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::SingleCharSearch(nbytes::stringsearch::Vector<unsigned char const>, unsigned long)
510
511
//---------------------------------------------------------------------
512
// Linear Search Strategy
513
//---------------------------------------------------------------------
514
515
// Simple linear search for short patterns. Never bails out.
516
template <typename Char>
517
0
size_t StringSearch<Char>::LinearSearch(Vector subject, size_t index) {
518
0
  NBYTES_ASSERT_TRUE(pattern_.length() > 1);
519
0
  const size_t n = subject.length() - pattern_.length();
520
0
  for (size_t i = index; i <= n; i++) {
521
0
    i = FindFirstCharacter(pattern_, subject, i);
522
0
    if (i == subject.length()) return subject.length();
523
0
    NBYTES_ASSERT_TRUE(i <= n);
524
525
0
    bool matches = true;
526
0
    for (size_t j = 1; j < pattern_.length(); j++) {
527
0
      if (pattern_[j] != subject[i + j]) {
528
0
        matches = false;
529
0
        break;
530
0
      }
531
0
    }
532
0
    if (matches) {
533
0
      return i;
534
0
    }
535
0
  }
536
0
  return subject.length();
537
0
}
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::LinearSearch(nbytes::stringsearch::Vector<unsigned short const>, unsigned long)
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::LinearSearch(nbytes::stringsearch::Vector<unsigned char const>, unsigned long)
538
539
//---------------------------------------------------------------------
540
// Boyer-Moore string search
541
//---------------------------------------------------------------------
542
543
template <typename Char>
544
size_t StringSearch<Char>::BoyerMooreSearch(Vector subject,
545
0
                                            size_t start_index) {
546
0
  const size_t subject_length = subject.length();
547
0
  const size_t pattern_length = pattern_.length();
548
  // Only preprocess at most kBMMaxShift last characters of pattern.
549
0
  size_t start = start_;
550
551
0
  int *bad_char_occurrence = bad_char_shift_table_;
552
553
0
  auto good_suffix_get = [&](size_t idx) -> int {
554
0
    if (idx < start || idx - start > kBMMaxShift) return 0;
555
0
    return good_suffix_shift_table_[idx - start];
556
0
  };
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::BoyerMooreSearch(nbytes::stringsearch::Vector<unsigned short const>, unsigned long)::{lambda(unsigned long)#1}::operator()(unsigned long) const
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::BoyerMooreSearch(nbytes::stringsearch::Vector<unsigned char const>, unsigned long)::{lambda(unsigned long)#1}::operator()(unsigned long) const
557
558
0
  Char last_char = pattern_[pattern_length - 1];
559
0
  size_t index = start_index;
560
  // Continue search from i.
561
0
  while (index <= subject_length - pattern_length) {
562
0
    size_t j = pattern_length - 1;
563
0
    int c;
564
0
    while (last_char != (c = subject[index + j])) {
565
0
      int shift = j - CharOccurrence(bad_char_occurrence, c);
566
0
      index += shift;
567
0
      if (index > subject_length - pattern_length) {
568
0
        return subject.length();
569
0
      }
570
0
    }
571
0
    while (pattern_[j] == (c = subject[index + j])) {
572
0
      if (j == 0) {
573
0
        return index;
574
0
      }
575
0
      j--;
576
0
    }
577
0
    if (j < start) {
578
      // we have matched more than our tables allow us to be smart about.
579
      // Fall back on BMH shift.
580
0
      index +=
581
0
          pattern_length - 1 - CharOccurrence(bad_char_occurrence, last_char);
582
0
    } else {
583
0
      int gs_shift = good_suffix_get(j + 1);
584
0
      int bc_occ = CharOccurrence(bad_char_occurrence, c);
585
0
      int shift = j - bc_occ;
586
0
      if (gs_shift > shift) {
587
0
        shift = gs_shift;
588
0
      }
589
0
      index += shift;
590
0
    }
591
0
  }
592
593
0
  return subject.length();
594
0
}
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::BoyerMooreSearch(nbytes::stringsearch::Vector<unsigned short const>, unsigned long)
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::BoyerMooreSearch(nbytes::stringsearch::Vector<unsigned char const>, unsigned long)
595
596
template <typename Char>
597
0
void StringSearch<Char>::PopulateBoyerMooreTable() {
598
0
  const size_t pattern_length = pattern_.length();
599
0
  const size_t start = start_;
600
0
  const size_t length = pattern_length - start;
601
602
0
  auto shift_get = [&](size_t idx) -> int & {
603
0
    if (idx < start) abort();
604
0
    return good_suffix_shift_table_[idx - start];
605
0
  };
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::PopulateBoyerMooreTable()::{lambda(unsigned long)#1}::operator()(unsigned long) const
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::PopulateBoyerMooreTable()::{lambda(unsigned long)#1}::operator()(unsigned long) const
606
607
0
  auto suffix_get = [&](size_t idx) -> int & {
608
0
    if (idx < start) abort();
609
0
    return suffix_table_[idx - start];
610
0
  };
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::PopulateBoyerMooreTable()::{lambda(unsigned long)#2}::operator()(unsigned long) const
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::PopulateBoyerMooreTable()::{lambda(unsigned long)#2}::operator()(unsigned long) const
611
612
  // Initialize table.
613
0
  for (size_t i = start; i < pattern_length; i++) {
614
0
    shift_get(i) = length;
615
0
  }
616
0
  shift_get(pattern_length) = 1;
617
0
  suffix_get(pattern_length) = pattern_length + 1;
618
619
0
  if (pattern_length <= start) {
620
0
    return;
621
0
  }
622
623
  // Find suffixes.
624
0
  Char last_char = pattern_[pattern_length - 1];
625
0
  size_t suffix = pattern_length + 1;
626
0
  {
627
0
    size_t i = pattern_length;
628
0
    while (i > start) {
629
0
      Char c = pattern_[i - 1];
630
0
      while (suffix <= pattern_length && c != pattern_[suffix - 1]) {
631
0
        if (static_cast<size_t>(shift_get(suffix)) == length) {
632
0
          shift_get(suffix) = suffix - i;
633
0
        }
634
0
        suffix = suffix_get(suffix);
635
0
      }
636
0
      suffix_get(--i) = --suffix;
637
0
      if (suffix == pattern_length) {
638
        // No suffix to extend, so we check against last_char only.
639
0
        while ((i > start) && (pattern_[i - 1] != last_char)) {
640
0
          if (static_cast<size_t>(shift_get(pattern_length)) == length) {
641
0
            shift_get(pattern_length) = pattern_length - i;
642
0
          }
643
0
          suffix_get(--i) = pattern_length;
644
0
        }
645
0
        if (i > start) {
646
0
          suffix_get(--i) = --suffix;
647
0
        }
648
0
      }
649
0
    }
650
0
  }
651
652
  // Build shift table using suffixes.
653
0
  if (suffix < pattern_length) {
654
0
    for (size_t i = start; i <= pattern_length; i++) {
655
0
      if (static_cast<size_t>(shift_get(i)) == length) {
656
0
        shift_get(i) = suffix - start;
657
0
      }
658
0
      if (i == suffix) {
659
0
        suffix = suffix_get(suffix);
660
0
      }
661
0
    }
662
0
  }
663
0
}
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::PopulateBoyerMooreTable()
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::PopulateBoyerMooreTable()
664
665
//---------------------------------------------------------------------
666
// Boyer-Moore-Horspool string search.
667
//---------------------------------------------------------------------
668
669
template <typename Char>
670
size_t StringSearch<Char>::BoyerMooreHorspoolSearch(Vector subject,
671
0
                                                    size_t start_index) {
672
0
  const size_t subject_length = subject.length();
673
0
  const size_t pattern_length = pattern_.length();
674
0
  int *char_occurrences = bad_char_shift_table_;
675
0
  int64_t badness = -static_cast<int64_t>(pattern_length);
676
677
  // How bad we are doing without a good-suffix table.
678
0
  Char last_char = pattern_[pattern_length - 1];
679
0
  int last_char_shift =
680
0
      pattern_length - 1 - CharOccurrence(char_occurrences, last_char);
681
682
  // Perform search
683
0
  size_t index = start_index;  // No matches found prior to this index.
684
0
  while (index <= subject_length - pattern_length) {
685
0
    size_t j = pattern_length - 1;
686
0
    int subject_char;
687
0
    while (last_char != (subject_char = subject[index + j])) {
688
0
      int bc_occ = CharOccurrence(char_occurrences, subject_char);
689
0
      int shift = j - bc_occ;
690
0
      index += shift;
691
0
      badness += 1 - shift;  // at most zero, so badness cannot increase.
692
0
      if (index > subject_length - pattern_length) {
693
0
        return subject_length;
694
0
      }
695
0
    }
696
0
    j--;
697
0
    while (pattern_[j] == (subject[index + j])) {
698
0
      if (j == 0) {
699
0
        return index;
700
0
      }
701
0
      j--;
702
0
    }
703
0
    index += last_char_shift;
704
    // Badness increases by the number of characters we have
705
    // checked, and decreases by the number of characters we
706
    // can skip by shifting. It's a measure of how we are doing
707
    // compared to reading each character exactly once.
708
0
    badness += (pattern_length - j) - last_char_shift;
709
0
    if (badness > 0) {
710
0
      PopulateBoyerMooreTable();
711
0
      strategy_ = SearchStrategy::kBoyerMoore;
712
0
      return BoyerMooreSearch(subject, index);
713
0
    }
714
0
  }
715
0
  return subject.length();
716
0
}
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::BoyerMooreHorspoolSearch(nbytes::stringsearch::Vector<unsigned short const>, unsigned long)
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::BoyerMooreHorspoolSearch(nbytes::stringsearch::Vector<unsigned char const>, unsigned long)
717
718
template <typename Char>
719
0
void StringSearch<Char>::PopulateBoyerMooreHorspoolTable() {
720
0
  const size_t pattern_length = pattern_.length();
721
722
0
  int *bad_char_occurrence = bad_char_shift_table_;
723
724
  // Only preprocess at most kBMMaxShift last characters of pattern.
725
0
  const size_t start = start_;
726
  // Run forwards to populate bad_char_table, so that *last* instance
727
  // of character equivalence class is the one registered.
728
  // Notice: Doesn't include the last character.
729
0
  const size_t table_size = AlphabetSize();
730
0
  if (start == 0) {
731
    // All patterns less than kBMMaxShift in length.
732
0
    memset(bad_char_occurrence, -1, table_size * sizeof(*bad_char_occurrence));
733
0
  } else {
734
0
    for (size_t i = 0; i < table_size; i++) {
735
0
      bad_char_occurrence[i] = start - 1;
736
0
    }
737
0
  }
738
0
  for (size_t i = start; i < pattern_length - 1; i++) {
739
0
    Char c = pattern_[i];
740
0
    int bucket = (sizeof(Char) == 1) ? c : c % AlphabetSize();
741
0
    bad_char_occurrence[bucket] = i;
742
0
  }
743
0
}
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::PopulateBoyerMooreHorspoolTable()
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::PopulateBoyerMooreHorspoolTable()
744
745
//---------------------------------------------------------------------
746
// Linear string search with bailout to BMH.
747
//---------------------------------------------------------------------
748
749
// Simple linear search for short patterns, which bails out if the string
750
// isn't found very early in the subject. Upgrades to BoyerMooreHorspool.
751
template <typename Char>
752
0
size_t StringSearch<Char>::InitialSearch(Vector subject, size_t index) {
753
0
  const size_t pattern_length = pattern_.length();
754
  // Badness is a count of how much work we have done.  When we have
755
  // done enough work we decide it's probably worth switching to a better
756
  // algorithm.
757
0
  int64_t badness = -10 - (pattern_length << 2);
758
759
  // We know our pattern is at least 2 characters, we cache the first so
760
  // the common case of the first character not matching is faster.
761
0
  for (size_t i = index, n = subject.length() - pattern_length; i <= n; i++) {
762
0
    badness++;
763
0
    if (badness <= 0) {
764
0
      i = FindFirstCharacter(pattern_, subject, i);
765
0
      if (i == subject.length()) return subject.length();
766
0
      NBYTES_ASSERT_TRUE(i <= n);
767
0
      size_t j = 1;
768
0
      do {
769
0
        if (pattern_[j] != subject[i + j]) {
770
0
          break;
771
0
        }
772
0
        j++;
773
0
      } while (j < pattern_length);
774
0
      if (j == pattern_length) {
775
0
        return i;
776
0
      }
777
0
      badness += j;
778
0
    } else {
779
0
      PopulateBoyerMooreHorspoolTable();
780
0
      strategy_ = SearchStrategy::kBoyerMooreHorspool;
781
0
      return BoyerMooreHorspoolSearch(subject, i);
782
0
    }
783
0
  }
784
0
  return subject.length();
785
0
}
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::InitialSearch(nbytes::stringsearch::Vector<unsigned short const>, unsigned long)
Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::InitialSearch(nbytes::stringsearch::Vector<unsigned char const>, unsigned long)
786
787
// Perform a single stand-alone search.
788
// If searching multiple times for the same pattern, a search
789
// object should be constructed once and the Search function then called
790
// for each search.
791
template <typename Char>
792
size_t SearchString(Vector<const Char> subject, Vector<const Char> pattern,
793
0
                    size_t start_index) {
794
0
  StringSearch<Char> search(pattern);
795
0
  return search.Search(subject, start_index);
796
0
}
Unexecuted instantiation: unsigned long nbytes::stringsearch::SearchString<unsigned short>(nbytes::stringsearch::Vector<unsigned short const>, nbytes::stringsearch::Vector<unsigned short const>, unsigned long)
Unexecuted instantiation: unsigned long nbytes::stringsearch::SearchString<unsigned char>(nbytes::stringsearch::Vector<unsigned char const>, nbytes::stringsearch::Vector<unsigned char const>, unsigned long)
797
}  // namespace stringsearch
798
799
template <typename Char>
800
size_t SearchString(const Char *haystack, size_t haystack_length,
801
                    const Char *needle, size_t needle_length,
802
0
                    size_t start_index, bool is_forward) {
803
0
  if (haystack_length < needle_length) return haystack_length;
804
  // To do a reverse search (lastIndexOf instead of indexOf) without redundant
805
  // code, create two vectors that are reversed views into the input strings.
806
  // For example, v_needle[0] would return the *last* character of the needle.
807
  // So we're searching for the first instance of rev(needle) in rev(haystack)
808
0
  stringsearch::Vector<const Char> v_needle(needle, needle_length, is_forward);
809
0
  stringsearch::Vector<const Char> v_haystack(haystack, haystack_length,
810
0
                                              is_forward);
811
0
  size_t diff = haystack_length - needle_length;
812
0
  size_t relative_start_index;
813
0
  if (is_forward) {
814
0
    relative_start_index = start_index;
815
0
  } else if (diff < start_index) {
816
0
    relative_start_index = 0;
817
0
  } else {
818
0
    relative_start_index = diff - start_index;
819
0
  }
820
0
  size_t pos =
821
0
      stringsearch::SearchString(v_haystack, v_needle, relative_start_index);
822
0
  if (pos == haystack_length) {
823
    // not found
824
0
    return pos;
825
0
  }
826
0
  return is_forward ? pos : (haystack_length - needle_length - pos);
827
0
}
Unexecuted instantiation: unsigned long nbytes::SearchString<unsigned short>(unsigned short const*, unsigned long, unsigned short const*, unsigned long, unsigned long, bool)
Unexecuted instantiation: unsigned long nbytes::SearchString<unsigned char>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, bool)
828
829
template <size_t N>
830
size_t SearchString(const char *haystack, size_t haystack_length,
831
                    const char (&needle)[N]) {
832
  return SearchString(
833
      reinterpret_cast<const uint8_t *>(haystack), haystack_length,
834
      reinterpret_cast<const uint8_t *>(needle), N - 1, 0, true);
835
}
836
837
// ============================================================================
838
// Version metadata
839
72
// Library version as a string literal, "MAJOR.MINOR.REVISION".
#define NBYTES_VERSION "0.1.1"
840
841
// The same version broken out into integer components; the values mirror
// the NBYTES_VERSION string above.
enum {
842
  NBYTES_VERSION_MAJOR = 0,
843
  NBYTES_VERSION_MINOR = 1,
844
  NBYTES_VERSION_REVISION = 1,
845
};
846
847
}  // namespace nbytes
848
849
#endif  // NBYTES_H