/src/node/deps/nbytes/include/nbytes.h
Line  | Count  | Source  | 
1  |  | #ifndef NBYTES_H  | 
2  |  | #define NBYTES_H  | 
3  |  | #include <algorithm>  | 
4  |  | #include <cmath>  | 
5  |  | #include <cstddef>  | 
6  |  | #include <cstdint>  | 
7  |  | #include <cstring>  | 
8  |  | #include <string>  | 
9  |  |  | 
10  |  | namespace nbytes { | 
11  |  |  | 
12  |  | #if NBYTES_DEVELOPMENT_CHECKS  | 
13  |  | #define NBYTES_STR(x) #x  | 
14  |  | #define NBYTES_REQUIRE(EXPR) \  | 
15  |  |   {                          \ | 
16  |  |     if (!(EXPR) { abort(); }) } | 
17  |  |  | 
18  |  | #define NBYTES_FAIL(MESSAGE)                         \  | 
19  |  |   do {                                               \ | 
20  |  |     std::cerr << "FAIL: " << (MESSAGE) << std::endl; \  | 
21  |  |     abort();                                         \  | 
22  |  |   } while (0);  | 
23  |  | #define NBYTES_ASSERT_EQUAL(LHS, RHS, MESSAGE)                                 \  | 
24  |  |   do {                                                                         \ | 
25  |  |     if (LHS != RHS) {                                                          \ | 
26  |  |       std::cerr << "Mismatch: '" << LHS << "' - '" << RHS << "'" << std::endl; \  | 
27  |  |       NBYTES_FAIL(MESSAGE);                                                    \  | 
28  |  |     }                                                                          \  | 
29  |  |   } while (0);  | 
30  |  | #define NBYTES_ASSERT_TRUE(COND)                                            \  | 
31  |  |   do {                                                                      \ | 
32  |  |     if (!(COND)) {                                                          \ | 
33  |  |       std::cerr << "Assert at line " << __LINE__ << " of file " << __FILE__ \  | 
34  |  |                 << std::endl;                                               \  | 
35  |  |       NBYTES_FAIL(NBYTES_STR(COND));                                        \  | 
36  |  |     }                                                                       \  | 
37  |  |   } while (0);  | 
38  |  | #else  | 
39  |  | #define NBYTES_FAIL(MESSAGE)  | 
40  |  | #define NBYTES_ASSERT_EQUAL(LHS, RHS, MESSAGE)  | 
41  |  | #define NBYTES_ASSERT_TRUE(COND)  | 
42  |  | #endif  | 
43  |  |  | 
44  | 0  | [[noreturn]] inline void unreachable() { | 
45  | 0  | #ifdef __GNUC__  | 
46  | 0  |   __builtin_unreachable();  | 
47  |  | #elif defined(_MSC_VER)  | 
48  |  |   __assume(false);  | 
49  |  | #else  | 
50  |  | #endif  | 
51  | 0  | }  | 
52  |  |  | 
53  |  | // The nbytes (short for "node bytes") is a set of utility helpers for  | 
54  |  | // working with bytes that are extracted from Node.js' internals. The  | 
55  |  | // motivation for extracting these into a separate library is to make it  | 
56  |  | // easier for other projects to implement functionality that is compatible  | 
57  |  | // with Node.js' implementation of various byte manipulation functions.  | 
58  |  |  | 
59  |  | // Round up a to the next highest multiple of b.  | 
60  |  | template <typename T>  | 
61  | 0  | constexpr T RoundUp(T a, T b) { | 
62  | 0  |   return a % b != 0 ? a + b - (a % b) : a;  | 
63  | 0  | }  | 
64  |  |  | 
65  |  | // Align ptr to an `alignment`-bytes boundary.  | 
66  |  | template <typename T, typename U>  | 
67  | 0  | constexpr T *AlignUp(T *ptr, U alignment) { | 
68  | 0  |   return reinterpret_cast<T *>(  | 
69  | 0  |       RoundUp(reinterpret_cast<uintptr_t>(ptr), alignment));  | 
70  | 0  | } Unexecuted instantiation: char* nbytes::AlignUp<char, unsigned long>(char*, unsigned long) Unexecuted instantiation: unsigned short* nbytes::AlignUp<unsigned short, unsigned long>(unsigned short*, unsigned long)  | 
71  |  |  | 
72  |  | template <typename T, typename U>  | 
73  | 0  | inline T AlignDown(T value, U alignment) { | 
74  | 0  |   return reinterpret_cast<T>(  | 
75  | 0  |       (reinterpret_cast<uintptr_t>(value) & ~(alignment - 1)));  | 
76  | 0  | }  | 
77  |  |  | 
78  |  | template <typename T>  | 
79  |  | inline T MultiplyWithOverflowCheck(T a, T b) { | 
80  |  |   auto ret = a * b;  | 
81  |  |   if (a != 0) { | 
82  |  |     NBYTES_ASSERT_TRUE(b == ret / a);  | 
83  |  |   }  | 
84  |  |  | 
85  |  |   return ret;  | 
86  |  | }  | 
87  |  |  | 
88  |  | void ForceAsciiSlow(const char *src, char *dst, size_t len);  | 
89  |  | void ForceAscii(const char *src, char *dst, size_t len);  | 
90  |  |  | 
91  |  | // ============================================================================  | 
92  |  | // Byte Swapping  | 
93  |  |  | 
94  |  | // Swaps bytes in place. nbytes is the number of bytes to swap and must be a  | 
95  |  | // multiple of the word size (checked by function).  | 
96  |  | bool SwapBytes16(char *data, size_t nbytes);  | 
97  |  | bool SwapBytes32(char *data, size_t nbytes);  | 
98  |  | bool SwapBytes64(char *data, size_t nbytes);  | 
99  |  |  | 
100  |  | // ============================================================================  | 
101  |  | // Base64 (legacy)  | 
102  |  |  | 
103  |  | #ifdef _MSC_VER  | 
104  |  | #pragma warning(push)  | 
105  |  | // MSVC C4003: not enough actual parameters for macro 'identifier'  | 
106  |  | #pragma warning(disable : 4003)  | 
107  |  | #endif  | 
108  |  |  | 
109  |  | extern const int8_t unbase64_table[256];  | 
110  |  |  | 
111  |  | template <typename TypeName>  | 
112  |  | bool Base64DecodeGroupSlow(char *const dst, const size_t dstlen,  | 
113  |  |                            const TypeName *const src, const size_t srclen,  | 
114  | 0  |                            size_t *const i, size_t *const k) { | 
115  | 0  |   uint8_t hi;  | 
116  | 0  |   uint8_t lo;  | 
117  | 0  | #define V(expr)                                                        \  | 
118  | 0  |   for (;;) {                                                           \ | 
119  | 0  |     const uint8_t c = static_cast<uint8_t>(src[*i]);                   \  | 
120  | 0  |     lo = unbase64_table[c];                                            \  | 
121  | 0  |     *i += 1;                                                           \  | 
122  | 0  |     if (lo < 64) break;                         /* Legal character. */ \  | 
123  | 0  |     if (c == '=' || *i >= srclen) return false; /* Stop decoding. */   \  | 
124  | 0  |   }                                                                    \  | 
125  | 0  |   expr;                                                                \  | 
126  | 0  |   if (*i >= srclen) return false;                                      \  | 
127  | 0  |   if (*k >= dstlen) return false;                                      \  | 
128  | 0  |   hi = lo;  | 
129  | 0  |   V(/* Nothing. */);  | 
130  | 0  |   V(dst[(*k)++] = ((hi & 0x3F) << 2) | ((lo & 0x30) >> 4));  | 
131  | 0  |   V(dst[(*k)++] = ((hi & 0x0F) << 4) | ((lo & 0x3C) >> 2));  | 
132  | 0  |   V(dst[(*k)++] = ((hi & 0x03) << 6) | ((lo & 0x3F) >> 0));  | 
133  | 0  | #undef V  | 
134  | 0  |   return true;  // Continue decoding.  | 
135  | 0  | } Unexecuted instantiation: bool nbytes::Base64DecodeGroupSlow<char>(char*, unsigned long, char const*, unsigned long, unsigned long*, unsigned long*) Unexecuted instantiation: bool nbytes::Base64DecodeGroupSlow<unsigned short>(char*, unsigned long, unsigned short const*, unsigned long, unsigned long*, unsigned long*)  | 
136  |  |  | 
137  |  | enum class Base64Mode { NORMAL, URL }; | 
138  |  |  | 
139  |  | inline constexpr size_t Base64EncodedSize(  | 
140  | 0  |     size_t size, Base64Mode mode = Base64Mode::NORMAL) { | 
141  | 0  |   return mode == Base64Mode::NORMAL ? ((size + 2) / 3 * 4)  | 
142  | 0  |                                     : static_cast<size_t>(std::ceil(  | 
143  | 0  |                                           static_cast<double>(size * 4) / 3));  | 
144  | 0  | }  | 
145  |  |  | 
146  |  | // Doesn't check for padding at the end.  Can be 1-2 bytes over.  | 
147  | 0  | inline constexpr size_t Base64DecodedSizeFast(size_t size) { | 
148  |  |   // 1-byte input cannot be decoded  | 
149  | 0  |   return size > 1 ? (size / 4) * 3 + (size % 4 + 1) / 2 : 0;  | 
150  | 0  | }  | 
151  |  |  | 
152  | 0  | inline uint32_t ReadUint32BE(const unsigned char *p) { | 
153  | 0  |   return static_cast<uint32_t>(p[0] << 24U) |  | 
154  | 0  |          static_cast<uint32_t>(p[1] << 16U) |  | 
155  | 0  |          static_cast<uint32_t>(p[2] << 8U) | static_cast<uint32_t>(p[3]);  | 
156  | 0  | }  | 
157  |  |  | 
158  |  | template <typename TypeName>  | 
159  | 0  | size_t Base64DecodedSize(const TypeName *src, size_t size) { | 
160  |  |   // 1-byte input cannot be decoded  | 
161  | 0  |   if (size < 2) return 0;  | 
162  |  |  | 
163  | 0  |   if (src[size - 1] == '=') { | 
164  | 0  |     size--;  | 
165  | 0  |     if (src[size - 1] == '=') size--;  | 
166  | 0  |   }  | 
167  | 0  |   return Base64DecodedSizeFast(size);  | 
168  | 0  | } Unexecuted instantiation: unsigned long nbytes::Base64DecodedSize<char>(char const*, unsigned long) Unexecuted instantiation: unsigned long nbytes::Base64DecodedSize<unsigned short>(unsigned short const*, unsigned long)  | 
169  |  |  | 
170  |  | template <typename TypeName>  | 
171  |  | size_t Base64DecodeFast(char *const dst, const size_t dstlen,  | 
172  |  |                         const TypeName *const src, const size_t srclen,  | 
173  | 0  |                         const size_t decoded_size) { | 
174  | 0  |   const size_t available = dstlen < decoded_size ? dstlen : decoded_size;  | 
175  | 0  |   const size_t max_k = available / 3 * 3;  | 
176  | 0  |   size_t max_i = srclen / 4 * 4;  | 
177  | 0  |   size_t i = 0;  | 
178  | 0  |   size_t k = 0;  | 
179  | 0  |   while (i < max_i && k < max_k) { | 
180  | 0  |     const unsigned char txt[] = { | 
181  | 0  |         static_cast<unsigned char>(  | 
182  | 0  |             unbase64_table[static_cast<uint8_t>(src[i + 0])]),  | 
183  | 0  |         static_cast<unsigned char>(  | 
184  | 0  |             unbase64_table[static_cast<uint8_t>(src[i + 1])]),  | 
185  | 0  |         static_cast<unsigned char>(  | 
186  | 0  |             unbase64_table[static_cast<uint8_t>(src[i + 2])]),  | 
187  | 0  |         static_cast<unsigned char>(  | 
188  | 0  |             unbase64_table[static_cast<uint8_t>(src[i + 3])]),  | 
189  | 0  |     };  | 
190  |  | 
  | 
191  | 0  |     const uint32_t v = ReadUint32BE(txt);  | 
192  |  |     // If MSB is set, input contains whitespace or is not valid base64.  | 
193  | 0  |     if (v & 0x80808080) { | 
194  | 0  |       if (!Base64DecodeGroupSlow(dst, dstlen, src, srclen, &i, &k)) return k;  | 
195  | 0  |       max_i = i + (srclen - i) / 4 * 4;  // Align max_i again.  | 
196  | 0  |     } else { | 
197  | 0  |       dst[k + 0] = ((v >> 22) & 0xFC) | ((v >> 20) & 0x03);  | 
198  | 0  |       dst[k + 1] = ((v >> 12) & 0xF0) | ((v >> 10) & 0x0F);  | 
199  | 0  |       dst[k + 2] = ((v >> 2) & 0xC0) | ((v >> 0) & 0x3F);  | 
200  | 0  |       i += 4;  | 
201  | 0  |       k += 3;  | 
202  | 0  |     }  | 
203  | 0  |   }  | 
204  | 0  |   if (i < srclen && k < dstlen) { | 
205  | 0  |     Base64DecodeGroupSlow(dst, dstlen, src, srclen, &i, &k);  | 
206  | 0  |   }  | 
207  | 0  |   return k;  | 
208  | 0  | } Unexecuted instantiation: unsigned long nbytes::Base64DecodeFast<char>(char*, unsigned long, char const*, unsigned long, unsigned long) Unexecuted instantiation: unsigned long nbytes::Base64DecodeFast<unsigned short>(char*, unsigned long, unsigned short const*, unsigned long, unsigned long)  | 
209  |  |  | 
210  |  | template <typename TypeName>  | 
211  |  | size_t Base64Decode(char *const dst, const size_t dstlen,  | 
212  | 0  |                     const TypeName *const src, const size_t srclen) { | 
213  | 0  |   const size_t decoded_size = Base64DecodedSize(src, srclen);  | 
214  | 0  |   return Base64DecodeFast(dst, dstlen, src, srclen, decoded_size);  | 
215  | 0  | } Unexecuted instantiation: unsigned long nbytes::Base64Decode<char>(char*, unsigned long, char const*, unsigned long) Unexecuted instantiation: unsigned long nbytes::Base64Decode<unsigned short>(char*, unsigned long, unsigned short const*, unsigned long)  | 
216  |  |  | 
217  |  | #ifdef _MSC_VER  | 
218  |  | #pragma warning(pop)  | 
219  |  | #endif  | 
220  |  |  | 
221  |  | // ============================================================================  | 
222  |  | // Hex (legacy)  | 
223  |  |  | 
224  |  | extern const int8_t unhex_table[256];  | 
225  |  |  | 
226  |  | template <typename TypeName>  | 
227  |  | static size_t HexDecode(char *buf, size_t len, const TypeName *src,  | 
228  | 0  |                         const size_t srcLen) { | 
229  | 0  |   size_t i;  | 
230  | 0  |   for (i = 0; i < len && i * 2 + 1 < srcLen; ++i) { | 
231  | 0  |     unsigned a = unhex_table[static_cast<uint8_t>(src[i * 2 + 0])];  | 
232  | 0  |     unsigned b = unhex_table[static_cast<uint8_t>(src[i * 2 + 1])];  | 
233  | 0  |     if (!~a || !~b) return i;  | 
234  | 0  |     buf[i] = (a << 4) | b;  | 
235  | 0  |   }  | 
236  |  |  | 
237  | 0  |   return i;  | 
238  | 0  | } Unexecuted instantiation: string_bytes.cc:unsigned long nbytes::HexDecode<char>(char*, unsigned long, char const*, unsigned long) Unexecuted instantiation: string_bytes.cc:unsigned long nbytes::HexDecode<unsigned short>(char*, unsigned long, unsigned short const*, unsigned long)  | 
239  |  |  | 
240  |  | size_t HexEncode(const char *src, size_t slen, char *dst, size_t dlen);  | 
241  |  |  | 
242  |  | std::string HexEncode(const char *src, size_t slen);  | 
243  |  |  | 
244  |  | // ============================================================================  | 
245  |  | // StringSearch  | 
246  |  |  | 
247  |  | namespace stringsearch { | 
248  |  |  | 
249  |  | template <typename T>  | 
250  |  | class Vector { | 
251  |  |  public:  | 
252  |  |   Vector(T *data, size_t length, bool isForward)  | 
253  | 0  |       : start_(data), length_(length), is_forward_(isForward) { | 
254  | 0  |     CHECK(length > 0 && data != nullptr);  | 
255  | 0  |   } Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned short const>::Vector(unsigned short const*, unsigned long, bool) Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned char const>::Vector(unsigned char const*, unsigned long, bool)  | 
256  |  |  | 
257  |  |   // Returns the start of the memory range.  | 
258  |  |   // For vector v this is NOT necessarily &v[0], see forward().  | 
259  | 0  |   const T *start() const { return start_; }Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned short const>::start() const Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned char const>::start() const  | 
260  |  |  | 
261  |  |   // Returns the length of the vector, in characters.  | 
262  | 0  |   size_t length() const { return length_; }Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned short const>::length() const Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned char const>::length() const  | 
263  |  |  | 
264  |  |   // Returns true if the Vector is front-to-back, false if back-to-front.  | 
265  |  |   // In the latter case, v[0] corresponds to the *end* of the memory range.  | 
266  | 0  |   bool forward() const { return is_forward_; }Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned short const>::forward() const Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned char const>::forward() const  | 
267  |  |  | 
268  |  |   // Access individual vector elements - checks bounds in debug mode.  | 
269  | 0  |   T &operator[](size_t index) const { | 
270  | 0  |     NBYTES_ASSERT_TRUE(index < length_);  | 
271  | 0  |     return start_[is_forward_ ? index : (length_ - index - 1)];  | 
272  | 0  |   } Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned short const>::operator[](unsigned long) const Unexecuted instantiation: nbytes::stringsearch::Vector<unsigned char const>::operator[](unsigned long) const  | 
273  |  |  | 
274  |  |  private:  | 
275  |  |   T *start_;  | 
276  |  |   size_t length_;  | 
277  |  |   bool is_forward_;  | 
278  |  | };  | 
279  |  |  | 
280  |  | //---------------------------------------------------------------------  | 
281  |  | // String Search object.  | 
282  |  | //---------------------------------------------------------------------  | 
283  |  |  | 
284  |  | // Class holding constants and methods that apply to all string search variants,  | 
285  |  | // independently of subject and pattern char size.  | 
286  |  | class StringSearchBase { | 
287  |  |  protected:  | 
288  |  |   // Cap on the maximal shift in the Boyer-Moore implementation. By setting a  | 
289  |  |   // limit, we can fix the size of tables. For a needle longer than this limit,  | 
290  |  |   // search will not be optimal, since we only build tables for a suffix  | 
291  |  |   // of the string, but it is a safe approximation.  | 
292  |  |   static const int kBMMaxShift = 250;  | 
293  |  |  | 
294  |  |   // Reduce alphabet to this size.  | 
295  |  |   // One of the tables used by Boyer-Moore and Boyer-Moore-Horspool has size  | 
296  |  |   // proportional to the input alphabet. We reduce the alphabet size by  | 
297  |  |   // equating input characters modulo a smaller alphabet size. This gives  | 
298  |  |   // a potentially less efficient searching, but is a safe approximation.  | 
299  |  |   // For needles using only characters in the same Unicode 256-code point page,  | 
300  |  |   // there is no search speed degradation.  | 
301  |  |   static const int kLatin1AlphabetSize = 256;  | 
302  |  |   static const int kUC16AlphabetSize = 256;  | 
303  |  |  | 
304  |  |   // Bad-char shift table stored in the state. It's length is the alphabet size.  | 
305  |  |   // For patterns below this length, the skip length of Boyer-Moore is too short  | 
306  |  |   // to compensate for the algorithmic overhead compared to simple brute force.  | 
307  |  |   static const int kBMMinPatternLength = 8;  | 
308  |  |  | 
309  |  |   // Store for the BoyerMoore(Horspool) bad char shift table.  | 
310  |  |   int bad_char_shift_table_[kUC16AlphabetSize];  | 
311  |  |   // Store for the BoyerMoore good suffix shift table.  | 
312  |  |   int good_suffix_shift_table_[kBMMaxShift + 1];  | 
313  |  |   // Table used temporarily while building the BoyerMoore good suffix  | 
314  |  |   // shift table.  | 
315  |  |   int suffix_table_[kBMMaxShift + 1];  | 
316  |  | };  | 
317  |  |  | 
318  |  | template <typename Char>  | 
319  |  | class StringSearch : private StringSearchBase { | 
320  |  |  public:  | 
321  |  |   typedef stringsearch::Vector<const Char> Vector;  | 
322  |  |  | 
323  | 0  |   explicit StringSearch(Vector pattern) : pattern_(pattern), start_(0) { | 
324  | 0  |     if (pattern.length() >= kBMMaxShift) { | 
325  | 0  |       start_ = pattern.length() - kBMMaxShift;  | 
326  | 0  |     }  | 
327  |  | 
  | 
328  | 0  |     size_t pattern_length = pattern_.length();  | 
329  | 0  |     NBYTES_ASSERT_TRUE(pattern_length > 0);  | 
330  | 0  |     if (pattern_length < kBMMinPatternLength) { | 
331  | 0  |       if (pattern_length == 1) { | 
332  | 0  |         strategy_ = SearchStrategy::kSingleChar;  | 
333  | 0  |         return;  | 
334  | 0  |       }  | 
335  | 0  |       strategy_ = SearchStrategy::kLinear;  | 
336  | 0  |       return;  | 
337  | 0  |     }  | 
338  | 0  |     strategy_ = SearchStrategy::kInitial;  | 
339  | 0  |   } Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::StringSearch(nbytes::stringsearch::Vector<unsigned short const>) Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::StringSearch(nbytes::stringsearch::Vector<unsigned char const>)  | 
340  |  |  | 
341  | 0  |   size_t Search(Vector subject, size_t index) { | 
342  | 0  |     switch (strategy_) { | 
343  | 0  |       case kBoyerMooreHorspool:  | 
344  | 0  |         return BoyerMooreHorspoolSearch(subject, index);  | 
345  | 0  |       case kBoyerMoore:  | 
346  | 0  |         return BoyerMooreSearch(subject, index);  | 
347  | 0  |       case kInitial:  | 
348  | 0  |         return InitialSearch(subject, index);  | 
349  | 0  |       case kLinear:  | 
350  | 0  |         return LinearSearch(subject, index);  | 
351  | 0  |       case kSingleChar:  | 
352  | 0  |         return SingleCharSearch(subject, index);  | 
353  | 0  |     }  | 
354  | 0  |     unreachable();  | 
355  | 0  |   } Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::Search(nbytes::stringsearch::Vector<unsigned short const>, unsigned long) Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::Search(nbytes::stringsearch::Vector<unsigned char const>, unsigned long)  | 
356  |  |  | 
357  | 0  |   static inline int AlphabetSize() { | 
358  | 0  |     if (sizeof(Char) == 1) { | 
359  |  |       // Latin1 needle.  | 
360  | 0  |       return kLatin1AlphabetSize;  | 
361  | 0  |     } else { | 
362  |  |       // UC16 needle.  | 
363  | 0  |       return kUC16AlphabetSize;  | 
364  | 0  |     }  | 
365  |  |  | 
366  | 0  |     static_assert(  | 
367  | 0  |         sizeof(Char) == sizeof(uint8_t) || sizeof(Char) == sizeof(uint16_t),  | 
368  | 0  |         "sizeof(Char) == sizeof(uint16_t) || sizeof(uint8_t)");  | 
369  | 0  |   } Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::AlphabetSize() Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::AlphabetSize()  | 
370  |  |  | 
371  |  |  private:  | 
372  |  |   typedef size_t (StringSearch::*SearchFunction)(Vector, size_t);  | 
373  |  |   size_t SingleCharSearch(Vector subject, size_t start_index);  | 
374  |  |   size_t LinearSearch(Vector subject, size_t start_index);  | 
375  |  |   size_t InitialSearch(Vector subject, size_t start_index);  | 
376  |  |   size_t BoyerMooreHorspoolSearch(Vector subject, size_t start_index);  | 
377  |  |   size_t BoyerMooreSearch(Vector subject, size_t start_index);  | 
378  |  |  | 
379  |  |   void PopulateBoyerMooreHorspoolTable();  | 
380  |  |  | 
381  |  |   void PopulateBoyerMooreTable();  | 
382  |  |  | 
383  | 0  |   static inline int CharOccurrence(int *bad_char_occurrence, Char char_code) { | 
384  | 0  |     if (sizeof(Char) == 1) { | 
385  | 0  |       return bad_char_occurrence[static_cast<int>(char_code)];  | 
386  | 0  |     }  | 
387  |  |     // Both pattern and subject are UC16. Reduce character to equivalence class.  | 
388  | 0  |     int equiv_class = char_code % kUC16AlphabetSize;  | 
389  | 0  |     return bad_char_occurrence[equiv_class];  | 
390  | 0  |   } Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::CharOccurrence(int*, unsigned short) Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::CharOccurrence(int*, unsigned char)  | 
391  |  |  | 
392  |  |   enum SearchStrategy { | 
393  |  |     kBoyerMooreHorspool,  | 
394  |  |     kBoyerMoore,  | 
395  |  |     kInitial,  | 
396  |  |     kLinear,  | 
397  |  |     kSingleChar,  | 
398  |  |   };  | 
399  |  |  | 
400  |  |   // The pattern to search for.  | 
401  |  |   Vector pattern_;  | 
402  |  |   SearchStrategy strategy_;  | 
403  |  |   // Cache value of Max(0, pattern_length() - kBMMaxShift)  | 
404  |  |   size_t start_;  | 
405  |  | };  | 
406  |  |  | 
407  | 0  | inline uint8_t GetHighestValueByte(uint16_t character) { | 
408  | 0  |   return std::max(static_cast<uint8_t>(character & 0xFF),  | 
409  | 0  |                   static_cast<uint8_t>(character >> 8));  | 
410  | 0  | }  | 
411  |  |  | 
412  | 0  | inline uint8_t GetHighestValueByte(uint8_t character) { return character; } | 
413  |  |  | 
414  |  | // Searches for a byte value in a memory buffer, back to front.  | 
415  |  | // Uses memrchr(3) on systems which support it, for speed.  | 
416  |  | // Falls back to a vanilla for loop on non-GNU systems such as Windows.  | 
417  |  | inline const void *MemrchrFill(const void *haystack, uint8_t needle,  | 
418  | 0  |                                size_t haystack_len) { | 
419  | 0  | #ifdef _GNU_SOURCE  | 
420  | 0  |   return memrchr(haystack, needle, haystack_len);  | 
421  |  | #else  | 
422  |  |   const uint8_t *haystack8 = static_cast<const uint8_t *>(haystack);  | 
423  |  |   for (size_t i = haystack_len - 1; i != static_cast<size_t>(-1); i--) { | 
424  |  |     if (haystack8[i] == needle) { | 
425  |  |       return haystack8 + i;  | 
426  |  |     }  | 
427  |  |   }  | 
428  |  |   return nullptr;  | 
429  |  | #endif  | 
430  | 0  | }  | 
431  |  |  | 
432  |  | // Finds the first occurrence of *two-byte* character pattern[0] in the string  | 
433  |  | // `subject`. Does not check that the whole pattern matches.  | 
434  |  | template <typename Char>  | 
435  |  | inline size_t FindFirstCharacter(Vector<const Char> pattern,  | 
436  | 0  |                                  Vector<const Char> subject, size_t index) { | 
437  | 0  |   const Char pattern_first_char = pattern[0];  | 
438  | 0  |   const size_t max_n = (subject.length() - pattern.length() + 1);  | 
439  |  |  | 
440  |  |   // For speed, search for the more `rare` of the two bytes in pattern[0]  | 
441  |  |   // using memchr / memrchr (which are much faster than a simple for loop).  | 
442  | 0  |   const uint8_t search_byte = GetHighestValueByte(pattern_first_char);  | 
443  | 0  |   size_t pos = index;  | 
444  | 0  |   do { | 
445  | 0  |     const size_t bytes_to_search = (max_n - pos) * sizeof(Char);  | 
446  | 0  |     const void *void_pos;  | 
447  | 0  |     if (subject.forward()) { | 
448  |  |       // Assert that bytes_to_search won't overflow  | 
449  | 0  |       NBYTES_ASSERT_TRUE(pos <= max_n);  | 
450  | 0  |       NBYTES_ASSERT_TRUE(max_n - pos <= SIZE_MAX / sizeof(Char));  | 
451  | 0  |       void_pos = memchr(subject.start() + pos, search_byte, bytes_to_search);  | 
452  | 0  |     } else { | 
453  | 0  |       NBYTES_ASSERT_TRUE(pos <= subject.length());  | 
454  | 0  |       NBYTES_ASSERT_TRUE(subject.length() - pos <= SIZE_MAX / sizeof(Char));  | 
455  | 0  |       void_pos = MemrchrFill(subject.start() + pattern.length() - 1,  | 
456  | 0  |                              search_byte, bytes_to_search);  | 
457  | 0  |     }  | 
458  | 0  |     const Char *char_pos = static_cast<const Char *>(void_pos);  | 
459  | 0  |     if (char_pos == nullptr) return subject.length();  | 
460  |  |  | 
461  |  |     // Then, for each match, verify that the full two bytes match pattern[0].  | 
462  | 0  |     char_pos = AlignDown(char_pos, sizeof(Char));  | 
463  | 0  |     size_t raw_pos = static_cast<size_t>(char_pos - subject.start());  | 
464  | 0  |     pos = subject.forward() ? raw_pos : (subject.length() - raw_pos - 1);  | 
465  | 0  |     if (subject[pos] == pattern_first_char) { | 
466  |  |       // Match found, hooray.  | 
467  | 0  |       return pos;  | 
468  | 0  |     }  | 
469  |  |     // Search byte matched, but the other byte of pattern[0] didn't. Keep going.  | 
470  | 0  |   } while (++pos < max_n);  | 
471  |  |  | 
472  | 0  |   return subject.length();  | 
473  | 0  | }  | 
474  |  |  | 
475  |  | // Finds the first occurrence of the byte pattern[0] in string `subject`.  | 
476  |  | // Does not verify that the whole pattern matches.  | 
477  |  | template <>  | 
478  |  | inline size_t FindFirstCharacter(Vector<const uint8_t> pattern,  | 
479  | 0  |                                  Vector<const uint8_t> subject, size_t index) { | 
480  | 0  |   const uint8_t pattern_first_char = pattern[0];  | 
481  | 0  |   const size_t subj_len = subject.length();  | 
482  | 0  |   const size_t max_n = (subject.length() - pattern.length() + 1);  | 
483  |  | 
  | 
484  | 0  |   const void *pos;  | 
485  | 0  |   if (subject.forward()) { | 
486  | 0  |     pos = memchr(subject.start() + index, pattern_first_char, max_n - index);  | 
487  | 0  |   } else { | 
488  | 0  |     pos = MemrchrFill(subject.start() + pattern.length() - 1,  | 
489  | 0  |                       pattern_first_char, max_n - index);  | 
490  | 0  |   }  | 
491  | 0  |   const uint8_t *char_pos = static_cast<const uint8_t *>(pos);  | 
492  | 0  |   if (char_pos == nullptr) { | 
493  | 0  |     return subj_len;  | 
494  | 0  |   }  | 
495  |  |  | 
496  | 0  |   size_t raw_pos = static_cast<size_t>(char_pos - subject.start());  | 
497  | 0  |   return subject.forward() ? raw_pos : (subj_len - raw_pos - 1);  | 
498  | 0  | }  | 
499  |  |  | 
500  |  | //---------------------------------------------------------------------  | 
501  |  | // Single Character Pattern Search Strategy  | 
502  |  | //---------------------------------------------------------------------  | 
503  |  |  | 
504  |  | template <typename Char>  | 
505  | 0  | size_t StringSearch<Char>::SingleCharSearch(Vector subject, size_t index) { | 
506  | 0  |   NBYTES_ASSERT_TRUE(1 == pattern_.length());  | 
507  | 0  |   return FindFirstCharacter(pattern_, subject, index);  | 
508  | 0  | } Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::SingleCharSearch(nbytes::stringsearch::Vector<unsigned short const>, unsigned long) Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::SingleCharSearch(nbytes::stringsearch::Vector<unsigned char const>, unsigned long)  | 
509  |  |  | 
510  |  | //---------------------------------------------------------------------  | 
511  |  | // Linear Search Strategy  | 
512  |  | //---------------------------------------------------------------------  | 
513  |  |  | 
514  |  | // Simple linear search for short patterns. Never bails out.  | 
515  |  | template <typename Char>  | 
516  | 0  | size_t StringSearch<Char>::LinearSearch(Vector subject, size_t index) { | 
517  | 0  |   NBYTES_ASSERT_TRUE(pattern_.length() > 1);  | 
518  | 0  |   const size_t n = subject.length() - pattern_.length();  | 
519  | 0  |   for (size_t i = index; i <= n; i++) { | 
520  | 0  |     i = FindFirstCharacter(pattern_, subject, i);  | 
521  | 0  |     if (i == subject.length()) return subject.length();  | 
522  | 0  |     NBYTES_ASSERT_TRUE(i <= n);  | 
523  |  | 
  | 
524  | 0  |     bool matches = true;  | 
525  | 0  |     for (size_t j = 1; j < pattern_.length(); j++) { | 
526  | 0  |       if (pattern_[j] != subject[i + j]) { | 
527  | 0  |         matches = false;  | 
528  | 0  |         break;  | 
529  | 0  |       }  | 
530  | 0  |     }  | 
531  | 0  |     if (matches) { | 
532  | 0  |       return i;  | 
533  | 0  |     }  | 
534  | 0  |   }  | 
535  | 0  |   return subject.length();  | 
536  | 0  | } Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::LinearSearch(nbytes::stringsearch::Vector<unsigned short const>, unsigned long) Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::LinearSearch(nbytes::stringsearch::Vector<unsigned char const>, unsigned long)  | 
537  |  |  | 
538  |  | //---------------------------------------------------------------------  | 
539  |  | // Boyer-Moore string search  | 
540  |  | //---------------------------------------------------------------------  | 
541  |  |  | 
542  |  | template <typename Char>  | 
543  |  | size_t StringSearch<Char>::BoyerMooreSearch(Vector subject,  | 
544  | 0  |                                             size_t start_index) { | 
545  | 0  |   const size_t subject_length = subject.length();  | 
546  | 0  |   const size_t pattern_length = pattern_.length();  | 
547  |  |   // Only preprocess at most kBMMaxShift last characters of pattern.  | 
548  | 0  |   size_t start = start_;  | 
549  |  | 
  | 
550  | 0  |   int *bad_char_occurrence = bad_char_shift_table_;  | 
551  | 0  |   int *good_suffix_shift = good_suffix_shift_table_ - start_;  | 
552  |  | 
  | 
553  | 0  |   Char last_char = pattern_[pattern_length - 1];  | 
554  | 0  |   size_t index = start_index;  | 
555  |  |   // Continue search from i.  | 
556  | 0  |   while (index <= subject_length - pattern_length) { | 
557  | 0  |     size_t j = pattern_length - 1;  | 
558  | 0  |     int c;  | 
559  | 0  |     while (last_char != (c = subject[index + j])) { | 
560  | 0  |       int shift = j - CharOccurrence(bad_char_occurrence, c);  | 
561  | 0  |       index += shift;  | 
562  | 0  |       if (index > subject_length - pattern_length) { | 
563  | 0  |         return subject.length();  | 
564  | 0  |       }  | 
565  | 0  |     }  | 
566  | 0  |     while (pattern_[j] == (c = subject[index + j])) { | 
567  | 0  |       if (j == 0) { | 
568  | 0  |         return index;  | 
569  | 0  |       }  | 
570  | 0  |       j--;  | 
571  | 0  |     }  | 
572  | 0  |     if (j < start) { | 
573  |  |       // we have matched more than our tables allow us to be smart about.  | 
574  |  |       // Fall back on BMH shift.  | 
575  | 0  |       index +=  | 
576  | 0  |           pattern_length - 1 - CharOccurrence(bad_char_occurrence, last_char);  | 
577  | 0  |     } else { | 
578  | 0  |       int gs_shift = good_suffix_shift[j + 1];  | 
579  | 0  |       int bc_occ = CharOccurrence(bad_char_occurrence, c);  | 
580  | 0  |       int shift = j - bc_occ;  | 
581  | 0  |       if (gs_shift > shift) { | 
582  | 0  |         shift = gs_shift;  | 
583  | 0  |       }  | 
584  | 0  |       index += shift;  | 
585  | 0  |     }  | 
586  | 0  |   }  | 
587  |  |  | 
588  | 0  |   return subject.length();  | 
589  | 0  | } Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::BoyerMooreSearch(nbytes::stringsearch::Vector<unsigned short const>, unsigned long) Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::BoyerMooreSearch(nbytes::stringsearch::Vector<unsigned char const>, unsigned long)  | 
590  |  |  | 
591  |  | template <typename Char>  | 
592  | 0  | void StringSearch<Char>::PopulateBoyerMooreTable() { | 
593  | 0  |   const size_t pattern_length = pattern_.length();  | 
594  |  |   // Only look at the last kBMMaxShift characters of pattern (from start_  | 
595  |  |   // to pattern_length).  | 
596  | 0  |   const size_t start = start_;  | 
597  | 0  |   const size_t length = pattern_length - start;  | 
598  |  |  | 
599  |  |   // Biased tables so that we can use pattern indices as table indices,  | 
600  |  |   // even if we only cover the part of the pattern from offset start.  | 
601  | 0  |   int *shift_table = good_suffix_shift_table_ - start_;  | 
602  | 0  |   int *suffix_table = suffix_table_ - start_;  | 
603  |  |  | 
604  |  |   // Initialize table.  | 
605  | 0  |   for (size_t i = start; i < pattern_length; i++) { | 
606  | 0  |     shift_table[i] = length;  | 
607  | 0  |   }  | 
608  | 0  |   shift_table[pattern_length] = 1;  | 
609  | 0  |   suffix_table[pattern_length] = pattern_length + 1;  | 
610  |  | 
  | 
611  | 0  |   if (pattern_length <= start) { | 
612  | 0  |     return;  | 
613  | 0  |   }  | 
614  |  |  | 
615  |  |   // Find suffixes.  | 
616  | 0  |   Char last_char = pattern_[pattern_length - 1];  | 
617  | 0  |   size_t suffix = pattern_length + 1;  | 
618  | 0  |   { | 
619  | 0  |     size_t i = pattern_length;  | 
620  | 0  |     while (i > start) { | 
621  | 0  |       Char c = pattern_[i - 1];  | 
622  | 0  |       while (suffix <= pattern_length && c != pattern_[suffix - 1]) { | 
623  | 0  |         if (static_cast<size_t>(shift_table[suffix]) == length) { | 
624  | 0  |           shift_table[suffix] = suffix - i;  | 
625  | 0  |         }  | 
626  | 0  |         suffix = suffix_table[suffix];  | 
627  | 0  |       }  | 
628  | 0  |       suffix_table[--i] = --suffix;  | 
629  | 0  |       if (suffix == pattern_length) { | 
630  |  |         // No suffix to extend, so we check against last_char only.  | 
631  | 0  |         while ((i > start) && (pattern_[i - 1] != last_char)) { | 
632  | 0  |           if (static_cast<size_t>(shift_table[pattern_length]) == length) { | 
633  | 0  |             shift_table[pattern_length] = pattern_length - i;  | 
634  | 0  |           }  | 
635  | 0  |           suffix_table[--i] = pattern_length;  | 
636  | 0  |         }  | 
637  | 0  |         if (i > start) { | 
638  | 0  |           suffix_table[--i] = --suffix;  | 
639  | 0  |         }  | 
640  | 0  |       }  | 
641  | 0  |     }  | 
642  | 0  |   }  | 
643  |  |   // Build shift table using suffixes.  | 
644  | 0  |   if (suffix < pattern_length) { | 
645  | 0  |     for (size_t i = start; i <= pattern_length; i++) { | 
646  | 0  |       if (static_cast<size_t>(shift_table[i]) == length) { | 
647  | 0  |         shift_table[i] = suffix - start;  | 
648  | 0  |       }  | 
649  | 0  |       if (i == suffix) { | 
650  | 0  |         suffix = suffix_table[suffix];  | 
651  | 0  |       }  | 
652  | 0  |     }  | 
653  | 0  |   }  | 
654  | 0  | } Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::PopulateBoyerMooreTable() Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::PopulateBoyerMooreTable()  | 
655  |  |  | 
656  |  | //---------------------------------------------------------------------  | 
657  |  | // Boyer-Moore-Horspool string search.  | 
658  |  | //---------------------------------------------------------------------  | 
659  |  |  | 
660  |  | template <typename Char>  | 
661  |  | size_t StringSearch<Char>::BoyerMooreHorspoolSearch(Vector subject,  | 
662  | 0  |                                                     size_t start_index) { | 
663  | 0  |   const size_t subject_length = subject.length();  | 
664  | 0  |   const size_t pattern_length = pattern_.length();  | 
665  | 0  |   int *char_occurrences = bad_char_shift_table_;  | 
666  | 0  |   int64_t badness = -static_cast<int64_t>(pattern_length);  | 
667  |  |  | 
668  |  |   // How bad we are doing without a good-suffix table.  | 
669  | 0  |   Char last_char = pattern_[pattern_length - 1];  | 
670  | 0  |   int last_char_shift =  | 
671  | 0  |       pattern_length - 1 - CharOccurrence(char_occurrences, last_char);  | 
672  |  |  | 
673  |  |   // Perform search  | 
674  | 0  |   size_t index = start_index;  // No matches found prior to this index.  | 
675  | 0  |   while (index <= subject_length - pattern_length) { | 
676  | 0  |     size_t j = pattern_length - 1;  | 
677  | 0  |     int subject_char;  | 
678  | 0  |     while (last_char != (subject_char = subject[index + j])) { | 
679  | 0  |       int bc_occ = CharOccurrence(char_occurrences, subject_char);  | 
680  | 0  |       int shift = j - bc_occ;  | 
681  | 0  |       index += shift;  | 
682  | 0  |       badness += 1 - shift;  // at most zero, so badness cannot increase.  | 
683  | 0  |       if (index > subject_length - pattern_length) { | 
684  | 0  |         return subject_length;  | 
685  | 0  |       }  | 
686  | 0  |     }  | 
687  | 0  |     j--;  | 
688  | 0  |     while (pattern_[j] == (subject[index + j])) { | 
689  | 0  |       if (j == 0) { | 
690  | 0  |         return index;  | 
691  | 0  |       }  | 
692  | 0  |       j--;  | 
693  | 0  |     }  | 
694  | 0  |     index += last_char_shift;  | 
695  |  |     // Badness increases by the number of characters we have  | 
696  |  |     // checked, and decreases by the number of characters we  | 
697  |  |     // can skip by shifting. It's a measure of how we are doing  | 
698  |  |     // compared to reading each character exactly once.  | 
699  | 0  |     badness += (pattern_length - j) - last_char_shift;  | 
700  | 0  |     if (badness > 0) { | 
701  | 0  |       PopulateBoyerMooreTable();  | 
702  | 0  |       strategy_ = SearchStrategy::kBoyerMoore;  | 
703  | 0  |       return BoyerMooreSearch(subject, index);  | 
704  | 0  |     }  | 
705  | 0  |   }  | 
706  | 0  |   return subject.length();  | 
707  | 0  | } Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::BoyerMooreHorspoolSearch(nbytes::stringsearch::Vector<unsigned short const>, unsigned long) Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::BoyerMooreHorspoolSearch(nbytes::stringsearch::Vector<unsigned char const>, unsigned long)  | 
708  |  |  | 
709  |  | template <typename Char>  | 
710  | 0  | void StringSearch<Char>::PopulateBoyerMooreHorspoolTable() { | 
711  | 0  |   const size_t pattern_length = pattern_.length();  | 
712  |  | 
  | 
713  | 0  |   int *bad_char_occurrence = bad_char_shift_table_;  | 
714  |  |  | 
715  |  |   // Only preprocess at most kBMMaxShift last characters of pattern.  | 
716  | 0  |   const size_t start = start_;  | 
717  |  |   // Run forwards to populate bad_char_table, so that *last* instance  | 
718  |  |   // of character equivalence class is the one registered.  | 
719  |  |   // Notice: Doesn't include the last character.  | 
720  | 0  |   const size_t table_size = AlphabetSize();  | 
721  | 0  |   if (start == 0) { | 
722  |  |     // All patterns less than kBMMaxShift in length.  | 
723  | 0  |     memset(bad_char_occurrence, -1, table_size * sizeof(*bad_char_occurrence));  | 
724  | 0  |   } else { | 
725  | 0  |     for (size_t i = 0; i < table_size; i++) { | 
726  | 0  |       bad_char_occurrence[i] = start - 1;  | 
727  | 0  |     }  | 
728  | 0  |   }  | 
729  | 0  |   for (size_t i = start; i < pattern_length - 1; i++) { | 
730  | 0  |     Char c = pattern_[i];  | 
731  | 0  |     int bucket = (sizeof(Char) == 1) ? c : c % AlphabetSize();  | 
732  | 0  |     bad_char_occurrence[bucket] = i;  | 
733  | 0  |   }  | 
734  | 0  | } Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::PopulateBoyerMooreHorspoolTable() Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::PopulateBoyerMooreHorspoolTable()  | 
735  |  |  | 
736  |  | //---------------------------------------------------------------------  | 
737  |  | // Linear string search with bailout to BMH.  | 
738  |  | //---------------------------------------------------------------------  | 
739  |  |  | 
740  |  | // Simple linear search for short patterns, which bails out if the string  | 
741  |  | // isn't found very early in the subject. Upgrades to BoyerMooreHorspool.  | 
742  |  | template <typename Char>  | 
743  | 0  | size_t StringSearch<Char>::InitialSearch(Vector subject, size_t index) { | 
744  | 0  |   const size_t pattern_length = pattern_.length();  | 
745  |  |   // Badness is a count of how much work we have done.  When we have  | 
746  |  |   // done enough work we decide it's probably worth switching to a better  | 
747  |  |   // algorithm.  | 
748  | 0  |   int64_t badness = -10 - (pattern_length << 2);  | 
749  |  |  | 
750  |  |   // We know our pattern is at least 2 characters, we cache the first so  | 
751  |  |   // the common case of the first character not matching is faster.  | 
752  | 0  |   for (size_t i = index, n = subject.length() - pattern_length; i <= n; i++) { | 
753  | 0  |     badness++;  | 
754  | 0  |     if (badness <= 0) { | 
755  | 0  |       i = FindFirstCharacter(pattern_, subject, i);  | 
756  | 0  |       if (i == subject.length()) return subject.length();  | 
757  | 0  |       NBYTES_ASSERT_TRUE(i <= n);  | 
758  | 0  |       size_t j = 1;  | 
759  | 0  |       do { | 
760  | 0  |         if (pattern_[j] != subject[i + j]) { | 
761  | 0  |           break;  | 
762  | 0  |         }  | 
763  | 0  |         j++;  | 
764  | 0  |       } while (j < pattern_length);  | 
765  | 0  |       if (j == pattern_length) { | 
766  | 0  |         return i;  | 
767  | 0  |       }  | 
768  | 0  |       badness += j;  | 
769  | 0  |     } else { | 
770  | 0  |       PopulateBoyerMooreHorspoolTable();  | 
771  | 0  |       strategy_ = SearchStrategy::kBoyerMooreHorspool;  | 
772  | 0  |       return BoyerMooreHorspoolSearch(subject, i);  | 
773  | 0  |     }  | 
774  | 0  |   }  | 
775  | 0  |   return subject.length();  | 
776  | 0  | } Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned short>::InitialSearch(nbytes::stringsearch::Vector<unsigned short const>, unsigned long) Unexecuted instantiation: nbytes::stringsearch::StringSearch<unsigned char>::InitialSearch(nbytes::stringsearch::Vector<unsigned char const>, unsigned long)  | 
777  |  |  | 
778  |  | // Perform a single stand-alone search.  | 
779  |  | // If searching multiple times for the same pattern, a search  | 
780  |  | // object should be constructed once and the Search function then called  | 
781  |  | // for each search.  | 
782  |  | template <typename Char>  | 
783  |  | size_t SearchString(Vector<const Char> subject, Vector<const Char> pattern,  | 
784  | 0  |                     size_t start_index) { | 
785  | 0  |   StringSearch<Char> search(pattern);  | 
786  | 0  |   return search.Search(subject, start_index);  | 
787  | 0  | } Unexecuted instantiation: unsigned long nbytes::stringsearch::SearchString<unsigned short>(nbytes::stringsearch::Vector<unsigned short const>, nbytes::stringsearch::Vector<unsigned short const>, unsigned long) Unexecuted instantiation: unsigned long nbytes::stringsearch::SearchString<unsigned char>(nbytes::stringsearch::Vector<unsigned char const>, nbytes::stringsearch::Vector<unsigned char const>, unsigned long)  | 
788  |  | }  // namespace stringsearch  | 
789  |  |  | 
790  |  | template <typename Char>  | 
791  |  | size_t SearchString(const Char *haystack, size_t haystack_length,  | 
792  |  |                     const Char *needle, size_t needle_length,  | 
793  | 0  |                     size_t start_index, bool is_forward) { | 
794  | 0  |   if (haystack_length < needle_length) return haystack_length;  | 
795  |  |   // To do a reverse search (lastIndexOf instead of indexOf) without redundant  | 
796  |  |   // code, create two vectors that are reversed views into the input strings.  | 
797  |  |   // For example, v_needle[0] would return the *last* character of the needle.  | 
798  |  |   // So we're searching for the first instance of rev(needle) in rev(haystack)  | 
799  | 0  |   stringsearch::Vector<const Char> v_needle(needle, needle_length, is_forward);  | 
800  | 0  |   stringsearch::Vector<const Char> v_haystack(haystack, haystack_length,  | 
801  | 0  |                                               is_forward);  | 
802  | 0  |   size_t diff = haystack_length - needle_length;  | 
803  | 0  |   size_t relative_start_index;  | 
804  | 0  |   if (is_forward) { | 
805  | 0  |     relative_start_index = start_index;  | 
806  | 0  |   } else if (diff < start_index) { | 
807  | 0  |     relative_start_index = 0;  | 
808  | 0  |   } else { | 
809  | 0  |     relative_start_index = diff - start_index;  | 
810  | 0  |   }  | 
811  | 0  |   size_t pos =  | 
812  | 0  |       stringsearch::SearchString(v_haystack, v_needle, relative_start_index);  | 
813  | 0  |   if (pos == haystack_length) { | 
814  |  |     // not found  | 
815  | 0  |     return pos;  | 
816  | 0  |   }  | 
817  | 0  |   return is_forward ? pos : (haystack_length - needle_length - pos);  | 
818  | 0  | } Unexecuted instantiation: unsigned long nbytes::SearchString<unsigned short>(unsigned short const*, unsigned long, unsigned short const*, unsigned long, unsigned long, bool) Unexecuted instantiation: unsigned long nbytes::SearchString<unsigned char>(unsigned char const*, unsigned long, unsigned char const*, unsigned long, unsigned long, bool)  | 
819  |  |  | 
820  |  | template <size_t N>  | 
821  |  | size_t SearchString(const char *haystack, size_t haystack_length,  | 
822  |  |                     const char (&needle)[N]) { | 
823  |  |   return SearchString(  | 
824  |  |       reinterpret_cast<const uint8_t *>(haystack), haystack_length,  | 
825  |  |       reinterpret_cast<const uint8_t *>(needle), N - 1, 0, true);  | 
826  |  | }  | 
827  |  |  | 
828  |  | // ============================================================================  | 
829  |  | // Version metadata  | 
830  | 72  | #define NBYTES_VERSION "0.1.1"  | 
831  |  |  | 
832  |  | enum { | 
833  |  |   NBYTES_VERSION_MAJOR = 0,  | 
834  |  |   NBYTES_VERSION_MINOR = 1,  | 
835  |  |   NBYTES_VERSION_REVISION = 1,  | 
836  |  | };  | 
837  |  |  | 
838  |  | }  // namespace nbytes  | 
839  |  |  | 
840  |  | #endif  // NBYTES_H  |