/proc/self/cwd/external/re2~/re2/prog.cc
Line | Count | Source |
1 | | // Copyright 2007 The RE2 Authors. All Rights Reserved. |
2 | | // Use of this source code is governed by a BSD-style |
3 | | // license that can be found in the LICENSE file. |
4 | | |
5 | | // Compiled regular expression representation. |
6 | | // Tested by compile_test.cc |
7 | | |
8 | | #include "re2/prog.h" |
9 | | |
10 | | #include <stdint.h> |
11 | | #include <string.h> |
12 | | |
13 | | #include <algorithm> |
14 | | #include <string> |
15 | | #include <utility> |
16 | | #include <vector> |
17 | | |
18 | | #include "absl/log/absl_check.h" |
19 | | #include "absl/log/absl_log.h" |
20 | | #include "absl/strings/str_format.h" |
21 | | #include "absl/strings/string_view.h" |
22 | | #include "re2/bitmap256.h" |
23 | | #include "re2/pod_array.h" |
24 | | #include "re2/sparse_array.h" |
25 | | #include "re2/sparse_set.h" |
26 | | |
27 | | #if defined(__AVX2__) |
28 | | #include <immintrin.h> |
29 | | #ifdef _MSC_VER |
30 | | #include <intrin.h> |
31 | | #endif |
32 | | #endif |
33 | | |
34 | | namespace re2 { |
35 | | |
36 | | // Constructors per Inst opcode |
37 | | |
38 | 0 | void Prog::Inst::InitAlt(uint32_t out, uint32_t out1) { |
39 | 0 | ABSL_DCHECK_EQ(out_opcode_, uint32_t{0}); |
40 | 0 | set_out_opcode(out, kInstAlt); |
41 | 0 | out1_ = out1; |
42 | 0 | } |
43 | | |
44 | 0 | void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) { |
45 | 0 | ABSL_DCHECK_EQ(out_opcode_, uint32_t{0}); |
46 | 0 | set_out_opcode(out, kInstByteRange); |
47 | 0 | lo_ = lo & 0xFF; |
48 | 0 | hi_ = hi & 0xFF; |
49 | 0 | hint_foldcase_ = foldcase&1; |
50 | 0 | } |
51 | | |
52 | 0 | void Prog::Inst::InitCapture(int cap, uint32_t out) { |
53 | 0 | ABSL_DCHECK_EQ(out_opcode_, uint32_t{0}); |
54 | 0 | set_out_opcode(out, kInstCapture); |
55 | 0 | cap_ = cap; |
56 | 0 | } |
57 | | |
58 | 0 | void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32_t out) { |
59 | 0 | ABSL_DCHECK_EQ(out_opcode_, uint32_t{0}); |
60 | 0 | set_out_opcode(out, kInstEmptyWidth); |
61 | 0 | empty_ = empty; |
62 | 0 | } |
63 | | |
64 | 0 | void Prog::Inst::InitMatch(int32_t id) { |
65 | 0 | ABSL_DCHECK_EQ(out_opcode_, uint32_t{0}); |
66 | 0 | set_opcode(kInstMatch); |
67 | 0 | match_id_ = id; |
68 | 0 | } |
69 | | |
70 | 0 | void Prog::Inst::InitNop(uint32_t out) { |
71 | 0 | ABSL_DCHECK_EQ(out_opcode_, uint32_t{0}); |
72 | 0 | set_opcode(kInstNop); |
73 | 0 | } |
74 | | |
75 | 0 | void Prog::Inst::InitFail() { |
76 | 0 | ABSL_DCHECK_EQ(out_opcode_, uint32_t{0}); |
77 | 0 | set_opcode(kInstFail); |
78 | 0 | } |
79 | | |
80 | 0 | std::string Prog::Inst::Dump() { |
81 | 0 | switch (opcode()) { |
82 | 0 | default: |
83 | 0 | return absl::StrFormat("opcode %d", static_cast<int>(opcode())); |
84 | | |
85 | 0 | case kInstAlt: |
86 | 0 | return absl::StrFormat("alt -> %d | %d", out(), out1_); |
87 | | |
88 | 0 | case kInstAltMatch: |
89 | 0 | return absl::StrFormat("altmatch -> %d | %d", out(), out1_); |
90 | | |
91 | 0 | case kInstByteRange: |
92 | 0 | return absl::StrFormat("byte%s [%02x-%02x] %d -> %d", |
93 | 0 | foldcase() ? "/i" : "", |
94 | 0 | lo_, hi_, hint(), out()); |
95 | | |
96 | 0 | case kInstCapture: |
97 | 0 | return absl::StrFormat("capture %d -> %d", cap_, out()); |
98 | | |
99 | 0 | case kInstEmptyWidth: |
100 | 0 | return absl::StrFormat("emptywidth %#x -> %d", |
101 | 0 | static_cast<int>(empty_), out()); |
102 | | |
103 | 0 | case kInstMatch: |
104 | 0 | return absl::StrFormat("match! %d", match_id()); |
105 | | |
106 | 0 | case kInstNop: |
107 | 0 | return absl::StrFormat("nop -> %d", out()); |
108 | | |
109 | 0 | case kInstFail: |
110 | 0 | return absl::StrFormat("fail"); |
111 | 0 | } |
112 | 0 | } |
113 | | |
114 | | Prog::Prog() |
115 | 0 | : anchor_start_(false), |
116 | 0 | anchor_end_(false), |
117 | 0 | reversed_(false), |
118 | 0 | did_flatten_(false), |
119 | 0 | did_onepass_(false), |
120 | 0 | start_(0), |
121 | 0 | start_unanchored_(0), |
122 | 0 | size_(0), |
123 | 0 | bytemap_range_(0), |
124 | 0 | prefix_foldcase_(false), |
125 | 0 | prefix_size_(0), |
126 | 0 | list_count_(0), |
127 | 0 | bit_state_text_max_size_(0), |
128 | 0 | dfa_mem_(0), |
129 | 0 | dfa_first_(NULL), |
130 | 0 | dfa_longest_(NULL) { |
131 | 0 | } |
132 | | |
133 | 0 | Prog::~Prog() { |
134 | 0 | DeleteDFA(dfa_longest_); |
135 | 0 | DeleteDFA(dfa_first_); |
136 | 0 | if (prefix_foldcase_) |
137 | 0 | delete[] prefix_dfa_; |
138 | 0 | } |
139 | | |
140 | | typedef SparseSet Workq; |
141 | | |
142 | 0 | static inline void AddToQueue(Workq* q, int id) { |
143 | 0 | if (id != 0) |
144 | 0 | q->insert(id); |
145 | 0 | } |
146 | | |
147 | 0 | static std::string ProgToString(Prog* prog, Workq* q) { |
148 | 0 | std::string s; |
149 | 0 | for (Workq::iterator i = q->begin(); i != q->end(); ++i) { |
150 | 0 | int id = *i; |
151 | 0 | Prog::Inst* ip = prog->inst(id); |
152 | 0 | s += absl::StrFormat("%d. %s\n", id, ip->Dump()); |
153 | 0 | AddToQueue(q, ip->out()); |
154 | 0 | if (ip->opcode() == kInstAlt || ip->opcode() == kInstAltMatch) |
155 | 0 | AddToQueue(q, ip->out1()); |
156 | 0 | } |
157 | 0 | return s; |
158 | 0 | } |
159 | | |
160 | 0 | static std::string FlattenedProgToString(Prog* prog, int start) { |
161 | 0 | std::string s; |
162 | 0 | for (int id = start; id < prog->size(); id++) { |
163 | 0 | Prog::Inst* ip = prog->inst(id); |
164 | 0 | if (ip->last()) |
165 | 0 | s += absl::StrFormat("%d. %s\n", id, ip->Dump()); |
166 | 0 | else |
167 | 0 | s += absl::StrFormat("%d+ %s\n", id, ip->Dump()); |
168 | 0 | } |
169 | 0 | return s; |
170 | 0 | } |
171 | | |
172 | 0 | std::string Prog::Dump() { |
173 | 0 | if (did_flatten_) |
174 | 0 | return FlattenedProgToString(this, start_); |
175 | | |
176 | 0 | Workq q(size_); |
177 | 0 | AddToQueue(&q, start_); |
178 | 0 | return ProgToString(this, &q); |
179 | 0 | } |
180 | | |
181 | 0 | std::string Prog::DumpUnanchored() { |
182 | 0 | if (did_flatten_) |
183 | 0 | return FlattenedProgToString(this, start_unanchored_); |
184 | | |
185 | 0 | Workq q(size_); |
186 | 0 | AddToQueue(&q, start_unanchored_); |
187 | 0 | return ProgToString(this, &q); |
188 | 0 | } |
189 | | |
190 | 0 | std::string Prog::DumpByteMap() { |
191 | 0 | std::string map; |
192 | 0 | for (int c = 0; c < 256; c++) { |
193 | 0 | int b = bytemap_[c]; |
194 | 0 | int lo = c; |
195 | 0 | while (c < 256-1 && bytemap_[c+1] == b) |
196 | 0 | c++; |
197 | 0 | int hi = c; |
198 | 0 | map += absl::StrFormat("[%02x-%02x] -> %d\n", lo, hi, b); |
199 | 0 | } |
200 | 0 | return map; |
201 | 0 | } |
202 | | |
203 | | // Is ip a guaranteed match at end of text, perhaps after some capturing? |
204 | 0 | static bool IsMatch(Prog* prog, Prog::Inst* ip) { |
205 | 0 | for (;;) { |
206 | 0 | switch (ip->opcode()) { |
207 | 0 | default: |
208 | 0 | ABSL_LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode(); |
209 | 0 | return false; |
210 | | |
211 | 0 | case kInstAlt: |
212 | 0 | case kInstAltMatch: |
213 | 0 | case kInstByteRange: |
214 | 0 | case kInstFail: |
215 | 0 | case kInstEmptyWidth: |
216 | 0 | return false; |
217 | | |
218 | 0 | case kInstCapture: |
219 | 0 | case kInstNop: |
220 | 0 | ip = prog->inst(ip->out()); |
221 | 0 | break; |
222 | | |
223 | 0 | case kInstMatch: |
224 | 0 | return true; |
225 | 0 | } |
226 | 0 | } |
227 | 0 | } |
228 | | |
229 | | // Peep-hole optimizer. |
230 | 0 | void Prog::Optimize() { |
231 | 0 | Workq q(size_); |
232 | | |
233 | | // Eliminate nops. Most are taken out during compilation |
234 | | // but a few are hard to avoid. |
235 | 0 | q.clear(); |
236 | 0 | AddToQueue(&q, start_); |
237 | 0 | for (Workq::iterator i = q.begin(); i != q.end(); ++i) { |
238 | 0 | int id = *i; |
239 | |
|
240 | 0 | Inst* ip = inst(id); |
241 | 0 | int j = ip->out(); |
242 | 0 | Inst* jp; |
243 | 0 | while (j != 0 && (jp=inst(j))->opcode() == kInstNop) { |
244 | 0 | j = jp->out(); |
245 | 0 | } |
246 | 0 | ip->set_out(j); |
247 | 0 | AddToQueue(&q, ip->out()); |
248 | |
|
249 | 0 | if (ip->opcode() == kInstAlt) { |
250 | 0 | j = ip->out1(); |
251 | 0 | while (j != 0 && (jp=inst(j))->opcode() == kInstNop) { |
252 | 0 | j = jp->out(); |
253 | 0 | } |
254 | 0 | ip->out1_ = j; |
255 | 0 | AddToQueue(&q, ip->out1()); |
256 | 0 | } |
257 | 0 | } |
258 | | |
259 | | // Insert kInstAltMatch instructions |
260 | | // Look for |
261 | | // ip: Alt -> j | k |
262 | | // j: ByteRange [00-FF] -> ip |
263 | | // k: Match |
264 | | // or the reverse (the above is the greedy one). |
265 | | // Rewrite Alt to AltMatch. |
266 | 0 | q.clear(); |
267 | 0 | AddToQueue(&q, start_); |
268 | 0 | for (Workq::iterator i = q.begin(); i != q.end(); ++i) { |
269 | 0 | int id = *i; |
270 | 0 | Inst* ip = inst(id); |
271 | 0 | AddToQueue(&q, ip->out()); |
272 | 0 | if (ip->opcode() == kInstAlt) |
273 | 0 | AddToQueue(&q, ip->out1()); |
274 | |
|
275 | 0 | if (ip->opcode() == kInstAlt) { |
276 | 0 | Inst* j = inst(ip->out()); |
277 | 0 | Inst* k = inst(ip->out1()); |
278 | 0 | if (j->opcode() == kInstByteRange && j->out() == id && |
279 | 0 | j->lo() == 0x00 && j->hi() == 0xFF && |
280 | 0 | IsMatch(this, k)) { |
281 | 0 | ip->set_opcode(kInstAltMatch); |
282 | 0 | continue; |
283 | 0 | } |
284 | 0 | if (IsMatch(this, j) && |
285 | 0 | k->opcode() == kInstByteRange && k->out() == id && |
286 | 0 | k->lo() == 0x00 && k->hi() == 0xFF) { |
287 | 0 | ip->set_opcode(kInstAltMatch); |
288 | 0 | } |
289 | 0 | } |
290 | 0 | } |
291 | 0 | } |
292 | | |
293 | 0 | uint32_t Prog::EmptyFlags(absl::string_view text, const char* p) { |
294 | 0 | int flags = 0; |
295 | | |
296 | | // ^ and \A |
297 | 0 | if (p == text.data()) |
298 | 0 | flags |= kEmptyBeginText | kEmptyBeginLine; |
299 | 0 | else if (p[-1] == '\n') |
300 | 0 | flags |= kEmptyBeginLine; |
301 | | |
302 | | // $ and \z |
303 | 0 | if (p == text.data() + text.size()) |
304 | 0 | flags |= kEmptyEndText | kEmptyEndLine; |
305 | 0 | else if (p < text.data() + text.size() && p[0] == '\n') |
306 | 0 | flags |= kEmptyEndLine; |
307 | | |
308 | | // \b and \B |
309 | 0 | if (p == text.data() && p == text.data() + text.size()) { |
310 | | // no word boundary here |
311 | 0 | } else if (p == text.data()) { |
312 | 0 | if (IsWordChar(p[0])) |
313 | 0 | flags |= kEmptyWordBoundary; |
314 | 0 | } else if (p == text.data() + text.size()) { |
315 | 0 | if (IsWordChar(p[-1])) |
316 | 0 | flags |= kEmptyWordBoundary; |
317 | 0 | } else { |
318 | 0 | if (IsWordChar(p[-1]) != IsWordChar(p[0])) |
319 | 0 | flags |= kEmptyWordBoundary; |
320 | 0 | } |
321 | 0 | if (!(flags & kEmptyWordBoundary)) |
322 | 0 | flags |= kEmptyNonWordBoundary; |
323 | |
|
324 | 0 | return flags; |
325 | 0 | } |
326 | | |
327 | | // ByteMapBuilder implements a coloring algorithm. |
328 | | // |
329 | | // The first phase is a series of "mark and merge" batches: we mark one or more |
330 | | // [lo-hi] ranges, then merge them into our internal state. Batching is not for |
331 | | // performance; rather, it means that the ranges are treated indistinguishably. |
332 | | // |
333 | | // Internally, the ranges are represented using a bitmap that stores the splits |
334 | | // and a vector that stores the colors; both of them are indexed by the ranges' |
335 | | // last bytes. Thus, in order to merge a [lo-hi] range, we split at lo-1 and at |
336 | | // hi (if not already split), then recolor each range in between. The color map |
337 | | // (i.e. from the old color to the new color) is maintained for the lifetime of |
338 | | // the batch and so underpins this somewhat obscure approach to set operations. |
339 | | // |
340 | | // The second phase builds the bytemap from our internal state: we recolor each |
341 | | // range, then store the new color (which is now the byte class) in each of the |
342 | | // corresponding array elements. Finally, we output the number of byte classes. |
343 | | class ByteMapBuilder { |
344 | | public: |
345 | 0 | ByteMapBuilder() { |
346 | | // Initial state: the [0-255] range has color 256. |
347 | | // This will avoid problems during the second phase, |
348 | | // in which we assign byte classes numbered from 0. |
349 | 0 | splits_.Set(255); |
350 | 0 | colors_[255] = 256; |
351 | 0 | nextcolor_ = 257; |
352 | 0 | } |
353 | | |
354 | | void Mark(int lo, int hi); |
355 | | void Merge(); |
356 | | void Build(uint8_t* bytemap, int* bytemap_range); |
357 | | |
358 | | private: |
359 | | int Recolor(int oldcolor); |
360 | | |
361 | | Bitmap256 splits_; |
362 | | int colors_[256]; |
363 | | int nextcolor_; |
364 | | std::vector<std::pair<int, int>> colormap_; |
365 | | std::vector<std::pair<int, int>> ranges_; |
366 | | |
367 | | ByteMapBuilder(const ByteMapBuilder&) = delete; |
368 | | ByteMapBuilder& operator=(const ByteMapBuilder&) = delete; |
369 | | }; |
370 | | |
371 | 0 | void ByteMapBuilder::Mark(int lo, int hi) { |
372 | 0 | ABSL_DCHECK_GE(lo, 0); |
373 | 0 | ABSL_DCHECK_GE(hi, 0); |
374 | 0 | ABSL_DCHECK_LE(lo, 255); |
375 | 0 | ABSL_DCHECK_LE(hi, 255); |
376 | 0 | ABSL_DCHECK_LE(lo, hi); |
377 | | |
378 | | // Ignore any [0-255] ranges. They cause us to recolor every range, which |
379 | | // has no effect on the eventual result and is therefore a waste of time. |
380 | 0 | if (lo == 0 && hi == 255) |
381 | 0 | return; |
382 | | |
383 | 0 | ranges_.emplace_back(lo, hi); |
384 | 0 | } |
385 | | |
386 | 0 | void ByteMapBuilder::Merge() { |
387 | 0 | for (std::vector<std::pair<int, int>>::const_iterator it = ranges_.begin(); |
388 | 0 | it != ranges_.end(); |
389 | 0 | ++it) { |
390 | 0 | int lo = it->first-1; |
391 | 0 | int hi = it->second; |
392 | |
|
393 | 0 | if (0 <= lo && !splits_.Test(lo)) { |
394 | 0 | splits_.Set(lo); |
395 | 0 | int next = splits_.FindNextSetBit(lo+1); |
396 | 0 | colors_[lo] = colors_[next]; |
397 | 0 | } |
398 | 0 | if (!splits_.Test(hi)) { |
399 | 0 | splits_.Set(hi); |
400 | 0 | int next = splits_.FindNextSetBit(hi+1); |
401 | 0 | colors_[hi] = colors_[next]; |
402 | 0 | } |
403 | |
|
404 | 0 | int c = lo+1; |
405 | 0 | while (c < 256) { |
406 | 0 | int next = splits_.FindNextSetBit(c); |
407 | 0 | colors_[next] = Recolor(colors_[next]); |
408 | 0 | if (next == hi) |
409 | 0 | break; |
410 | 0 | c = next+1; |
411 | 0 | } |
412 | 0 | } |
413 | 0 | colormap_.clear(); |
414 | 0 | ranges_.clear(); |
415 | 0 | } |
416 | | |
417 | 0 | void ByteMapBuilder::Build(uint8_t* bytemap, int* bytemap_range) { |
418 | | // Assign byte classes numbered from 0. |
419 | 0 | nextcolor_ = 0; |
420 | |
|
421 | 0 | int c = 0; |
422 | 0 | while (c < 256) { |
423 | 0 | int next = splits_.FindNextSetBit(c); |
424 | 0 | uint8_t b = static_cast<uint8_t>(Recolor(colors_[next])); |
425 | 0 | while (c <= next) { |
426 | 0 | bytemap[c] = b; |
427 | 0 | c++; |
428 | 0 | } |
429 | 0 | } |
430 | |
|
431 | 0 | *bytemap_range = nextcolor_; |
432 | 0 | } |
433 | | |
434 | 0 | int ByteMapBuilder::Recolor(int oldcolor) { |
435 | | // Yes, this is a linear search. There can be at most 256 |
436 | | // colors and there will typically be far fewer than that. |
437 | | // Also, we need to consider keys *and* values in order to |
438 | | // avoid recoloring a given range more than once per batch. |
439 | 0 | std::vector<std::pair<int, int>>::const_iterator it = |
440 | 0 | std::find_if(colormap_.begin(), colormap_.end(), |
441 | 0 | [=](const std::pair<int, int>& kv) -> bool { |
442 | 0 | return kv.first == oldcolor || kv.second == oldcolor; |
443 | 0 | }); |
444 | 0 | if (it != colormap_.end()) |
445 | 0 | return it->second; |
446 | 0 | int newcolor = nextcolor_; |
447 | 0 | nextcolor_++; |
448 | 0 | colormap_.emplace_back(oldcolor, newcolor); |
449 | 0 | return newcolor; |
450 | 0 | } |
451 | | |
452 | 0 | void Prog::ComputeByteMap() { |
453 | | // Fill in bytemap with byte classes for the program. |
454 | | // Ranges of bytes that are treated indistinguishably |
455 | | // will be mapped to a single byte class. |
456 | 0 | ByteMapBuilder builder; |
457 | | |
458 | | // Don't repeat the work for ^ and $. |
459 | 0 | bool marked_line_boundaries = false; |
460 | | // Don't repeat the work for \b and \B. |
461 | 0 | bool marked_word_boundaries = false; |
462 | |
|
463 | 0 | for (int id = 0; id < size(); id++) { |
464 | 0 | Inst* ip = inst(id); |
465 | 0 | if (ip->opcode() == kInstByteRange) { |
466 | 0 | int lo = ip->lo(); |
467 | 0 | int hi = ip->hi(); |
468 | 0 | builder.Mark(lo, hi); |
469 | 0 | if (ip->foldcase() && lo <= 'z' && hi >= 'a') { |
470 | 0 | int foldlo = lo; |
471 | 0 | int foldhi = hi; |
472 | 0 | if (foldlo < 'a') |
473 | 0 | foldlo = 'a'; |
474 | 0 | if (foldhi > 'z') |
475 | 0 | foldhi = 'z'; |
476 | 0 | if (foldlo <= foldhi) { |
477 | 0 | foldlo += 'A' - 'a'; |
478 | 0 | foldhi += 'A' - 'a'; |
479 | 0 | builder.Mark(foldlo, foldhi); |
480 | 0 | } |
481 | 0 | } |
482 | | // If this Inst is not the last Inst in its list AND the next Inst is |
483 | | // also a ByteRange AND the Insts have the same out, defer the merge. |
484 | 0 | if (!ip->last() && |
485 | 0 | inst(id+1)->opcode() == kInstByteRange && |
486 | 0 | ip->out() == inst(id+1)->out()) |
487 | 0 | continue; |
488 | 0 | builder.Merge(); |
489 | 0 | } else if (ip->opcode() == kInstEmptyWidth) { |
490 | 0 | if (ip->empty() & (kEmptyBeginLine|kEmptyEndLine) && |
491 | 0 | !marked_line_boundaries) { |
492 | 0 | builder.Mark('\n', '\n'); |
493 | 0 | builder.Merge(); |
494 | 0 | marked_line_boundaries = true; |
495 | 0 | } |
496 | 0 | if (ip->empty() & (kEmptyWordBoundary|kEmptyNonWordBoundary) && |
497 | 0 | !marked_word_boundaries) { |
498 | | // We require two batches here: the first for ranges that are word |
499 | | // characters, the second for ranges that are not word characters. |
500 | 0 | for (bool isword : {true, false}) { |
501 | 0 | int j; |
502 | 0 | for (int i = 0; i < 256; i = j) { |
503 | 0 | for (j = i + 1; j < 256 && |
504 | 0 | Prog::IsWordChar(static_cast<uint8_t>(i)) == |
505 | 0 | Prog::IsWordChar(static_cast<uint8_t>(j)); |
506 | 0 | j++) |
507 | 0 | ; |
508 | 0 | if (Prog::IsWordChar(static_cast<uint8_t>(i)) == isword) |
509 | 0 | builder.Mark(i, j - 1); |
510 | 0 | } |
511 | 0 | builder.Merge(); |
512 | 0 | } |
513 | 0 | marked_word_boundaries = true; |
514 | 0 | } |
515 | 0 | } |
516 | 0 | } |
517 | |
|
518 | 0 | builder.Build(bytemap_, &bytemap_range_); |
519 | |
|
520 | 0 | if ((0)) { // For debugging, use trivial bytemap. |
521 | 0 | ABSL_LOG(ERROR) << "Using trivial bytemap."; |
522 | 0 | for (int i = 0; i < 256; i++) |
523 | 0 | bytemap_[i] = static_cast<uint8_t>(i); |
524 | 0 | bytemap_range_ = 256; |
525 | 0 | } |
526 | 0 | } |
527 | | |
528 | | // Prog::Flatten() implements a graph rewriting algorithm. |
529 | | // |
530 | | // The overall process is similar to epsilon removal, but retains some epsilon |
531 | | // transitions: those from Capture and EmptyWidth instructions; and those from |
532 | | // nullable subexpressions. (The latter avoids quadratic blowup in transitions |
533 | | // in the worst case.) It might be best thought of as Alt instruction elision. |
534 | | // |
535 | | // In conceptual terms, it divides the Prog into "trees" of instructions, then |
536 | | // traverses the "trees" in order to produce "lists" of instructions. A "tree" |
537 | | // is one or more instructions that grow from one "root" instruction to one or |
538 | | // more "leaf" instructions; if a "tree" has exactly one instruction, then the |
539 | | // "root" is also the "leaf". In most cases, a "root" is the successor of some |
540 | | // "leaf" (i.e. the "leaf" instruction's out() returns the "root" instruction) |
541 | | // and is considered a "successor root". A "leaf" can be a ByteRange, Capture, |
542 | | // EmptyWidth or Match instruction. However, this is insufficient for handling |
543 | | // nested nullable subexpressions correctly, so in some cases, a "root" is the |
544 | | // dominator of the instructions reachable from some "successor root" (i.e. it |
545 | | // has an unreachable predecessor) and is considered a "dominator root". Since |
546 | | // only Alt instructions can be "dominator roots" (other instructions would be |
547 | | // "leaves"), only Alt instructions are required to be marked as predecessors. |
548 | | // |
549 | | // Dividing the Prog into "trees" comprises two passes: marking the "successor |
550 | | // roots" and the predecessors; and marking the "dominator roots". Sorting the |
551 | | // "successor roots" by their bytecode offsets enables iteration in order from |
552 | | // greatest to least during the second pass; by working backwards in this case |
553 | | // and flooding the graph no further than "leaves" and already marked "roots", |
554 | | // it becomes possible to mark "dominator roots" without doing excessive work. |
555 | | // |
556 | | // Traversing the "trees" is just iterating over the "roots" in order of their |
557 | | // marking and flooding the graph no further than "leaves" and "roots". When a |
558 | | // "leaf" is reached, the instruction is copied with its successor remapped to |
559 | | // its "root" number. When a "root" is reached, a Nop instruction is generated |
560 | | // with its successor remapped similarly. As each "list" is produced, its last |
561 | | // instruction is marked as such. After all of the "lists" have been produced, |
562 | | // a pass over their instructions remaps their successors to bytecode offsets. |
563 | 0 | void Prog::Flatten() { |
564 | 0 | if (did_flatten_) |
565 | 0 | return; |
566 | 0 | did_flatten_ = true; |
567 | | |
568 | | // Scratch structures. It's important that these are reused by functions |
569 | | // that we call in loops because they would thrash the heap otherwise. |
570 | 0 | SparseSet reachable(size()); |
571 | 0 | std::vector<int> stk; |
572 | 0 | stk.reserve(size()); |
573 | | |
574 | | // First pass: Marks "successor roots" and predecessors. |
575 | | // Builds the mapping from inst-ids to root-ids. |
576 | 0 | SparseArray<int> rootmap(size()); |
577 | 0 | SparseArray<int> predmap(size()); |
578 | 0 | std::vector<std::vector<int>> predvec; |
579 | 0 | MarkSuccessors(&rootmap, &predmap, &predvec, &reachable, &stk); |
580 | | |
581 | | // Second pass: Marks "dominator roots". |
582 | 0 | SparseArray<int> sorted(rootmap); |
583 | 0 | std::sort(sorted.begin(), sorted.end(), sorted.less); |
584 | 0 | for (SparseArray<int>::const_iterator i = sorted.end() - 1; |
585 | 0 | i != sorted.begin(); |
586 | 0 | --i) { |
587 | 0 | if (i->index() != start_unanchored() && i->index() != start()) |
588 | 0 | MarkDominator(i->index(), &rootmap, &predmap, &predvec, &reachable, &stk); |
589 | 0 | } |
590 | | |
591 | | // Third pass: Emits "lists". Remaps outs to root-ids. |
592 | | // Builds the mapping from root-ids to flat-ids. |
593 | 0 | std::vector<int> flatmap(rootmap.size()); |
594 | 0 | std::vector<Inst> flat; |
595 | 0 | flat.reserve(size()); |
596 | 0 | for (SparseArray<int>::const_iterator i = rootmap.begin(); |
597 | 0 | i != rootmap.end(); |
598 | 0 | ++i) { |
599 | 0 | flatmap[i->value()] = static_cast<int>(flat.size()); |
600 | 0 | EmitList(i->index(), &rootmap, &flat, &reachable, &stk); |
601 | 0 | flat.back().set_last(); |
602 | | // We have the bounds of the "list", so this is the |
603 | | // most convenient point at which to compute hints. |
604 | 0 | ComputeHints(&flat, flatmap[i->value()], static_cast<int>(flat.size())); |
605 | 0 | } |
606 | |
|
607 | 0 | list_count_ = static_cast<int>(flatmap.size()); |
608 | 0 | for (int i = 0; i < kNumInst; i++) |
609 | 0 | inst_count_[i] = 0; |
610 | | |
611 | | // Fourth pass: Remaps outs to flat-ids. |
612 | | // Counts instructions by opcode. |
613 | 0 | for (int id = 0; id < static_cast<int>(flat.size()); id++) { |
614 | 0 | Inst* ip = &flat[id]; |
615 | 0 | if (ip->opcode() != kInstAltMatch) // handled in EmitList() |
616 | 0 | ip->set_out(flatmap[ip->out()]); |
617 | 0 | inst_count_[ip->opcode()]++; |
618 | 0 | } |
619 | |
|
620 | | #if !defined(NDEBUG) |
621 | | // Address a `-Wunused-but-set-variable' warning from Clang 13.x. |
622 | | size_t total = 0; |
623 | | for (int i = 0; i < kNumInst; i++) |
624 | | total += inst_count_[i]; |
625 | | ABSL_CHECK_EQ(total, flat.size()); |
626 | | #endif |
627 | | |
628 | | // Remap start_unanchored and start. |
629 | 0 | if (start_unanchored() == 0) { |
630 | 0 | ABSL_DCHECK_EQ(start(), 0); |
631 | 0 | } else if (start_unanchored() == start()) { |
632 | 0 | set_start_unanchored(flatmap[1]); |
633 | 0 | set_start(flatmap[1]); |
634 | 0 | } else { |
635 | 0 | set_start_unanchored(flatmap[1]); |
636 | 0 | set_start(flatmap[2]); |
637 | 0 | } |
638 | | |
639 | | // Finally, replace the old instructions with the new instructions. |
640 | 0 | size_ = static_cast<int>(flat.size()); |
641 | 0 | inst_ = PODArray<Inst>(size_); |
642 | 0 | memmove(inst_.data(), flat.data(), size_*sizeof inst_[0]); |
643 | | |
644 | | // Populate the list heads for BitState. |
645 | | // 512 instructions limits the memory footprint to 1KiB. |
646 | 0 | if (size_ <= 512) { |
647 | 0 | list_heads_ = PODArray<uint16_t>(size_); |
648 | | // 0xFF makes it more obvious if we try to look up a non-head. |
649 | 0 | memset(list_heads_.data(), 0xFF, size_*sizeof list_heads_[0]); |
650 | 0 | for (int i = 0; i < list_count_; ++i) |
651 | 0 | list_heads_[flatmap[i]] = i; |
652 | 0 | } |
653 | | |
654 | | // BitState allocates a bitmap of size list_count_ * (text.size()+1) |
655 | | // for tracking pairs of possibilities that it has already explored. |
656 | 0 | const size_t kBitStateBitmapMaxSize = 256*1024; // max size in bits |
657 | 0 | bit_state_text_max_size_ = kBitStateBitmapMaxSize / list_count_ - 1; |
658 | 0 | } |
659 | | |
660 | | void Prog::MarkSuccessors(SparseArray<int>* rootmap, |
661 | | SparseArray<int>* predmap, |
662 | | std::vector<std::vector<int>>* predvec, |
663 | 0 | SparseSet* reachable, std::vector<int>* stk) { |
664 | | // Mark the kInstFail instruction. |
665 | 0 | rootmap->set_new(0, rootmap->size()); |
666 | | |
667 | | // Mark the start_unanchored and start instructions. |
668 | 0 | if (!rootmap->has_index(start_unanchored())) |
669 | 0 | rootmap->set_new(start_unanchored(), rootmap->size()); |
670 | 0 | if (!rootmap->has_index(start())) |
671 | 0 | rootmap->set_new(start(), rootmap->size()); |
672 | |
|
673 | 0 | reachable->clear(); |
674 | 0 | stk->clear(); |
675 | 0 | stk->push_back(start_unanchored()); |
676 | 0 | while (!stk->empty()) { |
677 | 0 | int id = stk->back(); |
678 | 0 | stk->pop_back(); |
679 | 0 | Loop: |
680 | 0 | if (reachable->contains(id)) |
681 | 0 | continue; |
682 | 0 | reachable->insert_new(id); |
683 | |
|
684 | 0 | Inst* ip = inst(id); |
685 | 0 | switch (ip->opcode()) { |
686 | 0 | default: |
687 | 0 | ABSL_LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); |
688 | 0 | break; |
689 | | |
690 | 0 | case kInstAltMatch: |
691 | 0 | case kInstAlt: |
692 | | // Mark this instruction as a predecessor of each out. |
693 | 0 | for (int out : {ip->out(), ip->out1()}) { |
694 | 0 | if (!predmap->has_index(out)) { |
695 | 0 | predmap->set_new(out, static_cast<int>(predvec->size())); |
696 | 0 | predvec->emplace_back(); |
697 | 0 | } |
698 | 0 | (*predvec)[predmap->get_existing(out)].emplace_back(id); |
699 | 0 | } |
700 | 0 | stk->push_back(ip->out1()); |
701 | 0 | id = ip->out(); |
702 | 0 | goto Loop; |
703 | | |
704 | 0 | case kInstByteRange: |
705 | 0 | case kInstCapture: |
706 | 0 | case kInstEmptyWidth: |
707 | | // Mark the out of this instruction as a "root". |
708 | 0 | if (!rootmap->has_index(ip->out())) |
709 | 0 | rootmap->set_new(ip->out(), rootmap->size()); |
710 | 0 | id = ip->out(); |
711 | 0 | goto Loop; |
712 | | |
713 | 0 | case kInstNop: |
714 | 0 | id = ip->out(); |
715 | 0 | goto Loop; |
716 | | |
717 | 0 | case kInstMatch: |
718 | 0 | case kInstFail: |
719 | 0 | break; |
720 | 0 | } |
721 | 0 | } |
722 | 0 | } |
723 | | |
724 | | void Prog::MarkDominator(int root, SparseArray<int>* rootmap, |
725 | | SparseArray<int>* predmap, |
726 | | std::vector<std::vector<int>>* predvec, |
727 | 0 | SparseSet* reachable, std::vector<int>* stk) { |
728 | 0 | reachable->clear(); |
729 | 0 | stk->clear(); |
730 | 0 | stk->push_back(root); |
731 | 0 | while (!stk->empty()) { |
732 | 0 | int id = stk->back(); |
733 | 0 | stk->pop_back(); |
734 | 0 | Loop: |
735 | 0 | if (reachable->contains(id)) |
736 | 0 | continue; |
737 | 0 | reachable->insert_new(id); |
738 | |
|
739 | 0 | if (id != root && rootmap->has_index(id)) { |
740 | | // We reached another "tree" via epsilon transition. |
741 | 0 | continue; |
742 | 0 | } |
743 | | |
744 | 0 | Inst* ip = inst(id); |
745 | 0 | switch (ip->opcode()) { |
746 | 0 | default: |
747 | 0 | ABSL_LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); |
748 | 0 | break; |
749 | | |
750 | 0 | case kInstAltMatch: |
751 | 0 | case kInstAlt: |
752 | 0 | stk->push_back(ip->out1()); |
753 | 0 | id = ip->out(); |
754 | 0 | goto Loop; |
755 | | |
756 | 0 | case kInstByteRange: |
757 | 0 | case kInstCapture: |
758 | 0 | case kInstEmptyWidth: |
759 | 0 | break; |
760 | | |
761 | 0 | case kInstNop: |
762 | 0 | id = ip->out(); |
763 | 0 | goto Loop; |
764 | | |
765 | 0 | case kInstMatch: |
766 | 0 | case kInstFail: |
767 | 0 | break; |
768 | 0 | } |
769 | 0 | } |
770 | | |
771 | 0 | for (SparseSet::const_iterator i = reachable->begin(); |
772 | 0 | i != reachable->end(); |
773 | 0 | ++i) { |
774 | 0 | int id = *i; |
775 | 0 | if (predmap->has_index(id)) { |
776 | 0 | for (int pred : (*predvec)[predmap->get_existing(id)]) { |
777 | 0 | if (!reachable->contains(pred)) { |
778 | | // id has a predecessor that cannot be reached from root! |
779 | | // Therefore, id must be a "root" too - mark it as such. |
780 | 0 | if (!rootmap->has_index(id)) |
781 | 0 | rootmap->set_new(id, rootmap->size()); |
782 | 0 | } |
783 | 0 | } |
784 | 0 | } |
785 | 0 | } |
786 | 0 | } |
787 | | |
788 | | void Prog::EmitList(int root, SparseArray<int>* rootmap, |
789 | | std::vector<Inst>* flat, |
790 | 0 | SparseSet* reachable, std::vector<int>* stk) { |
791 | 0 | reachable->clear(); |
792 | 0 | stk->clear(); |
793 | 0 | stk->push_back(root); |
794 | 0 | while (!stk->empty()) { |
795 | 0 | int id = stk->back(); |
796 | 0 | stk->pop_back(); |
797 | 0 | Loop: |
798 | 0 | if (reachable->contains(id)) |
799 | 0 | continue; |
800 | 0 | reachable->insert_new(id); |
801 | |
|
802 | 0 | if (id != root && rootmap->has_index(id)) { |
803 | | // We reached another "tree" via epsilon transition. Emit a kInstNop |
804 | | // instruction so that the Prog does not become quadratically larger. |
805 | 0 | flat->emplace_back(); |
806 | 0 | flat->back().set_opcode(kInstNop); |
807 | 0 | flat->back().set_out(rootmap->get_existing(id)); |
808 | 0 | continue; |
809 | 0 | } |
810 | | |
811 | 0 | Inst* ip = inst(id); |
812 | 0 | switch (ip->opcode()) { |
813 | 0 | default: |
814 | 0 | ABSL_LOG(DFATAL) << "unhandled opcode: " << ip->opcode(); |
815 | 0 | break; |
816 | | |
817 | 0 | case kInstAltMatch: |
818 | 0 | flat->emplace_back(); |
819 | 0 | flat->back().set_opcode(kInstAltMatch); |
820 | 0 | flat->back().set_out(static_cast<int>(flat->size())); |
821 | 0 | flat->back().out1_ = static_cast<uint32_t>(flat->size())+1; |
822 | 0 | [[fallthrough]]; |
823 | |
|
824 | 0 | case kInstAlt: |
825 | 0 | stk->push_back(ip->out1()); |
826 | 0 | id = ip->out(); |
827 | 0 | goto Loop; |
828 | | |
829 | 0 | case kInstByteRange: |
830 | 0 | case kInstCapture: |
831 | 0 | case kInstEmptyWidth: |
832 | 0 | flat->emplace_back(); |
833 | 0 | memmove(&flat->back(), ip, sizeof *ip); |
834 | 0 | flat->back().set_out(rootmap->get_existing(ip->out())); |
835 | 0 | break; |
836 | | |
837 | 0 | case kInstNop: |
838 | 0 | id = ip->out(); |
839 | 0 | goto Loop; |
840 | | |
841 | 0 | case kInstMatch: |
842 | 0 | case kInstFail: |
843 | 0 | flat->emplace_back(); |
844 | 0 | memmove(&flat->back(), ip, sizeof *ip); |
845 | 0 | break; |
846 | 0 | } |
847 | 0 | } |
848 | 0 | } |
849 | | |
850 | | // For each ByteRange instruction in [begin, end), computes a hint to execution |
851 | | // engines: the delta to the next instruction (in flat) worth exploring iff the |
852 | | // current instruction matched. |
853 | | // |
854 | | // Implements a coloring algorithm related to ByteMapBuilder, but in this case, |
855 | | // colors are instructions and recoloring ranges precisely identifies conflicts |
856 | | // between instructions. Iterating backwards over [begin, end) is guaranteed to |
857 | | // identify the nearest conflict (if any) with only linear complexity. |
858 | 0 | void Prog::ComputeHints(std::vector<Inst>* flat, int begin, int end) { |
859 | 0 | Bitmap256 splits; |
860 | 0 | int colors[256]; |
861 | |
|
862 | 0 | bool dirty = false; |
863 | 0 | for (int id = end; id >= begin; --id) { |
864 | 0 | if (id == end || |
865 | 0 | (*flat)[id].opcode() != kInstByteRange) { |
866 | 0 | if (dirty) { |
867 | 0 | dirty = false; |
868 | 0 | splits.Clear(); |
869 | 0 | } |
870 | 0 | splits.Set(255); |
871 | 0 | colors[255] = id; |
872 | | // At this point, the [0-255] range is colored with id. |
873 | | // Thus, hints cannot point beyond id; and if id == end, |
874 | | // hints that would have pointed to id will be 0 instead. |
875 | 0 | continue; |
876 | 0 | } |
877 | 0 | dirty = true; |
878 | | |
879 | | // We recolor the [lo-hi] range with id. Note that first ratchets backwards |
880 | | // from end to the nearest conflict (if any) during recoloring. |
881 | 0 | int first = end; |
882 | 0 | auto Recolor = [&](int lo, int hi) { |
883 | | // Like ByteMapBuilder, we split at lo-1 and at hi. |
884 | 0 | --lo; |
885 | |
|
886 | 0 | if (0 <= lo && !splits.Test(lo)) { |
887 | 0 | splits.Set(lo); |
888 | 0 | int next = splits.FindNextSetBit(lo+1); |
889 | 0 | colors[lo] = colors[next]; |
890 | 0 | } |
891 | 0 | if (!splits.Test(hi)) { |
892 | 0 | splits.Set(hi); |
893 | 0 | int next = splits.FindNextSetBit(hi+1); |
894 | 0 | colors[hi] = colors[next]; |
895 | 0 | } |
896 | |
|
897 | 0 | int c = lo+1; |
898 | 0 | while (c < 256) { |
899 | 0 | int next = splits.FindNextSetBit(c); |
900 | | // Ratchet backwards... |
901 | 0 | first = std::min(first, colors[next]); |
902 | | // Recolor with id - because it's the new nearest conflict! |
903 | 0 | colors[next] = id; |
904 | 0 | if (next == hi) |
905 | 0 | break; |
906 | 0 | c = next+1; |
907 | 0 | } |
908 | 0 | }; |
909 | |
|
910 | 0 | Inst* ip = &(*flat)[id]; |
911 | 0 | int lo = ip->lo(); |
912 | 0 | int hi = ip->hi(); |
913 | 0 | Recolor(lo, hi); |
914 | 0 | if (ip->foldcase() && lo <= 'z' && hi >= 'a') { |
915 | 0 | int foldlo = lo; |
916 | 0 | int foldhi = hi; |
917 | 0 | if (foldlo < 'a') |
918 | 0 | foldlo = 'a'; |
919 | 0 | if (foldhi > 'z') |
920 | 0 | foldhi = 'z'; |
921 | 0 | if (foldlo <= foldhi) { |
922 | 0 | foldlo += 'A' - 'a'; |
923 | 0 | foldhi += 'A' - 'a'; |
924 | 0 | Recolor(foldlo, foldhi); |
925 | 0 | } |
926 | 0 | } |
927 | |
|
928 | 0 | if (first != end) { |
929 | 0 | uint16_t hint = static_cast<uint16_t>(std::min(first - id, 32767)); |
930 | 0 | ip->hint_foldcase_ |= hint<<1; |
931 | 0 | } |
932 | 0 | } |
933 | 0 | } |
934 | | |
935 | | // The final state will always be this, which frees up a register for the hot |
936 | | // loop and thus avoids the spilling that can occur when building with Clang. |
937 | | static const size_t kShiftDFAFinal = 9; |
938 | | |
939 | | // This function takes the prefix as std::string (i.e. not const std::string& |
940 | | // as normal) because it's going to clobber it, so a temporary is convenient. |
941 | 0 | static uint64_t* BuildShiftDFA(std::string prefix) { |
942 | | // This constant is for convenience now and also for correctness later when |
943 | | // we clobber the prefix, but still need to know how long it was initially. |
944 | 0 | const size_t size = prefix.size(); |
945 | | |
946 | | // Construct the NFA. |
947 | | // The table is indexed by input byte; each element is a bitfield of states |
948 | | // reachable by the input byte. Given a bitfield of the current states, the |
949 | | // bitfield of states reachable from those is - for this specific purpose - |
950 | | // always ((ncurr << 1) | 1). Intersecting the reachability bitfields gives |
951 | | // the bitfield of the next states reached by stepping over the input byte. |
952 | | // Credits for this technique: the Hyperscan paper by Geoff Langdale et al. |
953 | 0 | uint16_t nfa[256]{}; |
954 | 0 | for (size_t i = 0; i < size; ++i) { |
955 | 0 | uint8_t b = prefix[i]; |
956 | 0 | nfa[b] |= 1 << (i+1); |
957 | 0 | } |
958 | | // This is the `\C*?` for unanchored search. |
959 | 0 | for (int b = 0; b < 256; ++b) |
960 | 0 | nfa[b] |= 1; |
961 | | |
962 | | // This maps from DFA state to NFA states; the reverse mapping is used when |
963 | | // recording transitions and gets implemented with plain old linear search. |
964 | | // The "Shift DFA" technique limits this to ten states when using uint64_t; |
965 | | // to allow for the initial state, we use at most nine bytes of the prefix. |
966 | | // That same limit is also why uint16_t is sufficient for the NFA bitfield. |
967 | 0 | uint16_t states[kShiftDFAFinal+1]{}; |
968 | 0 | states[0] = 1; |
969 | 0 | for (size_t dcurr = 0; dcurr < size; ++dcurr) { |
970 | 0 | uint8_t b = prefix[dcurr]; |
971 | 0 | uint16_t ncurr = states[dcurr]; |
972 | 0 | uint16_t nnext = nfa[b] & ((ncurr << 1) | 1); |
973 | 0 | size_t dnext = dcurr+1; |
974 | 0 | if (dnext == size) |
975 | 0 | dnext = kShiftDFAFinal; |
976 | 0 | states[dnext] = nnext; |
977 | 0 | } |
978 | | |
979 | | // Sort and unique the bytes of the prefix to avoid repeating work while we |
980 | | // record transitions. This clobbers the prefix, but it's no longer needed. |
981 | 0 | std::sort(prefix.begin(), prefix.end()); |
982 | 0 | prefix.erase(std::unique(prefix.begin(), prefix.end()), prefix.end()); |
983 | | |
984 | | // Construct the DFA. |
985 | | // The table is indexed by input byte; each element is effectively a packed |
986 | | // array of uint6_t; each array value will be multiplied by six in order to |
987 | | // avoid having to do so later in the hot loop as well as masking/shifting. |
988 | | // Credits for this technique: "Shift-based DFAs" on GitHub by Per Vognsen. |
989 | 0 | uint64_t* dfa = new uint64_t[256]{}; |
990 | | // Record a transition from each state for each of the bytes of the prefix. |
991 | | // Note that all other input bytes go back to the initial state by default. |
992 | 0 | for (size_t dcurr = 0; dcurr < size; ++dcurr) { |
993 | 0 | for (uint8_t b : prefix) { |
994 | 0 | uint16_t ncurr = states[dcurr]; |
995 | 0 | uint16_t nnext = nfa[b] & ((ncurr << 1) | 1); |
996 | 0 | size_t dnext = 0; |
997 | 0 | while (states[dnext] != nnext) |
998 | 0 | ++dnext; |
999 | 0 | dfa[b] |= static_cast<uint64_t>(dnext * 6) << (dcurr * 6); |
1000 | | // Convert ASCII letters to uppercase and record the extra transitions. |
1001 | | // Note that ASCII letters are guaranteed to be lowercase at this point |
1002 | | // because that's how the parser normalises them. #FunFact: 'k' and 's' |
1003 | | // match U+212A and U+017F, respectively, so they won't occur here when |
1004 | | // using UTF-8 encoding because the parser will emit character classes. |
1005 | 0 | if ('a' <= b && b <= 'z') { |
1006 | 0 | b -= 'a' - 'A'; |
1007 | 0 | dfa[b] |= static_cast<uint64_t>(dnext * 6) << (dcurr * 6); |
1008 | 0 | } |
1009 | 0 | } |
1010 | 0 | } |
1011 | | // This lets the final state "saturate", which will matter for performance: |
1012 | | // in the hot loop, we check for a match only at the end of each iteration, |
1013 | | // so we must keep signalling the match until we get around to checking it. |
1014 | 0 | for (int b = 0; b < 256; ++b) |
1015 | 0 | dfa[b] |= static_cast<uint64_t>(kShiftDFAFinal * 6) << (kShiftDFAFinal * 6); |
1016 | |
|
1017 | 0 | return dfa; |
1018 | 0 | } |
1019 | | |
1020 | | void Prog::ConfigurePrefixAccel(const std::string& prefix, |
1021 | 0 | bool prefix_foldcase) { |
1022 | 0 | prefix_foldcase_ = prefix_foldcase; |
1023 | 0 | prefix_size_ = prefix.size(); |
1024 | 0 | if (prefix_foldcase_) { |
1025 | | // Use PrefixAccel_ShiftDFA(). |
1026 | | // ... and no more than nine bytes of the prefix. (See above for details.) |
1027 | 0 | prefix_size_ = std::min(prefix_size_, kShiftDFAFinal); |
1028 | 0 | prefix_dfa_ = BuildShiftDFA(prefix.substr(0, prefix_size_)); |
1029 | 0 | } else if (prefix_size_ != 1) { |
1030 | | // Use PrefixAccel_FrontAndBack(). |
1031 | 0 | prefix_front_ = prefix.front(); |
1032 | 0 | prefix_back_ = prefix.back(); |
1033 | 0 | } else { |
1034 | | // Use memchr(3). |
1035 | 0 | prefix_front_ = prefix.front(); |
1036 | 0 | } |
1037 | 0 | } |
1038 | | |
1039 | 0 | const void* Prog::PrefixAccel_ShiftDFA(const void* data, size_t size) { |
1040 | 0 | if (size < prefix_size_) |
1041 | 0 | return NULL; |
1042 | | |
1043 | 0 | uint64_t curr = 0; |
1044 | | |
1045 | | // At the time of writing, rough benchmarks on a Broadwell machine showed |
1046 | | // that this unroll factor (i.e. eight) achieves a speedup factor of two. |
1047 | 0 | if (size >= 8) { |
1048 | 0 | const uint8_t* p = reinterpret_cast<const uint8_t*>(data); |
1049 | 0 | const uint8_t* endp = p + (size&~7); |
1050 | 0 | do { |
1051 | 0 | uint8_t b0 = p[0]; |
1052 | 0 | uint8_t b1 = p[1]; |
1053 | 0 | uint8_t b2 = p[2]; |
1054 | 0 | uint8_t b3 = p[3]; |
1055 | 0 | uint8_t b4 = p[4]; |
1056 | 0 | uint8_t b5 = p[5]; |
1057 | 0 | uint8_t b6 = p[6]; |
1058 | 0 | uint8_t b7 = p[7]; |
1059 | |
|
1060 | 0 | uint64_t next0 = prefix_dfa_[b0]; |
1061 | 0 | uint64_t next1 = prefix_dfa_[b1]; |
1062 | 0 | uint64_t next2 = prefix_dfa_[b2]; |
1063 | 0 | uint64_t next3 = prefix_dfa_[b3]; |
1064 | 0 | uint64_t next4 = prefix_dfa_[b4]; |
1065 | 0 | uint64_t next5 = prefix_dfa_[b5]; |
1066 | 0 | uint64_t next6 = prefix_dfa_[b6]; |
1067 | 0 | uint64_t next7 = prefix_dfa_[b7]; |
1068 | |
|
1069 | 0 | uint64_t curr0 = next0 >> (curr & 63); |
1070 | 0 | uint64_t curr1 = next1 >> (curr0 & 63); |
1071 | 0 | uint64_t curr2 = next2 >> (curr1 & 63); |
1072 | 0 | uint64_t curr3 = next3 >> (curr2 & 63); |
1073 | 0 | uint64_t curr4 = next4 >> (curr3 & 63); |
1074 | 0 | uint64_t curr5 = next5 >> (curr4 & 63); |
1075 | 0 | uint64_t curr6 = next6 >> (curr5 & 63); |
1076 | 0 | uint64_t curr7 = next7 >> (curr6 & 63); |
1077 | |
|
1078 | 0 | if ((curr7 & 63) == kShiftDFAFinal * 6) { |
1079 | | // At the time of writing, using the same masking subexpressions from |
1080 | | // the preceding lines caused Clang to clutter the hot loop computing |
1081 | | // them - even though they aren't actually needed for shifting! Hence |
1082 | | // these rewritten conditions, which achieve a speedup factor of two. |
1083 | 0 | if (((curr7-curr0) & 63) == 0) return p+1-prefix_size_; |
1084 | 0 | if (((curr7-curr1) & 63) == 0) return p+2-prefix_size_; |
1085 | 0 | if (((curr7-curr2) & 63) == 0) return p+3-prefix_size_; |
1086 | 0 | if (((curr7-curr3) & 63) == 0) return p+4-prefix_size_; |
1087 | 0 | if (((curr7-curr4) & 63) == 0) return p+5-prefix_size_; |
1088 | 0 | if (((curr7-curr5) & 63) == 0) return p+6-prefix_size_; |
1089 | 0 | if (((curr7-curr6) & 63) == 0) return p+7-prefix_size_; |
1090 | 0 | if (((curr7-curr7) & 63) == 0) return p+8-prefix_size_; |
1091 | 0 | } |
1092 | | |
1093 | 0 | curr = curr7; |
1094 | 0 | p += 8; |
1095 | 0 | } while (p != endp); |
1096 | 0 | data = p; |
1097 | 0 | size = size&7; |
1098 | 0 | } |
1099 | | |
1100 | 0 | const uint8_t* p = reinterpret_cast<const uint8_t*>(data); |
1101 | 0 | const uint8_t* endp = p + size; |
1102 | 0 | while (p != endp) { |
1103 | 0 | uint8_t b = *p++; |
1104 | 0 | uint64_t next = prefix_dfa_[b]; |
1105 | 0 | curr = next >> (curr & 63); |
1106 | 0 | if ((curr & 63) == kShiftDFAFinal * 6) |
1107 | 0 | return p-prefix_size_; |
1108 | 0 | } |
1109 | 0 | return NULL; |
1110 | 0 | } |
1111 | | |
1112 | | #if defined(__AVX2__) |
1113 | | // Finds the least significant non-zero bit in n. |
1114 | | static int FindLSBSet(uint32_t n) { |
1115 | | ABSL_DCHECK_NE(n, uint32_t{0}); |
1116 | | #if defined(__GNUC__) |
1117 | | return __builtin_ctz(n); |
1118 | | #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86)) |
1119 | | unsigned long c; |
1120 | | _BitScanForward(&c, n); |
1121 | | return static_cast<int>(c); |
1122 | | #else |
1123 | | int c = 31; |
1124 | | for (int shift = 1 << 4; shift != 0; shift >>= 1) { |
1125 | | uint32_t word = n << shift; |
1126 | | if (word != 0) { |
1127 | | n = word; |
1128 | | c -= shift; |
1129 | | } |
1130 | | } |
1131 | | return c; |
1132 | | #endif |
1133 | | } |
1134 | | #endif |
1135 | | |
1136 | 0 | const void* Prog::PrefixAccel_FrontAndBack(const void* data, size_t size) { |
1137 | 0 | ABSL_DCHECK_GE(prefix_size_, size_t{2}); |
1138 | 0 | if (size < prefix_size_) |
1139 | 0 | return NULL; |
1140 | | // Don't bother searching the last prefix_size_-1 bytes for prefix_front_. |
1141 | | // This also means that probing for prefix_back_ doesn't go out of bounds. |
1142 | 0 | size -= prefix_size_-1; |
1143 | |
|
1144 | | #if defined(__AVX2__) |
1145 | | // Use AVX2 to look for prefix_front_ and prefix_back_ 32 bytes at a time. |
1146 | | if (size >= sizeof(__m256i)) { |
1147 | | const __m256i* fp = reinterpret_cast<const __m256i*>( |
1148 | | reinterpret_cast<const char*>(data)); |
1149 | | const __m256i* bp = reinterpret_cast<const __m256i*>( |
1150 | | reinterpret_cast<const char*>(data) + prefix_size_-1); |
1151 | | const __m256i* endfp = fp + size/sizeof(__m256i); |
1152 | | const __m256i f_set1 = _mm256_set1_epi8(prefix_front_); |
1153 | | const __m256i b_set1 = _mm256_set1_epi8(prefix_back_); |
1154 | | do { |
1155 | | const __m256i f_loadu = _mm256_loadu_si256(fp++); |
1156 | | const __m256i b_loadu = _mm256_loadu_si256(bp++); |
1157 | | const __m256i f_cmpeq = _mm256_cmpeq_epi8(f_set1, f_loadu); |
1158 | | const __m256i b_cmpeq = _mm256_cmpeq_epi8(b_set1, b_loadu); |
1159 | | const int fb_testz = _mm256_testz_si256(f_cmpeq, b_cmpeq); |
1160 | | if (fb_testz == 0) { // ZF: 1 means zero, 0 means non-zero. |
1161 | | const __m256i fb_and = _mm256_and_si256(f_cmpeq, b_cmpeq); |
1162 | | const int fb_movemask = _mm256_movemask_epi8(fb_and); |
1163 | | const int fb_ctz = FindLSBSet(fb_movemask); |
1164 | | return reinterpret_cast<const char*>(fp-1) + fb_ctz; |
1165 | | } |
1166 | | } while (fp != endfp); |
1167 | | data = fp; |
1168 | | size = size%sizeof(__m256i); |
1169 | | } |
1170 | | #endif |
1171 | |
|
1172 | 0 | const char* p0 = reinterpret_cast<const char*>(data); |
1173 | 0 | for (const char* p = p0;; p++) { |
1174 | 0 | ABSL_DCHECK_GE(size, static_cast<size_t>(p-p0)); |
1175 | 0 | p = reinterpret_cast<const char*>(memchr(p, prefix_front_, size - (p-p0))); |
1176 | 0 | if (p == NULL || p[prefix_size_-1] == prefix_back_) |
1177 | 0 | return p; |
1178 | 0 | } |
1179 | 0 | } |
1180 | | |
1181 | | } // namespace re2 |