Coverage Report

Created: 2026-05-27 07:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/proc/self/cwd/external/re2~/re2/prog.cc
Line
Count
Source
1
// Copyright 2007 The RE2 Authors.  All Rights Reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
// Compiled regular expression representation.
6
// Tested by compile_test.cc
7
8
#include "re2/prog.h"
9
10
#include <stdint.h>
11
#include <string.h>
12
13
#include <algorithm>
14
#include <string>
15
#include <utility>
16
#include <vector>
17
18
#include "absl/log/absl_check.h"
19
#include "absl/log/absl_log.h"
20
#include "absl/strings/str_format.h"
21
#include "absl/strings/string_view.h"
22
#include "re2/bitmap256.h"
23
#include "re2/pod_array.h"
24
#include "re2/sparse_array.h"
25
#include "re2/sparse_set.h"
26
27
#if defined(__AVX2__)
28
#include <immintrin.h>
29
#ifdef _MSC_VER
30
#include <intrin.h>
31
#endif
32
#endif
33
34
namespace re2 {
35
36
// Constructors per Inst opcode
37
38
0
void Prog::Inst::InitAlt(uint32_t out, uint32_t out1) {
39
0
  ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
40
0
  set_out_opcode(out, kInstAlt);
41
0
  out1_ = out1;
42
0
}
43
44
0
void Prog::Inst::InitByteRange(int lo, int hi, int foldcase, uint32_t out) {
45
0
  ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
46
0
  set_out_opcode(out, kInstByteRange);
47
0
  lo_ = lo & 0xFF;
48
0
  hi_ = hi & 0xFF;
49
0
  hint_foldcase_ = foldcase&1;
50
0
}
51
52
0
void Prog::Inst::InitCapture(int cap, uint32_t out) {
53
0
  ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
54
0
  set_out_opcode(out, kInstCapture);
55
0
  cap_ = cap;
56
0
}
57
58
0
void Prog::Inst::InitEmptyWidth(EmptyOp empty, uint32_t out) {
59
0
  ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
60
0
  set_out_opcode(out, kInstEmptyWidth);
61
0
  empty_ = empty;
62
0
}
63
64
0
void Prog::Inst::InitMatch(int32_t id) {
65
0
  ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
66
0
  set_opcode(kInstMatch);
67
0
  match_id_ = id;
68
0
}
69
70
0
void Prog::Inst::InitNop(uint32_t out) {
71
0
  ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
72
0
  set_opcode(kInstNop);
73
0
}
74
75
0
void Prog::Inst::InitFail() {
76
0
  ABSL_DCHECK_EQ(out_opcode_, uint32_t{0});
77
0
  set_opcode(kInstFail);
78
0
}
79
80
0
std::string Prog::Inst::Dump() {
81
0
  switch (opcode()) {
82
0
    default:
83
0
      return absl::StrFormat("opcode %d", static_cast<int>(opcode()));
84
85
0
    case kInstAlt:
86
0
      return absl::StrFormat("alt -> %d | %d", out(), out1_);
87
88
0
    case kInstAltMatch:
89
0
      return absl::StrFormat("altmatch -> %d | %d", out(), out1_);
90
91
0
    case kInstByteRange:
92
0
      return absl::StrFormat("byte%s [%02x-%02x] %d -> %d",
93
0
                             foldcase() ? "/i" : "",
94
0
                             lo_, hi_, hint(), out());
95
96
0
    case kInstCapture:
97
0
      return absl::StrFormat("capture %d -> %d", cap_, out());
98
99
0
    case kInstEmptyWidth:
100
0
      return absl::StrFormat("emptywidth %#x -> %d",
101
0
                             static_cast<int>(empty_), out());
102
103
0
    case kInstMatch:
104
0
      return absl::StrFormat("match! %d", match_id());
105
106
0
    case kInstNop:
107
0
      return absl::StrFormat("nop -> %d", out());
108
109
0
    case kInstFail:
110
0
      return absl::StrFormat("fail");
111
0
  }
112
0
}
113
114
Prog::Prog()
115
0
  : anchor_start_(false),
116
0
    anchor_end_(false),
117
0
    reversed_(false),
118
0
    did_flatten_(false),
119
0
    did_onepass_(false),
120
0
    start_(0),
121
0
    start_unanchored_(0),
122
0
    size_(0),
123
0
    bytemap_range_(0),
124
0
    prefix_foldcase_(false),
125
0
    prefix_size_(0),
126
0
    list_count_(0),
127
0
    bit_state_text_max_size_(0),
128
0
    dfa_mem_(0),
129
0
    dfa_first_(NULL),
130
0
    dfa_longest_(NULL) {
131
0
}
132
133
0
Prog::~Prog() {
134
0
  DeleteDFA(dfa_longest_);
135
0
  DeleteDFA(dfa_first_);
136
0
  if (prefix_foldcase_)
137
0
    delete[] prefix_dfa_;
138
0
}
139
140
typedef SparseSet Workq;
141
142
0
static inline void AddToQueue(Workq* q, int id) {
143
0
  if (id != 0)
144
0
    q->insert(id);
145
0
}
146
147
0
static std::string ProgToString(Prog* prog, Workq* q) {
148
0
  std::string s;
149
0
  for (Workq::iterator i = q->begin(); i != q->end(); ++i) {
150
0
    int id = *i;
151
0
    Prog::Inst* ip = prog->inst(id);
152
0
    s += absl::StrFormat("%d. %s\n", id, ip->Dump());
153
0
    AddToQueue(q, ip->out());
154
0
    if (ip->opcode() == kInstAlt || ip->opcode() == kInstAltMatch)
155
0
      AddToQueue(q, ip->out1());
156
0
  }
157
0
  return s;
158
0
}
159
160
0
static std::string FlattenedProgToString(Prog* prog, int start) {
161
0
  std::string s;
162
0
  for (int id = start; id < prog->size(); id++) {
163
0
    Prog::Inst* ip = prog->inst(id);
164
0
    if (ip->last())
165
0
      s += absl::StrFormat("%d. %s\n", id, ip->Dump());
166
0
    else
167
0
      s += absl::StrFormat("%d+ %s\n", id, ip->Dump());
168
0
  }
169
0
  return s;
170
0
}
171
172
0
std::string Prog::Dump() {
173
0
  if (did_flatten_)
174
0
    return FlattenedProgToString(this, start_);
175
176
0
  Workq q(size_);
177
0
  AddToQueue(&q, start_);
178
0
  return ProgToString(this, &q);
179
0
}
180
181
0
std::string Prog::DumpUnanchored() {
182
0
  if (did_flatten_)
183
0
    return FlattenedProgToString(this, start_unanchored_);
184
185
0
  Workq q(size_);
186
0
  AddToQueue(&q, start_unanchored_);
187
0
  return ProgToString(this, &q);
188
0
}
189
190
0
std::string Prog::DumpByteMap() {
191
0
  std::string map;
192
0
  for (int c = 0; c < 256; c++) {
193
0
    int b = bytemap_[c];
194
0
    int lo = c;
195
0
    while (c < 256-1 && bytemap_[c+1] == b)
196
0
      c++;
197
0
    int hi = c;
198
0
    map += absl::StrFormat("[%02x-%02x] -> %d\n", lo, hi, b);
199
0
  }
200
0
  return map;
201
0
}
202
203
// Is ip a guaranteed match at end of text, perhaps after some capturing?
204
0
static bool IsMatch(Prog* prog, Prog::Inst* ip) {
205
0
  for (;;) {
206
0
    switch (ip->opcode()) {
207
0
      default:
208
0
        ABSL_LOG(DFATAL) << "Unexpected opcode in IsMatch: " << ip->opcode();
209
0
        return false;
210
211
0
      case kInstAlt:
212
0
      case kInstAltMatch:
213
0
      case kInstByteRange:
214
0
      case kInstFail:
215
0
      case kInstEmptyWidth:
216
0
        return false;
217
218
0
      case kInstCapture:
219
0
      case kInstNop:
220
0
        ip = prog->inst(ip->out());
221
0
        break;
222
223
0
      case kInstMatch:
224
0
        return true;
225
0
    }
226
0
  }
227
0
}
228
229
// Peep-hole optimizer.
230
0
void Prog::Optimize() {
231
0
  Workq q(size_);
232
233
  // Eliminate nops.  Most are taken out during compilation
234
  // but a few are hard to avoid.
235
0
  q.clear();
236
0
  AddToQueue(&q, start_);
237
0
  for (Workq::iterator i = q.begin(); i != q.end(); ++i) {
238
0
    int id = *i;
239
240
0
    Inst* ip = inst(id);
241
0
    int j = ip->out();
242
0
    Inst* jp;
243
0
    while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {
244
0
      j = jp->out();
245
0
    }
246
0
    ip->set_out(j);
247
0
    AddToQueue(&q, ip->out());
248
249
0
    if (ip->opcode() == kInstAlt) {
250
0
      j = ip->out1();
251
0
      while (j != 0 && (jp=inst(j))->opcode() == kInstNop) {
252
0
        j = jp->out();
253
0
      }
254
0
      ip->out1_ = j;
255
0
      AddToQueue(&q, ip->out1());
256
0
    }
257
0
  }
258
259
  // Insert kInstAltMatch instructions
260
  // Look for
261
  //   ip: Alt -> j | k
262
  //    j: ByteRange [00-FF] -> ip
263
  //    k: Match
264
  // or the reverse (the above is the greedy one).
265
  // Rewrite Alt to AltMatch.
266
0
  q.clear();
267
0
  AddToQueue(&q, start_);
268
0
  for (Workq::iterator i = q.begin(); i != q.end(); ++i) {
269
0
    int id = *i;
270
0
    Inst* ip = inst(id);
271
0
    AddToQueue(&q, ip->out());
272
0
    if (ip->opcode() == kInstAlt)
273
0
      AddToQueue(&q, ip->out1());
274
275
0
    if (ip->opcode() == kInstAlt) {
276
0
      Inst* j = inst(ip->out());
277
0
      Inst* k = inst(ip->out1());
278
0
      if (j->opcode() == kInstByteRange && j->out() == id &&
279
0
          j->lo() == 0x00 && j->hi() == 0xFF &&
280
0
          IsMatch(this, k)) {
281
0
        ip->set_opcode(kInstAltMatch);
282
0
        continue;
283
0
      }
284
0
      if (IsMatch(this, j) &&
285
0
          k->opcode() == kInstByteRange && k->out() == id &&
286
0
          k->lo() == 0x00 && k->hi() == 0xFF) {
287
0
        ip->set_opcode(kInstAltMatch);
288
0
      }
289
0
    }
290
0
  }
291
0
}
292
293
0
uint32_t Prog::EmptyFlags(absl::string_view text, const char* p) {
294
0
  int flags = 0;
295
296
  // ^ and \A
297
0
  if (p == text.data())
298
0
    flags |= kEmptyBeginText | kEmptyBeginLine;
299
0
  else if (p[-1] == '\n')
300
0
    flags |= kEmptyBeginLine;
301
302
  // $ and \z
303
0
  if (p == text.data() + text.size())
304
0
    flags |= kEmptyEndText | kEmptyEndLine;
305
0
  else if (p < text.data() + text.size() && p[0] == '\n')
306
0
    flags |= kEmptyEndLine;
307
308
  // \b and \B
309
0
  if (p == text.data() && p == text.data() + text.size()) {
310
    // no word boundary here
311
0
  } else if (p == text.data()) {
312
0
    if (IsWordChar(p[0]))
313
0
      flags |= kEmptyWordBoundary;
314
0
  } else if (p == text.data() + text.size()) {
315
0
    if (IsWordChar(p[-1]))
316
0
      flags |= kEmptyWordBoundary;
317
0
  } else {
318
0
    if (IsWordChar(p[-1]) != IsWordChar(p[0]))
319
0
      flags |= kEmptyWordBoundary;
320
0
  }
321
0
  if (!(flags & kEmptyWordBoundary))
322
0
    flags |= kEmptyNonWordBoundary;
323
324
0
  return flags;
325
0
}
326
327
// ByteMapBuilder implements a coloring algorithm.
328
//
329
// The first phase is a series of "mark and merge" batches: we mark one or more
330
// [lo-hi] ranges, then merge them into our internal state. Batching is not for
331
// performance; rather, it means that the ranges are treated indistinguishably.
332
//
333
// Internally, the ranges are represented using a bitmap that stores the splits
334
// and a vector that stores the colors; both of them are indexed by the ranges'
335
// last bytes. Thus, in order to merge a [lo-hi] range, we split at lo-1 and at
336
// hi (if not already split), then recolor each range in between. The color map
337
// (i.e. from the old color to the new color) is maintained for the lifetime of
338
// the batch and so underpins this somewhat obscure approach to set operations.
339
//
340
// The second phase builds the bytemap from our internal state: we recolor each
341
// range, then store the new color (which is now the byte class) in each of the
342
// corresponding array elements. Finally, we output the number of byte classes.
343
class ByteMapBuilder {
344
 public:
345
0
  ByteMapBuilder() {
346
    // Initial state: the [0-255] range has color 256.
347
    // This will avoid problems during the second phase,
348
    // in which we assign byte classes numbered from 0.
349
0
    splits_.Set(255);
350
0
    colors_[255] = 256;
351
0
    nextcolor_ = 257;
352
0
  }
353
354
  void Mark(int lo, int hi);
355
  void Merge();
356
  void Build(uint8_t* bytemap, int* bytemap_range);
357
358
 private:
359
  int Recolor(int oldcolor);
360
361
  Bitmap256 splits_;
362
  int colors_[256];
363
  int nextcolor_;
364
  std::vector<std::pair<int, int>> colormap_;
365
  std::vector<std::pair<int, int>> ranges_;
366
367
  ByteMapBuilder(const ByteMapBuilder&) = delete;
368
  ByteMapBuilder& operator=(const ByteMapBuilder&) = delete;
369
};
370
371
0
void ByteMapBuilder::Mark(int lo, int hi) {
372
0
  ABSL_DCHECK_GE(lo, 0);
373
0
  ABSL_DCHECK_GE(hi, 0);
374
0
  ABSL_DCHECK_LE(lo, 255);
375
0
  ABSL_DCHECK_LE(hi, 255);
376
0
  ABSL_DCHECK_LE(lo, hi);
377
378
  // Ignore any [0-255] ranges. They cause us to recolor every range, which
379
  // has no effect on the eventual result and is therefore a waste of time.
380
0
  if (lo == 0 && hi == 255)
381
0
    return;
382
383
0
  ranges_.emplace_back(lo, hi);
384
0
}
385
386
0
void ByteMapBuilder::Merge() {
387
0
  for (std::vector<std::pair<int, int>>::const_iterator it = ranges_.begin();
388
0
       it != ranges_.end();
389
0
       ++it) {
390
0
    int lo = it->first-1;
391
0
    int hi = it->second;
392
393
0
    if (0 <= lo && !splits_.Test(lo)) {
394
0
      splits_.Set(lo);
395
0
      int next = splits_.FindNextSetBit(lo+1);
396
0
      colors_[lo] = colors_[next];
397
0
    }
398
0
    if (!splits_.Test(hi)) {
399
0
      splits_.Set(hi);
400
0
      int next = splits_.FindNextSetBit(hi+1);
401
0
      colors_[hi] = colors_[next];
402
0
    }
403
404
0
    int c = lo+1;
405
0
    while (c < 256) {
406
0
      int next = splits_.FindNextSetBit(c);
407
0
      colors_[next] = Recolor(colors_[next]);
408
0
      if (next == hi)
409
0
        break;
410
0
      c = next+1;
411
0
    }
412
0
  }
413
0
  colormap_.clear();
414
0
  ranges_.clear();
415
0
}
416
417
0
void ByteMapBuilder::Build(uint8_t* bytemap, int* bytemap_range) {
418
  // Assign byte classes numbered from 0.
419
0
  nextcolor_ = 0;
420
421
0
  int c = 0;
422
0
  while (c < 256) {
423
0
    int next = splits_.FindNextSetBit(c);
424
0
    uint8_t b = static_cast<uint8_t>(Recolor(colors_[next]));
425
0
    while (c <= next) {
426
0
      bytemap[c] = b;
427
0
      c++;
428
0
    }
429
0
  }
430
431
0
  *bytemap_range = nextcolor_;
432
0
}
433
434
0
int ByteMapBuilder::Recolor(int oldcolor) {
435
  // Yes, this is a linear search. There can be at most 256
436
  // colors and there will typically be far fewer than that.
437
  // Also, we need to consider keys *and* values in order to
438
  // avoid recoloring a given range more than once per batch.
439
0
  std::vector<std::pair<int, int>>::const_iterator it =
440
0
      std::find_if(colormap_.begin(), colormap_.end(),
441
0
                   [=](const std::pair<int, int>& kv) -> bool {
442
0
                     return kv.first == oldcolor || kv.second == oldcolor;
443
0
                   });
444
0
  if (it != colormap_.end())
445
0
    return it->second;
446
0
  int newcolor = nextcolor_;
447
0
  nextcolor_++;
448
0
  colormap_.emplace_back(oldcolor, newcolor);
449
0
  return newcolor;
450
0
}
451
452
0
void Prog::ComputeByteMap() {
453
  // Fill in bytemap with byte classes for the program.
454
  // Ranges of bytes that are treated indistinguishably
455
  // will be mapped to a single byte class.
456
0
  ByteMapBuilder builder;
457
458
  // Don't repeat the work for ^ and $.
459
0
  bool marked_line_boundaries = false;
460
  // Don't repeat the work for \b and \B.
461
0
  bool marked_word_boundaries = false;
462
463
0
  for (int id = 0; id < size(); id++) {
464
0
    Inst* ip = inst(id);
465
0
    if (ip->opcode() == kInstByteRange) {
466
0
      int lo = ip->lo();
467
0
      int hi = ip->hi();
468
0
      builder.Mark(lo, hi);
469
0
      if (ip->foldcase() && lo <= 'z' && hi >= 'a') {
470
0
        int foldlo = lo;
471
0
        int foldhi = hi;
472
0
        if (foldlo < 'a')
473
0
          foldlo = 'a';
474
0
        if (foldhi > 'z')
475
0
          foldhi = 'z';
476
0
        if (foldlo <= foldhi) {
477
0
          foldlo += 'A' - 'a';
478
0
          foldhi += 'A' - 'a';
479
0
          builder.Mark(foldlo, foldhi);
480
0
        }
481
0
      }
482
      // If this Inst is not the last Inst in its list AND the next Inst is
483
      // also a ByteRange AND the Insts have the same out, defer the merge.
484
0
      if (!ip->last() &&
485
0
          inst(id+1)->opcode() == kInstByteRange &&
486
0
          ip->out() == inst(id+1)->out())
487
0
        continue;
488
0
      builder.Merge();
489
0
    } else if (ip->opcode() == kInstEmptyWidth) {
490
0
      if (ip->empty() & (kEmptyBeginLine|kEmptyEndLine) &&
491
0
          !marked_line_boundaries) {
492
0
        builder.Mark('\n', '\n');
493
0
        builder.Merge();
494
0
        marked_line_boundaries = true;
495
0
      }
496
0
      if (ip->empty() & (kEmptyWordBoundary|kEmptyNonWordBoundary) &&
497
0
          !marked_word_boundaries) {
498
        // We require two batches here: the first for ranges that are word
499
        // characters, the second for ranges that are not word characters.
500
0
        for (bool isword : {true, false}) {
501
0
          int j;
502
0
          for (int i = 0; i < 256; i = j) {
503
0
            for (j = i + 1; j < 256 &&
504
0
                            Prog::IsWordChar(static_cast<uint8_t>(i)) ==
505
0
                                Prog::IsWordChar(static_cast<uint8_t>(j));
506
0
                 j++)
507
0
              ;
508
0
            if (Prog::IsWordChar(static_cast<uint8_t>(i)) == isword)
509
0
              builder.Mark(i, j - 1);
510
0
          }
511
0
          builder.Merge();
512
0
        }
513
0
        marked_word_boundaries = true;
514
0
      }
515
0
    }
516
0
  }
517
518
0
  builder.Build(bytemap_, &bytemap_range_);
519
520
0
  if ((0)) {  // For debugging, use trivial bytemap.
521
0
    ABSL_LOG(ERROR) << "Using trivial bytemap.";
522
0
    for (int i = 0; i < 256; i++)
523
0
      bytemap_[i] = static_cast<uint8_t>(i);
524
0
    bytemap_range_ = 256;
525
0
  }
526
0
}
527
528
// Prog::Flatten() implements a graph rewriting algorithm.
529
//
530
// The overall process is similar to epsilon removal, but retains some epsilon
531
// transitions: those from Capture and EmptyWidth instructions; and those from
532
// nullable subexpressions. (The latter avoids quadratic blowup in transitions
533
// in the worst case.) It might be best thought of as Alt instruction elision.
534
//
535
// In conceptual terms, it divides the Prog into "trees" of instructions, then
536
// traverses the "trees" in order to produce "lists" of instructions. A "tree"
537
// is one or more instructions that grow from one "root" instruction to one or
538
// more "leaf" instructions; if a "tree" has exactly one instruction, then the
539
// "root" is also the "leaf". In most cases, a "root" is the successor of some
540
// "leaf" (i.e. the "leaf" instruction's out() returns the "root" instruction)
541
// and is considered a "successor root". A "leaf" can be a ByteRange, Capture,
542
// EmptyWidth or Match instruction. However, this is insufficient for handling
543
// nested nullable subexpressions correctly, so in some cases, a "root" is the
544
// dominator of the instructions reachable from some "successor root" (i.e. it
545
// has an unreachable predecessor) and is considered a "dominator root". Since
546
// only Alt instructions can be "dominator roots" (other instructions would be
547
// "leaves"), only Alt instructions are required to be marked as predecessors.
548
//
549
// Dividing the Prog into "trees" comprises two passes: marking the "successor
550
// roots" and the predecessors; and marking the "dominator roots". Sorting the
551
// "successor roots" by their bytecode offsets enables iteration in order from
552
// greatest to least during the second pass; by working backwards in this case
553
// and flooding the graph no further than "leaves" and already marked "roots",
554
// it becomes possible to mark "dominator roots" without doing excessive work.
555
//
556
// Traversing the "trees" is just iterating over the "roots" in order of their
557
// marking and flooding the graph no further than "leaves" and "roots". When a
558
// "leaf" is reached, the instruction is copied with its successor remapped to
559
// its "root" number. When a "root" is reached, a Nop instruction is generated
560
// with its successor remapped similarly. As each "list" is produced, its last
561
// instruction is marked as such. After all of the "lists" have been produced,
562
// a pass over their instructions remaps their successors to bytecode offsets.
563
0
void Prog::Flatten() {
564
0
  if (did_flatten_)
565
0
    return;
566
0
  did_flatten_ = true;
567
568
  // Scratch structures. It's important that these are reused by functions
569
  // that we call in loops because they would thrash the heap otherwise.
570
0
  SparseSet reachable(size());
571
0
  std::vector<int> stk;
572
0
  stk.reserve(size());
573
574
  // First pass: Marks "successor roots" and predecessors.
575
  // Builds the mapping from inst-ids to root-ids.
576
0
  SparseArray<int> rootmap(size());
577
0
  SparseArray<int> predmap(size());
578
0
  std::vector<std::vector<int>> predvec;
579
0
  MarkSuccessors(&rootmap, &predmap, &predvec, &reachable, &stk);
580
581
  // Second pass: Marks "dominator roots".
582
0
  SparseArray<int> sorted(rootmap);
583
0
  std::sort(sorted.begin(), sorted.end(), sorted.less);
584
0
  for (SparseArray<int>::const_iterator i = sorted.end() - 1;
585
0
       i != sorted.begin();
586
0
       --i) {
587
0
    if (i->index() != start_unanchored() && i->index() != start())
588
0
      MarkDominator(i->index(), &rootmap, &predmap, &predvec, &reachable, &stk);
589
0
  }
590
591
  // Third pass: Emits "lists". Remaps outs to root-ids.
592
  // Builds the mapping from root-ids to flat-ids.
593
0
  std::vector<int> flatmap(rootmap.size());
594
0
  std::vector<Inst> flat;
595
0
  flat.reserve(size());
596
0
  for (SparseArray<int>::const_iterator i = rootmap.begin();
597
0
       i != rootmap.end();
598
0
       ++i) {
599
0
    flatmap[i->value()] = static_cast<int>(flat.size());
600
0
    EmitList(i->index(), &rootmap, &flat, &reachable, &stk);
601
0
    flat.back().set_last();
602
    // We have the bounds of the "list", so this is the
603
    // most convenient point at which to compute hints.
604
0
    ComputeHints(&flat, flatmap[i->value()], static_cast<int>(flat.size()));
605
0
  }
606
607
0
  list_count_ = static_cast<int>(flatmap.size());
608
0
  for (int i = 0; i < kNumInst; i++)
609
0
    inst_count_[i] = 0;
610
611
  // Fourth pass: Remaps outs to flat-ids.
612
  // Counts instructions by opcode.
613
0
  for (int id = 0; id < static_cast<int>(flat.size()); id++) {
614
0
    Inst* ip = &flat[id];
615
0
    if (ip->opcode() != kInstAltMatch)  // handled in EmitList()
616
0
      ip->set_out(flatmap[ip->out()]);
617
0
    inst_count_[ip->opcode()]++;
618
0
  }
619
620
#if !defined(NDEBUG)
621
  // Address a `-Wunused-but-set-variable' warning from Clang 13.x.
622
  size_t total = 0;
623
  for (int i = 0; i < kNumInst; i++)
624
    total += inst_count_[i];
625
  ABSL_CHECK_EQ(total, flat.size());
626
#endif
627
628
  // Remap start_unanchored and start.
629
0
  if (start_unanchored() == 0) {
630
0
    ABSL_DCHECK_EQ(start(), 0);
631
0
  } else if (start_unanchored() == start()) {
632
0
    set_start_unanchored(flatmap[1]);
633
0
    set_start(flatmap[1]);
634
0
  } else {
635
0
    set_start_unanchored(flatmap[1]);
636
0
    set_start(flatmap[2]);
637
0
  }
638
639
  // Finally, replace the old instructions with the new instructions.
640
0
  size_ = static_cast<int>(flat.size());
641
0
  inst_ = PODArray<Inst>(size_);
642
0
  memmove(inst_.data(), flat.data(), size_*sizeof inst_[0]);
643
644
  // Populate the list heads for BitState.
645
  // 512 instructions limits the memory footprint to 1KiB.
646
0
  if (size_ <= 512) {
647
0
    list_heads_ = PODArray<uint16_t>(size_);
648
    // 0xFF makes it more obvious if we try to look up a non-head.
649
0
    memset(list_heads_.data(), 0xFF, size_*sizeof list_heads_[0]);
650
0
    for (int i = 0; i < list_count_; ++i)
651
0
      list_heads_[flatmap[i]] = i;
652
0
  }
653
654
  // BitState allocates a bitmap of size list_count_ * (text.size()+1)
655
  // for tracking pairs of possibilities that it has already explored.
656
0
  const size_t kBitStateBitmapMaxSize = 256*1024;  // max size in bits
657
0
  bit_state_text_max_size_ = kBitStateBitmapMaxSize / list_count_ - 1;
658
0
}
659
660
void Prog::MarkSuccessors(SparseArray<int>* rootmap,
661
                          SparseArray<int>* predmap,
662
                          std::vector<std::vector<int>>* predvec,
663
0
                          SparseSet* reachable, std::vector<int>* stk) {
664
  // Mark the kInstFail instruction.
665
0
  rootmap->set_new(0, rootmap->size());
666
667
  // Mark the start_unanchored and start instructions.
668
0
  if (!rootmap->has_index(start_unanchored()))
669
0
    rootmap->set_new(start_unanchored(), rootmap->size());
670
0
  if (!rootmap->has_index(start()))
671
0
    rootmap->set_new(start(), rootmap->size());
672
673
0
  reachable->clear();
674
0
  stk->clear();
675
0
  stk->push_back(start_unanchored());
676
0
  while (!stk->empty()) {
677
0
    int id = stk->back();
678
0
    stk->pop_back();
679
0
  Loop:
680
0
    if (reachable->contains(id))
681
0
      continue;
682
0
    reachable->insert_new(id);
683
684
0
    Inst* ip = inst(id);
685
0
    switch (ip->opcode()) {
686
0
      default:
687
0
        ABSL_LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
688
0
        break;
689
690
0
      case kInstAltMatch:
691
0
      case kInstAlt:
692
        // Mark this instruction as a predecessor of each out.
693
0
        for (int out : {ip->out(), ip->out1()}) {
694
0
          if (!predmap->has_index(out)) {
695
0
            predmap->set_new(out, static_cast<int>(predvec->size()));
696
0
            predvec->emplace_back();
697
0
          }
698
0
          (*predvec)[predmap->get_existing(out)].emplace_back(id);
699
0
        }
700
0
        stk->push_back(ip->out1());
701
0
        id = ip->out();
702
0
        goto Loop;
703
704
0
      case kInstByteRange:
705
0
      case kInstCapture:
706
0
      case kInstEmptyWidth:
707
        // Mark the out of this instruction as a "root".
708
0
        if (!rootmap->has_index(ip->out()))
709
0
          rootmap->set_new(ip->out(), rootmap->size());
710
0
        id = ip->out();
711
0
        goto Loop;
712
713
0
      case kInstNop:
714
0
        id = ip->out();
715
0
        goto Loop;
716
717
0
      case kInstMatch:
718
0
      case kInstFail:
719
0
        break;
720
0
    }
721
0
  }
722
0
}
723
724
void Prog::MarkDominator(int root, SparseArray<int>* rootmap,
725
                         SparseArray<int>* predmap,
726
                         std::vector<std::vector<int>>* predvec,
727
0
                         SparseSet* reachable, std::vector<int>* stk) {
728
0
  reachable->clear();
729
0
  stk->clear();
730
0
  stk->push_back(root);
731
0
  while (!stk->empty()) {
732
0
    int id = stk->back();
733
0
    stk->pop_back();
734
0
  Loop:
735
0
    if (reachable->contains(id))
736
0
      continue;
737
0
    reachable->insert_new(id);
738
739
0
    if (id != root && rootmap->has_index(id)) {
740
      // We reached another "tree" via epsilon transition.
741
0
      continue;
742
0
    }
743
744
0
    Inst* ip = inst(id);
745
0
    switch (ip->opcode()) {
746
0
      default:
747
0
        ABSL_LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
748
0
        break;
749
750
0
      case kInstAltMatch:
751
0
      case kInstAlt:
752
0
        stk->push_back(ip->out1());
753
0
        id = ip->out();
754
0
        goto Loop;
755
756
0
      case kInstByteRange:
757
0
      case kInstCapture:
758
0
      case kInstEmptyWidth:
759
0
        break;
760
761
0
      case kInstNop:
762
0
        id = ip->out();
763
0
        goto Loop;
764
765
0
      case kInstMatch:
766
0
      case kInstFail:
767
0
        break;
768
0
    }
769
0
  }
770
771
0
  for (SparseSet::const_iterator i = reachable->begin();
772
0
       i != reachable->end();
773
0
       ++i) {
774
0
    int id = *i;
775
0
    if (predmap->has_index(id)) {
776
0
      for (int pred : (*predvec)[predmap->get_existing(id)]) {
777
0
        if (!reachable->contains(pred)) {
778
          // id has a predecessor that cannot be reached from root!
779
          // Therefore, id must be a "root" too - mark it as such.
780
0
          if (!rootmap->has_index(id))
781
0
            rootmap->set_new(id, rootmap->size());
782
0
        }
783
0
      }
784
0
    }
785
0
  }
786
0
}
787
788
void Prog::EmitList(int root, SparseArray<int>* rootmap,
789
                    std::vector<Inst>* flat,
790
0
                    SparseSet* reachable, std::vector<int>* stk) {
791
0
  reachable->clear();
792
0
  stk->clear();
793
0
  stk->push_back(root);
794
0
  while (!stk->empty()) {
795
0
    int id = stk->back();
796
0
    stk->pop_back();
797
0
  Loop:
798
0
    if (reachable->contains(id))
799
0
      continue;
800
0
    reachable->insert_new(id);
801
802
0
    if (id != root && rootmap->has_index(id)) {
803
      // We reached another "tree" via epsilon transition. Emit a kInstNop
804
      // instruction so that the Prog does not become quadratically larger.
805
0
      flat->emplace_back();
806
0
      flat->back().set_opcode(kInstNop);
807
0
      flat->back().set_out(rootmap->get_existing(id));
808
0
      continue;
809
0
    }
810
811
0
    Inst* ip = inst(id);
812
0
    switch (ip->opcode()) {
813
0
      default:
814
0
        ABSL_LOG(DFATAL) << "unhandled opcode: " << ip->opcode();
815
0
        break;
816
817
0
      case kInstAltMatch:
818
0
        flat->emplace_back();
819
0
        flat->back().set_opcode(kInstAltMatch);
820
0
        flat->back().set_out(static_cast<int>(flat->size()));
821
0
        flat->back().out1_ = static_cast<uint32_t>(flat->size())+1;
822
0
        [[fallthrough]];
823
824
0
      case kInstAlt:
825
0
        stk->push_back(ip->out1());
826
0
        id = ip->out();
827
0
        goto Loop;
828
829
0
      case kInstByteRange:
830
0
      case kInstCapture:
831
0
      case kInstEmptyWidth:
832
0
        flat->emplace_back();
833
0
        memmove(&flat->back(), ip, sizeof *ip);
834
0
        flat->back().set_out(rootmap->get_existing(ip->out()));
835
0
        break;
836
837
0
      case kInstNop:
838
0
        id = ip->out();
839
0
        goto Loop;
840
841
0
      case kInstMatch:
842
0
      case kInstFail:
843
0
        flat->emplace_back();
844
0
        memmove(&flat->back(), ip, sizeof *ip);
845
0
        break;
846
0
    }
847
0
  }
848
0
}
849
850
// For each ByteRange instruction in [begin, end), computes a hint to execution
851
// engines: the delta to the next instruction (in flat) worth exploring iff the
852
// current instruction matched.
853
//
854
// Implements a coloring algorithm related to ByteMapBuilder, but in this case,
855
// colors are instructions and recoloring ranges precisely identifies conflicts
856
// between instructions. Iterating backwards over [begin, end) is guaranteed to
857
// identify the nearest conflict (if any) with only linear complexity.
858
0
void Prog::ComputeHints(std::vector<Inst>* flat, int begin, int end) {
859
0
  Bitmap256 splits;
860
0
  int colors[256];
861
862
0
  bool dirty = false;
863
0
  for (int id = end; id >= begin; --id) {
864
0
    if (id == end ||
865
0
        (*flat)[id].opcode() != kInstByteRange) {
866
0
      if (dirty) {
867
0
        dirty = false;
868
0
        splits.Clear();
869
0
      }
870
0
      splits.Set(255);
871
0
      colors[255] = id;
872
      // At this point, the [0-255] range is colored with id.
873
      // Thus, hints cannot point beyond id; and if id == end,
874
      // hints that would have pointed to id will be 0 instead.
875
0
      continue;
876
0
    }
877
0
    dirty = true;
878
879
    // We recolor the [lo-hi] range with id. Note that first ratchets backwards
880
    // from end to the nearest conflict (if any) during recoloring.
881
0
    int first = end;
882
0
    auto Recolor = [&](int lo, int hi) {
883
      // Like ByteMapBuilder, we split at lo-1 and at hi.
884
0
      --lo;
885
886
0
      if (0 <= lo && !splits.Test(lo)) {
887
0
        splits.Set(lo);
888
0
        int next = splits.FindNextSetBit(lo+1);
889
0
        colors[lo] = colors[next];
890
0
      }
891
0
      if (!splits.Test(hi)) {
892
0
        splits.Set(hi);
893
0
        int next = splits.FindNextSetBit(hi+1);
894
0
        colors[hi] = colors[next];
895
0
      }
896
897
0
      int c = lo+1;
898
0
      while (c < 256) {
899
0
        int next = splits.FindNextSetBit(c);
900
        // Ratchet backwards...
901
0
        first = std::min(first, colors[next]);
902
        // Recolor with id - because it's the new nearest conflict!
903
0
        colors[next] = id;
904
0
        if (next == hi)
905
0
          break;
906
0
        c = next+1;
907
0
      }
908
0
    };
909
910
0
    Inst* ip = &(*flat)[id];
911
0
    int lo = ip->lo();
912
0
    int hi = ip->hi();
913
0
    Recolor(lo, hi);
914
0
    if (ip->foldcase() && lo <= 'z' && hi >= 'a') {
915
0
      int foldlo = lo;
916
0
      int foldhi = hi;
917
0
      if (foldlo < 'a')
918
0
        foldlo = 'a';
919
0
      if (foldhi > 'z')
920
0
        foldhi = 'z';
921
0
      if (foldlo <= foldhi) {
922
0
        foldlo += 'A' - 'a';
923
0
        foldhi += 'A' - 'a';
924
0
        Recolor(foldlo, foldhi);
925
0
      }
926
0
    }
927
928
0
    if (first != end) {
929
0
      uint16_t hint = static_cast<uint16_t>(std::min(first - id, 32767));
930
0
      ip->hint_foldcase_ |= hint<<1;
931
0
    }
932
0
  }
933
0
}
934
935
// The final state will always be this, which frees up a register for the hot
936
// loop and thus avoids the spilling that can occur when building with Clang.
937
static const size_t kShiftDFAFinal = 9;
938
939
// This function takes the prefix as std::string (i.e. not const std::string&
940
// as normal) because it's going to clobber it, so a temporary is convenient.
941
0
static uint64_t* BuildShiftDFA(std::string prefix) {
942
  // This constant is for convenience now and also for correctness later when
943
  // we clobber the prefix, but still need to know how long it was initially.
944
0
  const size_t size = prefix.size();
945
946
  // Construct the NFA.
947
  // The table is indexed by input byte; each element is a bitfield of states
948
  // reachable by the input byte. Given a bitfield of the current states, the
949
  // bitfield of states reachable from those is - for this specific purpose -
950
  // always ((ncurr << 1) | 1). Intersecting the reachability bitfields gives
951
  // the bitfield of the next states reached by stepping over the input byte.
952
  // Credits for this technique: the Hyperscan paper by Geoff Langdale et al.
953
0
  uint16_t nfa[256]{};
954
0
  for (size_t i = 0; i < size; ++i) {
955
0
    uint8_t b = prefix[i];
956
0
    nfa[b] |= 1 << (i+1);
957
0
  }
958
  // This is the `\C*?` for unanchored search.
959
0
  for (int b = 0; b < 256; ++b)
960
0
    nfa[b] |= 1;
961
962
  // This maps from DFA state to NFA states; the reverse mapping is used when
963
  // recording transitions and gets implemented with plain old linear search.
964
  // The "Shift DFA" technique limits this to ten states when using uint64_t;
965
  // to allow for the initial state, we use at most nine bytes of the prefix.
966
  // That same limit is also why uint16_t is sufficient for the NFA bitfield.
967
0
  uint16_t states[kShiftDFAFinal+1]{};
968
0
  states[0] = 1;
969
0
  for (size_t dcurr = 0; dcurr < size; ++dcurr) {
970
0
    uint8_t b = prefix[dcurr];
971
0
    uint16_t ncurr = states[dcurr];
972
0
    uint16_t nnext = nfa[b] & ((ncurr << 1) | 1);
973
0
    size_t dnext = dcurr+1;
974
0
    if (dnext == size)
975
0
      dnext = kShiftDFAFinal;
976
0
    states[dnext] = nnext;
977
0
  }
978
979
  // Sort and unique the bytes of the prefix to avoid repeating work while we
980
  // record transitions. This clobbers the prefix, but it's no longer needed.
981
0
  std::sort(prefix.begin(), prefix.end());
982
0
  prefix.erase(std::unique(prefix.begin(), prefix.end()), prefix.end());
983
984
  // Construct the DFA.
985
  // The table is indexed by input byte; each element is effectively a packed
986
  // array of uint6_t; each array value will be multiplied by six in order to
987
  // avoid having to do so later in the hot loop as well as masking/shifting.
988
  // Credits for this technique: "Shift-based DFAs" on GitHub by Per Vognsen.
989
0
  uint64_t* dfa = new uint64_t[256]{};
990
  // Record a transition from each state for each of the bytes of the prefix.
991
  // Note that all other input bytes go back to the initial state by default.
992
0
  for (size_t dcurr = 0; dcurr < size; ++dcurr) {
993
0
    for (uint8_t b : prefix) {
994
0
      uint16_t ncurr = states[dcurr];
995
0
      uint16_t nnext = nfa[b] & ((ncurr << 1) | 1);
996
0
      size_t dnext = 0;
997
0
      while (states[dnext] != nnext)
998
0
        ++dnext;
999
0
      dfa[b] |= static_cast<uint64_t>(dnext * 6) << (dcurr * 6);
1000
      // Convert ASCII letters to uppercase and record the extra transitions.
1001
      // Note that ASCII letters are guaranteed to be lowercase at this point
1002
      // because that's how the parser normalises them. #FunFact: 'k' and 's'
1003
      // match U+212A and U+017F, respectively, so they won't occur here when
1004
      // using UTF-8 encoding because the parser will emit character classes.
1005
0
      if ('a' <= b && b <= 'z') {
1006
0
        b -= 'a' - 'A';
1007
0
        dfa[b] |= static_cast<uint64_t>(dnext * 6) << (dcurr * 6);
1008
0
      }
1009
0
    }
1010
0
  }
1011
  // This lets the final state "saturate", which will matter for performance:
1012
  // in the hot loop, we check for a match only at the end of each iteration,
1013
  // so we must keep signalling the match until we get around to checking it.
1014
0
  for (int b = 0; b < 256; ++b)
1015
0
    dfa[b] |= static_cast<uint64_t>(kShiftDFAFinal * 6) << (kShiftDFAFinal * 6);
1016
1017
0
  return dfa;
1018
0
}
1019
1020
void Prog::ConfigurePrefixAccel(const std::string& prefix,
1021
0
                                bool prefix_foldcase) {
1022
0
  prefix_foldcase_ = prefix_foldcase;
1023
0
  prefix_size_ = prefix.size();
1024
0
  if (prefix_foldcase_) {
1025
    // Use PrefixAccel_ShiftDFA().
1026
    // ... and no more than nine bytes of the prefix. (See above for details.)
1027
0
    prefix_size_ = std::min(prefix_size_, kShiftDFAFinal);
1028
0
    prefix_dfa_ = BuildShiftDFA(prefix.substr(0, prefix_size_));
1029
0
  } else if (prefix_size_ != 1) {
1030
    // Use PrefixAccel_FrontAndBack().
1031
0
    prefix_front_ = prefix.front();
1032
0
    prefix_back_ = prefix.back();
1033
0
  } else {
1034
    // Use memchr(3).
1035
0
    prefix_front_ = prefix.front();
1036
0
  }
1037
0
}
1038
1039
0
const void* Prog::PrefixAccel_ShiftDFA(const void* data, size_t size) {
1040
0
  if (size < prefix_size_)
1041
0
    return NULL;
1042
1043
0
  uint64_t curr = 0;
1044
1045
  // At the time of writing, rough benchmarks on a Broadwell machine showed
1046
  // that this unroll factor (i.e. eight) achieves a speedup factor of two.
1047
0
  if (size >= 8) {
1048
0
    const uint8_t* p = reinterpret_cast<const uint8_t*>(data);
1049
0
    const uint8_t* endp = p + (size&~7);
1050
0
    do {
1051
0
      uint8_t b0 = p[0];
1052
0
      uint8_t b1 = p[1];
1053
0
      uint8_t b2 = p[2];
1054
0
      uint8_t b3 = p[3];
1055
0
      uint8_t b4 = p[4];
1056
0
      uint8_t b5 = p[5];
1057
0
      uint8_t b6 = p[6];
1058
0
      uint8_t b7 = p[7];
1059
1060
0
      uint64_t next0 = prefix_dfa_[b0];
1061
0
      uint64_t next1 = prefix_dfa_[b1];
1062
0
      uint64_t next2 = prefix_dfa_[b2];
1063
0
      uint64_t next3 = prefix_dfa_[b3];
1064
0
      uint64_t next4 = prefix_dfa_[b4];
1065
0
      uint64_t next5 = prefix_dfa_[b5];
1066
0
      uint64_t next6 = prefix_dfa_[b6];
1067
0
      uint64_t next7 = prefix_dfa_[b7];
1068
1069
0
      uint64_t curr0 = next0 >> (curr  & 63);
1070
0
      uint64_t curr1 = next1 >> (curr0 & 63);
1071
0
      uint64_t curr2 = next2 >> (curr1 & 63);
1072
0
      uint64_t curr3 = next3 >> (curr2 & 63);
1073
0
      uint64_t curr4 = next4 >> (curr3 & 63);
1074
0
      uint64_t curr5 = next5 >> (curr4 & 63);
1075
0
      uint64_t curr6 = next6 >> (curr5 & 63);
1076
0
      uint64_t curr7 = next7 >> (curr6 & 63);
1077
1078
0
      if ((curr7 & 63) == kShiftDFAFinal * 6) {
1079
        // At the time of writing, using the same masking subexpressions from
1080
        // the preceding lines caused Clang to clutter the hot loop computing
1081
        // them - even though they aren't actually needed for shifting! Hence
1082
        // these rewritten conditions, which achieve a speedup factor of two.
1083
0
        if (((curr7-curr0) & 63) == 0) return p+1-prefix_size_;
1084
0
        if (((curr7-curr1) & 63) == 0) return p+2-prefix_size_;
1085
0
        if (((curr7-curr2) & 63) == 0) return p+3-prefix_size_;
1086
0
        if (((curr7-curr3) & 63) == 0) return p+4-prefix_size_;
1087
0
        if (((curr7-curr4) & 63) == 0) return p+5-prefix_size_;
1088
0
        if (((curr7-curr5) & 63) == 0) return p+6-prefix_size_;
1089
0
        if (((curr7-curr6) & 63) == 0) return p+7-prefix_size_;
1090
0
        if (((curr7-curr7) & 63) == 0) return p+8-prefix_size_;
1091
0
      }
1092
1093
0
      curr = curr7;
1094
0
      p += 8;
1095
0
    } while (p != endp);
1096
0
    data = p;
1097
0
    size = size&7;
1098
0
  }
1099
1100
0
  const uint8_t* p = reinterpret_cast<const uint8_t*>(data);
1101
0
  const uint8_t* endp = p + size;
1102
0
  while (p != endp) {
1103
0
    uint8_t b = *p++;
1104
0
    uint64_t next = prefix_dfa_[b];
1105
0
    curr = next >> (curr & 63);
1106
0
    if ((curr & 63) == kShiftDFAFinal * 6)
1107
0
      return p-prefix_size_;
1108
0
  }
1109
0
  return NULL;
1110
0
}
1111
1112
#if defined(__AVX2__)
1113
// Finds the least significant non-zero bit in n.
1114
static int FindLSBSet(uint32_t n) {
1115
  ABSL_DCHECK_NE(n, uint32_t{0});
1116
#if defined(__GNUC__)
1117
  return __builtin_ctz(n);
1118
#elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
1119
  unsigned long c;
1120
  _BitScanForward(&c, n);
1121
  return static_cast<int>(c);
1122
#else
1123
  int c = 31;
1124
  for (int shift = 1 << 4; shift != 0; shift >>= 1) {
1125
    uint32_t word = n << shift;
1126
    if (word != 0) {
1127
      n = word;
1128
      c -= shift;
1129
    }
1130
  }
1131
  return c;
1132
#endif
1133
}
1134
#endif
1135
1136
0
const void* Prog::PrefixAccel_FrontAndBack(const void* data, size_t size) {
1137
0
  ABSL_DCHECK_GE(prefix_size_, size_t{2});
1138
0
  if (size < prefix_size_)
1139
0
    return NULL;
1140
  // Don't bother searching the last prefix_size_-1 bytes for prefix_front_.
1141
  // This also means that probing for prefix_back_ doesn't go out of bounds.
1142
0
  size -= prefix_size_-1;
1143
1144
#if defined(__AVX2__)
1145
  // Use AVX2 to look for prefix_front_ and prefix_back_ 32 bytes at a time.
1146
  if (size >= sizeof(__m256i)) {
1147
    const __m256i* fp = reinterpret_cast<const __m256i*>(
1148
        reinterpret_cast<const char*>(data));
1149
    const __m256i* bp = reinterpret_cast<const __m256i*>(
1150
        reinterpret_cast<const char*>(data) + prefix_size_-1);
1151
    const __m256i* endfp = fp + size/sizeof(__m256i);
1152
    const __m256i f_set1 = _mm256_set1_epi8(prefix_front_);
1153
    const __m256i b_set1 = _mm256_set1_epi8(prefix_back_);
1154
    do {
1155
      const __m256i f_loadu = _mm256_loadu_si256(fp++);
1156
      const __m256i b_loadu = _mm256_loadu_si256(bp++);
1157
      const __m256i f_cmpeq = _mm256_cmpeq_epi8(f_set1, f_loadu);
1158
      const __m256i b_cmpeq = _mm256_cmpeq_epi8(b_set1, b_loadu);
1159
      const int fb_testz = _mm256_testz_si256(f_cmpeq, b_cmpeq);
1160
      if (fb_testz == 0) {  // ZF: 1 means zero, 0 means non-zero.
1161
        const __m256i fb_and = _mm256_and_si256(f_cmpeq, b_cmpeq);
1162
        const int fb_movemask = _mm256_movemask_epi8(fb_and);
1163
        const int fb_ctz = FindLSBSet(fb_movemask);
1164
        return reinterpret_cast<const char*>(fp-1) + fb_ctz;
1165
      }
1166
    } while (fp != endfp);
1167
    data = fp;
1168
    size = size%sizeof(__m256i);
1169
  }
1170
#endif
1171
1172
0
  const char* p0 = reinterpret_cast<const char*>(data);
1173
0
  for (const char* p = p0;; p++) {
1174
0
    ABSL_DCHECK_GE(size, static_cast<size_t>(p-p0));
1175
0
    p = reinterpret_cast<const char*>(memchr(p, prefix_front_, size - (p-p0)));
1176
0
    if (p == NULL || p[prefix_size_-1] == prefix_back_)
1177
0
      return p;
1178
0
  }
1179
0
}
1180
1181
}  // namespace re2