Coverage Report

Created: 2025-12-03 06:28

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/re2/re2/nfa.cc
Line
Count
Source
1
// Copyright 2006-2007 The RE2 Authors.  All Rights Reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
// Tested by search_test.cc.
6
//
7
// Prog::SearchNFA, an NFA search.
8
// This is an actual NFA like the theorists talk about,
9
// not the pseudo-NFA found in backtracking regexp implementations.
10
//
11
// IMPLEMENTATION
12
//
13
// This algorithm is a variant of one that appeared in Rob Pike's sam editor,
14
// which is a variant of the one described in Thompson's 1968 CACM paper.
15
// See http://swtch.com/~rsc/regexp/ for various history.  The main feature
16
// over the DFA implementation is that it tracks submatch boundaries.
17
//
18
// When the choice of submatch boundaries is ambiguous, this particular
19
// implementation makes the same choices that traditional backtracking
20
// implementations (in particular, Perl and PCRE) do.
21
// Note that unlike in Perl and PCRE, this algorithm *cannot* take exponential
22
// time in the length of the input.
23
//
24
// Like Thompson's original machine and like the DFA implementation, this
25
// implementation notices a match only once it is one byte past it.
26
27
#include <stdio.h>
28
#include <string.h>
29
30
#include <algorithm>
31
#include <deque>
32
#include <string>
33
#include <utility>
34
35
#include "absl/log/absl_check.h"
36
#include "absl/log/absl_log.h"
37
#include "absl/strings/str_format.h"
38
#include "absl/strings/string_view.h"
39
#include "re2/pod_array.h"
40
#include "re2/prog.h"
41
#include "re2/regexp.h"
42
#include "re2/sparse_array.h"
43
#include "re2/sparse_set.h"
44
45
namespace re2 {
46
47
static const bool ExtraDebug = false;
48
49
class NFA {
50
 public:
51
  NFA(Prog* prog);
52
  ~NFA();
53
54
  // Searches for a matching string.
55
  //   * If anchored is true, only considers matches starting at offset.
56
  //     Otherwise finds lefmost match at or after offset.
57
  //   * If longest is true, returns the longest match starting
58
  //     at the chosen start point.  Otherwise returns the so-called
59
  //     left-biased match, the one traditional backtracking engines
60
  //     (like Perl and PCRE) find.
61
  // Records submatch boundaries in submatch[1..nsubmatch-1].
62
  // Submatch[0] is the entire match.  When there is a choice in
63
  // which text matches each subexpression, the submatch boundaries
64
  // are chosen to match what a backtracking implementation would choose.
65
  bool Search(absl::string_view text, absl::string_view context, bool anchored,
66
              bool longest, absl::string_view* submatch, int nsubmatch);
67
68
 private:
69
  struct Thread {
70
    union {
71
      int ref;
72
      Thread* next;  // when on free list
73
    };
74
    const char** capture;
75
  };
76
77
  // State for explicit stack in AddToThreadq.
78
  struct AddState {
79
    int id;     // Inst to process
80
    Thread* t;  // if not null, set t0 = t before processing id
81
  };
82
83
  // Threadq is a list of threads.  The list is sorted by the order
84
  // in which Perl would explore that particular state -- the earlier
85
  // choices appear earlier in the list.
86
  typedef SparseArray<Thread*> Threadq;
87
88
  inline Thread* AllocThread();
89
  inline Thread* Incref(Thread* t);
90
  inline void Decref(Thread* t);
91
92
  // Follows all empty arrows from id0 and enqueues all the states reached.
93
  // Enqueues only the ByteRange instructions that match byte c.
94
  // context is used (with p) for evaluating empty-width specials.
95
  // p is the current input position, and t0 is the current thread.
96
  void AddToThreadq(Threadq* q, int id0, int c, absl::string_view context,
97
                    const char* p, Thread* t0);
98
99
  // Run runq on byte c, appending new states to nextq.
100
  // Updates matched_ and match_ as new, better matches are found.
101
  // context is used (with p) for evaluating empty-width specials.
102
  // p is the position of byte c in the input string for AddToThreadq;
103
  // p-1 will be used when processing Match instructions.
104
  // Frees all the threads on runq.
105
  // If there is a shortcut to the end, returns that shortcut.
106
  int Step(Threadq* runq, Threadq* nextq, int c, absl::string_view context,
107
           const char* p);
108
109
  // Returns text version of capture information, for debugging.
110
  std::string FormatCapture(const char** capture);
111
112
248k
  void CopyCapture(const char** dst, const char** src) {
113
248k
    memmove(dst, src, ncapture_*sizeof src[0]);
114
248k
  }
115
116
  Prog* prog_;                // underlying program
117
  int start_;                 // start instruction in program
118
  int ncapture_;              // number of submatches to track
119
  bool longest_;              // whether searching for longest match
120
  bool endmatch_;             // whether match must end at text.end()
121
  const char* btext_;         // beginning of text (for FormatSubmatch)
122
  const char* etext_;         // end of text (for endmatch_)
123
  Threadq q0_, q1_;           // pre-allocated for Search.
124
  PODArray<AddState> stack_;  // pre-allocated for AddToThreadq
125
  std::deque<Thread> arena_;  // thread arena
126
  Thread* freelist_;          // thread freelist
127
  const char** match_;        // best match so far
128
  bool matched_;              // any match so far?
129
130
  NFA(const NFA&) = delete;
131
  NFA& operator=(const NFA&) = delete;
132
};
133
134
1.36k
NFA::NFA(Prog* prog) {
135
1.36k
  prog_ = prog;
136
1.36k
  start_ = prog_->start();
137
1.36k
  ncapture_ = 0;
138
1.36k
  longest_ = false;
139
1.36k
  endmatch_ = false;
140
1.36k
  btext_ = NULL;
141
1.36k
  etext_ = NULL;
142
1.36k
  q0_.resize(prog_->size());
143
1.36k
  q1_.resize(prog_->size());
144
  // See NFA::AddToThreadq() for why this is so.
145
1.36k
  int nstack = 2*prog_->inst_count(kInstCapture) +
146
1.36k
               prog_->inst_count(kInstEmptyWidth) +
147
1.36k
               prog_->inst_count(kInstNop) + 1;  // + 1 for start inst
148
1.36k
  stack_ = PODArray<AddState>(nstack);
149
1.36k
  freelist_ = NULL;
150
1.36k
  match_ = NULL;
151
1.36k
  matched_ = false;
152
1.36k
}
153
154
1.36k
NFA::~NFA() {
155
1.36k
  delete[] match_;
156
1.36k
  for (const Thread& t : arena_)
157
93.9k
    delete[] t.capture;
158
1.36k
}
159
160
240k
NFA::Thread* NFA::AllocThread() {
161
240k
  Thread* t = freelist_;
162
240k
  if (t != NULL) {
163
146k
    freelist_ = t->next;
164
146k
    t->ref = 1;
165
    // We don't need to touch t->capture because
166
    // the caller will immediately overwrite it.
167
146k
    return t;
168
146k
  }
169
93.9k
  arena_.emplace_back();
170
93.9k
  t = &arena_.back();
171
93.9k
  t->ref = 1;
172
93.9k
  t->capture = new const char*[ncapture_];
173
93.9k
  return t;
174
240k
}
175
176
7.80M
NFA::Thread* NFA::Incref(Thread* t) {
177
7.80M
  ABSL_DCHECK(t != NULL);
178
7.80M
  t->ref++;
179
7.80M
  return t;
180
7.80M
}
181
182
8.04M
void NFA::Decref(Thread* t) {
183
8.04M
  ABSL_DCHECK(t != NULL);
184
8.04M
  t->ref--;
185
8.04M
  if (t->ref > 0)
186
7.80M
    return;
187
8.04M
  ABSL_DCHECK_EQ(t->ref, 0);
188
240k
  t->next = freelist_;
189
240k
  freelist_ = t;
190
240k
}
191
192
// Follows all empty arrows from id0 and enqueues all the states reached.
193
// Enqueues only the ByteRange instructions that match byte c.
194
// context is used (with p) for evaluating empty-width specials.
195
// p is the current input position, and t0 is the current thread.
196
void NFA::AddToThreadq(Threadq* q, int id0, int c, absl::string_view context,
197
7.78M
                       const char* p, Thread* t0) {
198
7.78M
  if (id0 == 0)
199
0
    return;
200
201
  // Use stack_ to hold our stack of instructions yet to process.
202
  // It was preallocated as follows:
203
  //   two entries per Capture;
204
  //   one entry per EmptyWidth; and
205
  //   one entry per Nop.
206
  // This reflects the maximum number of stack pushes that each can
207
  // perform. (Each instruction can be processed at most once.)
208
7.78M
  AddState* stk = stack_.data();
209
7.78M
  int nstk = 0;
210
211
7.78M
  stk[nstk++] = {id0, NULL};
212
16.0M
  while (nstk > 0) {
213
8.22M
    ABSL_DCHECK_LE(nstk, stack_.size());
214
8.22M
    AddState a = stk[--nstk];
215
216
17.1M
  Loop:
217
17.1M
    if (a.t != NULL) {
218
      // t0 was a thread that we allocated and copied in order to
219
      // record the capture, so we must now decref it.
220
238k
      Decref(t0);
221
238k
      t0 = a.t;
222
238k
    }
223
224
17.1M
    int id = a.id;
225
17.1M
    if (id == 0)
226
239k
      continue;
227
16.8M
    if (q->has_index(id)) {
228
3.90M
      if (ExtraDebug)
229
0
        absl::FPrintF(stderr, "  [%d%s]\n", id, FormatCapture(t0->capture));
230
3.90M
      continue;
231
3.90M
    }
232
233
    // Create entry in q no matter what.  We might fill it in below,
234
    // or we might not.  Even if not, it is necessary to have it,
235
    // so that we don't revisit id0 during the recursion.
236
12.9M
    q->set_new(id, NULL);
237
12.9M
    Thread** tp = &q->get_existing(id);
238
12.9M
    int j;
239
12.9M
    Thread* t;
240
12.9M
    Prog::Inst* ip = prog_->inst(id);
241
12.9M
    switch (ip->opcode()) {
242
0
    default:
243
0
      ABSL_LOG(DFATAL) << "unhandled " << ip->opcode() << " in AddToThreadq";
244
0
      break;
245
246
0
    case kInstFail:
247
0
      break;
248
249
1.44k
    case kInstAltMatch:
250
      // Save state; will pick up at next byte.
251
1.44k
      t = Incref(t0);
252
1.44k
      *tp = t;
253
254
1.44k
      ABSL_DCHECK(!ip->last());
255
1.44k
      a = {id+1, NULL};
256
1.44k
      goto Loop;
257
258
4.19M
    case kInstNop:
259
4.19M
      if (!ip->last())
260
184k
        stk[nstk++] = {id+1, NULL};
261
262
      // Continue on.
263
4.19M
      a = {ip->out(), NULL};
264
4.19M
      goto Loop;
265
266
263k
    case kInstCapture:
267
263k
      if (!ip->last())
268
5.43k
        stk[nstk++] = {id+1, NULL};
269
270
263k
      if ((j=ip->cap()) < ncapture_) {
271
        // Push a dummy whose only job is to restore t0
272
        // once we finish exploring this possibility.
273
238k
        stk[nstk++] = {0, t0};
274
275
        // Record capture.
276
238k
        t = AllocThread();
277
238k
        CopyCapture(t->capture, t0->capture);
278
238k
        t->capture[j] = p;
279
238k
        t0 = t;
280
238k
      }
281
263k
      a = {ip->out(), NULL};
282
263k
      goto Loop;
283
284
8.48M
    case kInstByteRange:
285
8.48M
      if (!ip->Matches(c))
286
691k
        goto Next;
287
288
      // Save state; will pick up at next byte.
289
7.78M
      t = Incref(t0);
290
7.78M
      *tp = t;
291
7.78M
      if (ExtraDebug)
292
0
        absl::FPrintF(stderr, " + %d%s\n", id, FormatCapture(t0->capture));
293
294
7.78M
      if (ip->hint() == 0)
295
3.95M
        break;
296
3.83M
      a = {id+ip->hint(), NULL};
297
3.83M
      goto Loop;
298
299
18.6k
    case kInstMatch:
300
      // Save state; will pick up at next byte.
301
18.6k
      t = Incref(t0);
302
18.6k
      *tp = t;
303
18.6k
      if (ExtraDebug)
304
0
        absl::FPrintF(stderr, " ! %d%s\n", id, FormatCapture(t0->capture));
305
306
709k
    Next:
307
709k
      if (ip->last())
308
116k
        break;
309
593k
      a = {id+1, NULL};
310
593k
      goto Loop;
311
312
19.5k
    case kInstEmptyWidth:
313
19.5k
      if (!ip->last())
314
6.75k
        stk[nstk++] = {id+1, NULL};
315
316
      // Continue on if we have all the right flag bits.
317
19.5k
      if (ip->empty() & ~Prog::EmptyFlags(context, p))
318
9.94k
        break;
319
9.57k
      a = {ip->out(), NULL};
320
9.57k
      goto Loop;
321
12.9M
    }
322
12.9M
  }
323
7.78M
}
324
325
// Run runq on byte c, appending new states to nextq.
326
// Updates matched_ and match_ as new, better matches are found.
327
// context is used (with p) for evaluating empty-width specials.
328
// p is the position of byte c in the input string for AddToThreadq;
329
// p-1 will be used when processing Match instructions.
330
// Frees all the threads on runq.
331
// If there is a shortcut to the end, returns that shortcut.
332
int NFA::Step(Threadq* runq, Threadq* nextq, int c, absl::string_view context,
333
26.3k
              const char* p) {
334
26.3k
  nextq->clear();
335
336
12.9M
  for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
337
12.9M
    Thread* t = i->value();
338
12.9M
    if (t == NULL)
339
5.15M
      continue;
340
341
7.80M
    if (longest_) {
342
      // Can skip any threads started after our current best match.
343
7.80M
      if (matched_ && match_[0] < t->capture[0]) {
344
0
        Decref(t);
345
0
        continue;
346
0
      }
347
7.80M
    }
348
349
7.80M
    int id = i->index();
350
7.80M
    Prog::Inst* ip = prog_->inst(id);
351
352
7.80M
    switch (ip->opcode()) {
353
0
      default:
354
        // Should only see the values handled below.
355
0
        ABSL_LOG(DFATAL) << "Unhandled " << ip->opcode() << " in step";
356
0
        break;
357
358
7.78M
      case kInstByteRange:
359
7.78M
        AddToThreadq(nextq, ip->out(), c, context, p, t);
360
7.78M
        break;
361
362
1.42k
      case kInstAltMatch:
363
1.42k
        if (i != runq->begin())
364
1.25k
          break;
365
        // The match is ours if we want it.
366
161
        if (ip->greedy(prog_) || longest_) {
367
161
          CopyCapture(match_, t->capture);
368
161
          matched_ = true;
369
370
161
          Decref(t);
371
12.9k
          for (++i; i != runq->end(); ++i) {
372
12.8k
            if (i->value() != NULL)
373
3.08k
              Decref(i->value());
374
12.8k
          }
375
161
          runq->clear();
376
161
          if (ip->greedy(prog_))
377
140
            return ip->out1();
378
21
          return ip->out();
379
161
        }
380
0
        break;
381
382
18.5k
      case kInstMatch: {
383
        // Avoid invoking undefined behavior (arithmetic on a null pointer)
384
        // by storing p instead of p-1. (What would the latter even mean?!)
385
        // This complements the special case in NFA::Search().
386
18.5k
        if (p == NULL) {
387
0
          CopyCapture(match_, t->capture);
388
0
          match_[1] = p;
389
0
          matched_ = true;
390
0
          break;
391
0
        }
392
393
18.5k
        if (endmatch_ && p-1 != etext_)
394
6.37k
          break;
395
396
12.1k
        if (longest_) {
397
          // Leftmost-longest mode: save this match only if
398
          // it is either farther to the left or at the same
399
          // point but longer than an existing match.
400
12.1k
          if (!matched_ || t->capture[0] < match_[0] ||
401
10.8k
              (t->capture[0] == match_[0] && p-1 > match_[1])) {
402
7.67k
            CopyCapture(match_, t->capture);
403
7.67k
            match_[1] = p-1;
404
7.67k
            matched_ = true;
405
7.67k
          }
406
12.1k
        } else {
407
          // Leftmost-biased mode: this match is by definition
408
          // better than what we've already found (see next line).
409
0
          CopyCapture(match_, t->capture);
410
0
          match_[1] = p-1;
411
0
          matched_ = true;
412
413
          // Cut off the threads that can only find matches
414
          // worse than the one we just found: don't run the
415
          // rest of the current Threadq.
416
0
          Decref(t);
417
0
          for (++i; i != runq->end(); ++i) {
418
0
            if (i->value() != NULL)
419
0
              Decref(i->value());
420
0
          }
421
0
          runq->clear();
422
0
          return 0;
423
0
        }
424
12.1k
        break;
425
12.1k
      }
426
7.80M
    }
427
7.80M
    Decref(t);
428
7.80M
  }
429
26.2k
  runq->clear();
430
26.2k
  return 0;
431
26.3k
}
432
433
0
std::string NFA::FormatCapture(const char** capture) {
434
0
  std::string s;
435
0
  for (int i = 0; i < ncapture_; i+=2) {
436
0
    if (capture[i] == NULL)
437
0
      s += "(?,?)";
438
0
    else if (capture[i+1] == NULL)
439
0
      s += absl::StrFormat("(%d,?)",
440
0
                           capture[i] - btext_);
441
0
    else
442
0
      s += absl::StrFormat("(%d,%d)",
443
0
                           capture[i] - btext_,
444
0
                           capture[i+1] - btext_);
445
0
  }
446
0
  return s;
447
0
}
448
449
bool NFA::Search(absl::string_view text, absl::string_view context,
450
                 bool anchored, bool longest, absl::string_view* submatch,
451
1.36k
                 int nsubmatch) {
452
1.36k
  if (start_ == 0)
453
0
    return false;
454
455
1.36k
  if (context.data() == NULL)
456
0
    context = text;
457
458
  // Sanity check: make sure that text lies within context.
459
1.36k
  if (BeginPtr(text) < BeginPtr(context) || EndPtr(text) > EndPtr(context)) {
460
0
    ABSL_LOG(DFATAL) << "context does not contain text";
461
0
    return false;
462
0
  }
463
464
1.36k
  if (prog_->anchor_start() && BeginPtr(context) != BeginPtr(text))
465
0
    return false;
466
1.36k
  if (prog_->anchor_end() && EndPtr(context) != EndPtr(text))
467
0
    return false;
468
1.36k
  anchored |= prog_->anchor_start();
469
1.36k
  if (prog_->anchor_end()) {
470
114
    longest = true;
471
114
    endmatch_ = true;
472
114
  }
473
474
1.36k
  if (nsubmatch < 0) {
475
0
    ABSL_LOG(DFATAL) << "Bad args: nsubmatch=" << nsubmatch;
476
0
    return false;
477
0
  }
478
479
  // Save search parameters.
480
1.36k
  ncapture_ = 2*nsubmatch;
481
1.36k
  longest_ = longest;
482
483
1.36k
  if (nsubmatch == 0) {
484
    // We need to maintain match[0], both to distinguish the
485
    // longest match (if longest is true) and also to tell
486
    // whether we've seen any matches at all.
487
0
    ncapture_ = 2;
488
0
  }
489
490
1.36k
  match_ = new const char*[ncapture_];
491
1.36k
  memset(match_, 0, ncapture_*sizeof match_[0]);
492
1.36k
  matched_ = false;
493
494
  // For debugging prints.
495
1.36k
  btext_ = context.data();
496
  // For convenience.
497
1.36k
  etext_ = text.data() + text.size();
498
499
1.36k
  if (ExtraDebug)
500
0
    absl::FPrintF(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n",
501
0
                  text, context, anchored, longest);
502
503
  // Set up search.
504
1.36k
  Threadq* runq = &q0_;
505
1.36k
  Threadq* nextq = &q1_;
506
1.36k
  runq->clear();
507
1.36k
  nextq->clear();
508
509
  // Loop over the text, stepping the machine.
510
26.3k
  for (const char* p = text.data();; p++) {
511
26.3k
    if (ExtraDebug) {
512
0
      int c = 0;
513
0
      if (p == btext_)
514
0
        c = '^';
515
0
      else if (p > etext_)
516
0
        c = '$';
517
0
      else if (p < etext_)
518
0
        c = p[0] & 0xFF;
519
520
0
      absl::FPrintF(stderr, "%c:", c);
521
0
      for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
522
0
        Thread* t = i->value();
523
0
        if (t == NULL)
524
0
          continue;
525
0
        absl::FPrintF(stderr, " %d%s", i->index(), FormatCapture(t->capture));
526
0
      }
527
0
      absl::FPrintF(stderr, "\n");
528
0
    }
529
530
    // This is a no-op the first time around the loop because runq is empty.
531
26.3k
    int id = Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p);
532
26.3k
    ABSL_DCHECK_EQ(runq->size(), 0);
533
26.3k
    using std::swap;
534
26.3k
    swap(nextq, runq);
535
26.3k
    nextq->clear();
536
26.3k
    if (id != 0) {
537
      // We're done: full match ahead.
538
161
      p = etext_;
539
767
      for (;;) {
540
767
        Prog::Inst* ip = prog_->inst(id);
541
767
        switch (ip->opcode()) {
542
0
          default:
543
0
            ABSL_LOG(DFATAL) << "Unexpected opcode in short circuit: "
544
0
                             << ip->opcode();
545
0
            break;
546
547
503
          case kInstCapture:
548
503
            if (ip->cap() < ncapture_)
549
213
              match_[ip->cap()] = p;
550
503
            id = ip->out();
551
503
            continue;
552
553
103
          case kInstNop:
554
103
            id = ip->out();
555
103
            continue;
556
557
161
          case kInstMatch:
558
161
            match_[1] = p;
559
161
            matched_ = true;
560
161
            break;
561
767
        }
562
161
        break;
563
767
      }
564
161
      break;
565
161
    }
566
567
26.2k
    if (p > etext_)
568
1.20k
      break;
569
570
    // Start a new thread if there have not been any matches.
571
    // (No point in starting a new thread if there have been
572
    // matches, since it would be to the right of the match
573
    // we already found.)
574
25.0k
    if (!matched_ && (!anchored || p == text.data())) {
575
      // Try to use prefix accel (e.g. memchr) to skip ahead.
576
      // The search must be unanchored and there must be zero
577
      // possible matches already.
578
1.36k
      if (!anchored && runq->size() == 0 &&
579
0
          p < etext_ && prog_->can_prefix_accel()) {
580
0
        p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext_ - p));
581
0
        if (p == NULL)
582
0
          p = etext_;
583
0
      }
584
585
1.36k
      Thread* t = AllocThread();
586
1.36k
      CopyCapture(t->capture, match_);
587
1.36k
      t->capture[0] = p;
588
1.36k
      AddToThreadq(runq, start_, p < etext_ ? p[0] & 0xFF : -1, context, p,
589
1.36k
                   t);
590
1.36k
      Decref(t);
591
1.36k
    }
592
593
    // If all the threads have died, stop early.
594
25.0k
    if (runq->size() == 0) {
595
0
      if (ExtraDebug)
596
0
        absl::FPrintF(stderr, "dead\n");
597
0
      break;
598
0
    }
599
600
    // Avoid invoking undefined behavior (arithmetic on a null pointer)
601
    // by simply not continuing the loop.
602
    // This complements the special case in NFA::Step().
603
25.0k
    if (p == NULL) {
604
0
      (void) Step(runq, nextq, -1, context, p);
605
0
      ABSL_DCHECK_EQ(runq->size(), 0);
606
0
      using std::swap;
607
0
      swap(nextq, runq);
608
0
      nextq->clear();
609
0
      break;
610
0
    }
611
25.0k
  }
612
613
1.36k
  for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
614
0
    if (i->value() != NULL)
615
0
      Decref(i->value());
616
0
  }
617
618
1.36k
  if (matched_) {
619
4.10k
    for (int i = 0; i < nsubmatch; i++)
620
2.73k
      submatch[i] = absl::string_view(
621
2.73k
          match_[2 * i],
622
2.73k
          static_cast<size_t>(match_[2 * i + 1] - match_[2 * i]));
623
1.36k
    if (ExtraDebug)
624
0
      absl::FPrintF(stderr, "match (%d,%d)\n",
625
0
                    match_[0] - btext_,
626
0
                    match_[1] - btext_);
627
1.36k
    return true;
628
1.36k
  }
629
0
  return false;
630
1.36k
}
631
632
bool Prog::SearchNFA(absl::string_view text, absl::string_view context,
633
                     Anchor anchor, MatchKind kind, absl::string_view* match,
634
1.36k
                     int nmatch) {
635
1.36k
  if (ExtraDebug)
636
0
    Dump();
637
638
1.36k
  NFA nfa(this);
639
1.36k
  absl::string_view sp;
640
1.36k
  if (kind == kFullMatch) {
641
1.36k
    anchor = kAnchored;
642
1.36k
    if (nmatch == 0) {
643
0
      match = &sp;
644
0
      nmatch = 1;
645
0
    }
646
1.36k
  }
647
1.36k
  if (!nfa.Search(text, context, anchor == kAnchored, kind != kFirstMatch, match, nmatch))
648
0
    return false;
649
1.36k
  if (kind == kFullMatch && EndPtr(match[0]) != EndPtr(text))
650
0
    return false;
651
1.36k
  return true;
652
1.36k
}
653
654
// For each instruction i in the program reachable from the start, compute the
655
// number of instructions reachable from i by following only empty transitions
656
// and record that count as fanout[i].
657
//
658
// fanout holds the results and is also the work queue for the outer iteration.
659
// reachable holds the reached nodes for the inner iteration.
660
19.3k
void Prog::Fanout(SparseArray<int>* fanout) {
661
19.3k
  ABSL_DCHECK_EQ(fanout->max_size(), size());
662
19.3k
  SparseSet reachable(size());
663
19.3k
  fanout->clear();
664
19.3k
  fanout->set_new(start(), 0);
665
4.26M
  for (SparseArray<int>::iterator i = fanout->begin(); i != fanout->end(); ++i) {
666
4.24M
    int* count = &i->value();
667
4.24M
    reachable.clear();
668
4.24M
    reachable.insert(i->index());
669
69.1M
    for (SparseSet::iterator j = reachable.begin(); j != reachable.end(); ++j) {
670
64.8M
      int id = *j;
671
64.8M
      Prog::Inst* ip = inst(id);
672
64.8M
      switch (ip->opcode()) {
673
0
        default:
674
0
          ABSL_LOG(DFATAL) << "unhandled " << ip->opcode()
675
0
                           << " in Prog::Fanout()";
676
0
          break;
677
678
24.0M
        case kInstByteRange:
679
24.0M
          if (!ip->last())
680
16.7M
            reachable.insert(id+1);
681
682
24.0M
          (*count)++;
683
24.0M
          if (!fanout->has_index(ip->out())) {
684
4.22M
            fanout->set_new(ip->out(), 0);
685
4.22M
          }
686
24.0M
          break;
687
688
4.67k
        case kInstAltMatch:
689
4.67k
          ABSL_DCHECK(!ip->last());
690
4.67k
          reachable.insert(id+1);
691
4.67k
          break;
692
693
4.07M
        case kInstCapture:
694
4.96M
        case kInstEmptyWidth:
695
40.6M
        case kInstNop:
696
40.6M
          if (!ip->last())
697
21.6M
            reachable.insert(id+1);
698
699
40.6M
          reachable.insert(ip->out());
700
40.6M
          break;
701
702
150k
        case kInstMatch:
703
150k
          if (!ip->last())
704
64.4k
            reachable.insert(id+1);
705
150k
          break;
706
707
6.54k
        case kInstFail:
708
6.54k
          break;
709
64.8M
      }
710
64.8M
    }
711
4.24M
  }
712
19.3k
}
713
714
}  // namespace re2