Coverage Report

Created: 2026-01-09 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/re2/re2/nfa.cc
Line
Count
Source
1
// Copyright 2006-2007 The RE2 Authors.  All Rights Reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
// Tested by search_test.cc.
6
//
7
// Prog::SearchNFA, an NFA search.
8
// This is an actual NFA like the theorists talk about,
9
// not the pseudo-NFA found in backtracking regexp implementations.
10
//
11
// IMPLEMENTATION
12
//
13
// This algorithm is a variant of one that appeared in Rob Pike's sam editor,
14
// which is a variant of the one described in Thompson's 1968 CACM paper.
15
// See http://swtch.com/~rsc/regexp/ for various history.  The main feature
16
// over the DFA implementation is that it tracks submatch boundaries.
17
//
18
// When the choice of submatch boundaries is ambiguous, this particular
19
// implementation makes the same choices that traditional backtracking
20
// implementations (in particular, Perl and PCRE) do.
21
// Note that unlike in Perl and PCRE, this algorithm *cannot* take exponential
22
// time in the length of the input.
23
//
24
// Like Thompson's original machine and like the DFA implementation, this
25
// implementation notices a match only once it is one byte past it.
26
27
#include <stdio.h>
28
#include <string.h>
29
30
#include <algorithm>
31
#include <deque>
32
#include <string>
33
#include <utility>
34
35
#include "absl/log/absl_check.h"
36
#include "absl/log/absl_log.h"
37
#include "absl/strings/str_format.h"
38
#include "absl/strings/string_view.h"
39
#include "re2/pod_array.h"
40
#include "re2/prog.h"
41
#include "re2/regexp.h"
42
#include "re2/sparse_array.h"
43
#include "re2/sparse_set.h"
44
45
namespace re2 {
46
47
static const bool ExtraDebug = false;
48
49
class NFA {
50
 public:
51
  NFA(Prog* prog);
52
  ~NFA();
53
54
  // Searches for a matching string.
55
  //   * If anchored is true, only considers matches starting at offset.
56
  //     Otherwise finds lefmost match at or after offset.
57
  //   * If longest is true, returns the longest match starting
58
  //     at the chosen start point.  Otherwise returns the so-called
59
  //     left-biased match, the one traditional backtracking engines
60
  //     (like Perl and PCRE) find.
61
  // Records submatch boundaries in submatch[1..nsubmatch-1].
62
  // Submatch[0] is the entire match.  When there is a choice in
63
  // which text matches each subexpression, the submatch boundaries
64
  // are chosen to match what a backtracking implementation would choose.
65
  bool Search(absl::string_view text, absl::string_view context, bool anchored,
66
              bool longest, absl::string_view* submatch, int nsubmatch);
67
68
 private:
69
  struct Thread {
70
    union {
71
      int ref;
72
      Thread* next;  // when on free list
73
    };
74
    const char** capture;
75
  };
76
77
  // State for explicit stack in AddToThreadq.
78
  struct AddState {
79
    int id;     // Inst to process
80
    Thread* t;  // if not null, set t0 = t before processing id
81
  };
82
83
  // Threadq is a list of threads.  The list is sorted by the order
84
  // in which Perl would explore that particular state -- the earlier
85
  // choices appear earlier in the list.
86
  typedef SparseArray<Thread*> Threadq;
87
88
  inline Thread* AllocThread();
89
  inline Thread* Incref(Thread* t);
90
  inline void Decref(Thread* t);
91
92
  // Follows all empty arrows from id0 and enqueues all the states reached.
93
  // Enqueues only the ByteRange instructions that match byte c.
94
  // context is used (with p) for evaluating empty-width specials.
95
  // p is the current input position, and t0 is the current thread.
96
  void AddToThreadq(Threadq* q, int id0, int c, absl::string_view context,
97
                    const char* p, Thread* t0);
98
99
  // Run runq on byte c, appending new states to nextq.
100
  // Updates matched_ and match_ as new, better matches are found.
101
  // context is used (with p) for evaluating empty-width specials.
102
  // p is the position of byte c in the input string for AddToThreadq;
103
  // p-1 will be used when processing Match instructions.
104
  // Frees all the threads on runq.
105
  // If there is a shortcut to the end, returns that shortcut.
106
  int Step(Threadq* runq, Threadq* nextq, int c, absl::string_view context,
107
           const char* p);
108
109
  // Returns text version of capture information, for debugging.
110
  std::string FormatCapture(const char** capture);
111
112
340k
  void CopyCapture(const char** dst, const char** src) {
113
340k
    memmove(dst, src, ncapture_*sizeof src[0]);
114
340k
  }
115
116
  Prog* prog_;                // underlying program
117
  int start_;                 // start instruction in program
118
  int ncapture_;              // number of submatches to track
119
  bool longest_;              // whether searching for longest match
120
  bool endmatch_;             // whether match must end at text.end()
121
  const char* btext_;         // beginning of text (for FormatSubmatch)
122
  const char* etext_;         // end of text (for endmatch_)
123
  Threadq q0_, q1_;           // pre-allocated for Search.
124
  PODArray<AddState> stack_;  // pre-allocated for AddToThreadq
125
  std::deque<Thread> arena_;  // thread arena
126
  Thread* freelist_;          // thread freelist
127
  const char** match_;        // best match so far
128
  bool matched_;              // any match so far?
129
130
  NFA(const NFA&) = delete;
131
  NFA& operator=(const NFA&) = delete;
132
};
133
134
1.46k
NFA::NFA(Prog* prog) {
135
1.46k
  prog_ = prog;
136
1.46k
  start_ = prog_->start();
137
1.46k
  ncapture_ = 0;
138
1.46k
  longest_ = false;
139
1.46k
  endmatch_ = false;
140
1.46k
  btext_ = NULL;
141
1.46k
  etext_ = NULL;
142
1.46k
  q0_.resize(prog_->size());
143
1.46k
  q1_.resize(prog_->size());
144
  // See NFA::AddToThreadq() for why this is so.
145
1.46k
  int nstack = 2*prog_->inst_count(kInstCapture) +
146
1.46k
               prog_->inst_count(kInstEmptyWidth) +
147
1.46k
               prog_->inst_count(kInstNop) + 1;  // + 1 for start inst
148
1.46k
  stack_ = PODArray<AddState>(nstack);
149
1.46k
  freelist_ = NULL;
150
1.46k
  match_ = NULL;
151
1.46k
  matched_ = false;
152
1.46k
}
153
154
1.46k
NFA::~NFA() {
155
1.46k
  delete[] match_;
156
1.46k
  for (const Thread& t : arena_)
157
121k
    delete[] t.capture;
158
1.46k
}
159
160
325k
NFA::Thread* NFA::AllocThread() {
161
325k
  Thread* t = freelist_;
162
325k
  if (t != NULL) {
163
204k
    freelist_ = t->next;
164
204k
    t->ref = 1;
165
    // We don't need to touch t->capture because
166
    // the caller will immediately overwrite it.
167
204k
    return t;
168
204k
  }
169
121k
  arena_.emplace_back();
170
121k
  t = &arena_.back();
171
121k
  t->ref = 1;
172
121k
  t->capture = new const char*[ncapture_];
173
121k
  return t;
174
325k
}
175
176
10.9M
NFA::Thread* NFA::Incref(Thread* t) {
177
10.9M
  ABSL_DCHECK(t != NULL);
178
10.9M
  t->ref++;
179
10.9M
  return t;
180
10.9M
}
181
182
11.2M
void NFA::Decref(Thread* t) {
183
11.2M
  ABSL_DCHECK(t != NULL);
184
11.2M
  t->ref--;
185
11.2M
  if (t->ref > 0)
186
10.9M
    return;
187
11.2M
  ABSL_DCHECK_EQ(t->ref, 0);
188
325k
  t->next = freelist_;
189
325k
  freelist_ = t;
190
325k
}
191
192
// Follows all empty arrows from id0 and enqueues all the states reached.
193
// Enqueues only the ByteRange instructions that match byte c.
194
// context is used (with p) for evaluating empty-width specials.
195
// p is the current input position, and t0 is the current thread.
196
void NFA::AddToThreadq(Threadq* q, int id0, int c, absl::string_view context,
197
10.8M
                       const char* p, Thread* t0) {
198
10.8M
  if (id0 == 0)
199
0
    return;
200
201
  // Use stack_ to hold our stack of instructions yet to process.
202
  // It was preallocated as follows:
203
  //   two entries per Capture;
204
  //   one entry per EmptyWidth; and
205
  //   one entry per Nop.
206
  // This reflects the maximum number of stack pushes that each can
207
  // perform. (Each instruction can be processed at most once.)
208
10.8M
  AddState* stk = stack_.data();
209
10.8M
  int nstk = 0;
210
211
10.8M
  stk[nstk++] = {id0, NULL};
212
22.4M
  while (nstk > 0) {
213
11.5M
    ABSL_DCHECK_LE(nstk, stack_.size());
214
11.5M
    AddState a = stk[--nstk];
215
216
24.1M
  Loop:
217
24.1M
    if (a.t != NULL) {
218
      // t0 was a thread that we allocated and copied in order to
219
      // record the capture, so we must now decref it.
220
324k
      Decref(t0);
221
324k
      t0 = a.t;
222
324k
    }
223
224
24.1M
    int id = a.id;
225
24.1M
    if (id == 0)
226
324k
      continue;
227
23.8M
    if (q->has_index(id)) {
228
5.39M
      if (ExtraDebug)
229
0
        absl::FPrintF(stderr, "  [%d%s]\n", id, FormatCapture(t0->capture));
230
5.39M
      continue;
231
5.39M
    }
232
233
    // Create entry in q no matter what.  We might fill it in below,
234
    // or we might not.  Even if not, it is necessary to have it,
235
    // so that we don't revisit id0 during the recursion.
236
18.4M
    q->set_new(id, NULL);
237
18.4M
    Thread** tp = &q->get_existing(id);
238
18.4M
    int j;
239
18.4M
    Thread* t;
240
18.4M
    Prog::Inst* ip = prog_->inst(id);
241
18.4M
    switch (ip->opcode()) {
242
0
    default:
243
0
      ABSL_LOG(DFATAL) << "unhandled " << ip->opcode() << " in AddToThreadq";
244
0
      break;
245
246
0
    case kInstFail:
247
0
      break;
248
249
2.47k
    case kInstAltMatch:
250
      // Save state; will pick up at next byte.
251
2.47k
      t = Incref(t0);
252
2.47k
      *tp = t;
253
254
2.47k
      ABSL_DCHECK(!ip->last());
255
2.47k
      a = {id+1, NULL};
256
2.47k
      goto Loop;
257
258
5.95M
    case kInstNop:
259
5.95M
      if (!ip->last())
260
251k
        stk[nstk++] = {id+1, NULL};
261
262
      // Continue on.
263
5.95M
      a = {ip->out(), NULL};
264
5.95M
      goto Loop;
265
266
408k
    case kInstCapture:
267
408k
      if (!ip->last())
268
5.28k
        stk[nstk++] = {id+1, NULL};
269
270
408k
      if ((j=ip->cap()) < ncapture_) {
271
        // Push a dummy whose only job is to restore t0
272
        // once we finish exploring this possibility.
273
324k
        stk[nstk++] = {0, t0};
274
275
        // Record capture.
276
324k
        t = AllocThread();
277
324k
        CopyCapture(t->capture, t0->capture);
278
324k
        t->capture[j] = p;
279
324k
        t0 = t;
280
324k
      }
281
408k
      a = {ip->out(), NULL};
282
408k
      goto Loop;
283
284
11.9M
    case kInstByteRange:
285
11.9M
      if (!ip->Matches(c))
286
1.09M
        goto Next;
287
288
      // Save state; will pick up at next byte.
289
10.8M
      t = Incref(t0);
290
10.8M
      *tp = t;
291
10.8M
      if (ExtraDebug)
292
0
        absl::FPrintF(stderr, " + %d%s\n", id, FormatCapture(t0->capture));
293
294
10.8M
      if (ip->hint() == 0)
295
5.54M
        break;
296
5.33M
      a = {id+ip->hint(), NULL};
297
5.33M
      goto Loop;
298
299
26.1k
    case kInstMatch:
300
      // Save state; will pick up at next byte.
301
26.1k
      t = Incref(t0);
302
26.1k
      *tp = t;
303
26.1k
      if (ExtraDebug)
304
0
        absl::FPrintF(stderr, " ! %d%s\n", id, FormatCapture(t0->capture));
305
306
1.11M
    Next:
307
1.11M
      if (ip->last())
308
193k
        break;
309
923k
      a = {id+1, NULL};
310
923k
      goto Loop;
311
312
79.9k
    case kInstEmptyWidth:
313
79.9k
      if (!ip->last())
314
67.3k
        stk[nstk++] = {id+1, NULL};
315
316
      // Continue on if we have all the right flag bits.
317
79.9k
      if (ip->empty() & ~Prog::EmptyFlags(context, p))
318
72.5k
        break;
319
7.37k
      a = {ip->out(), NULL};
320
7.37k
      goto Loop;
321
18.4M
    }
322
18.4M
  }
323
10.8M
}
324
325
// Run runq on byte c, appending new states to nextq.
326
// Updates matched_ and match_ as new, better matches are found.
327
// context is used (with p) for evaluating empty-width specials.
328
// p is the position of byte c in the input string for AddToThreadq;
329
// p-1 will be used when processing Match instructions.
330
// Frees all the threads on runq.
331
// If there is a shortcut to the end, returns that shortcut.
332
int NFA::Step(Threadq* runq, Threadq* nextq, int c, absl::string_view context,
333
38.3k
              const char* p) {
334
38.3k
  nextq->clear();
335
336
18.4M
  for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
337
18.4M
    Thread* t = i->value();
338
18.4M
    if (t == NULL)
339
7.52M
      continue;
340
341
10.9M
    if (longest_) {
342
      // Can skip any threads started after our current best match.
343
10.9M
      if (matched_ && match_[0] < t->capture[0]) {
344
0
        Decref(t);
345
0
        continue;
346
0
      }
347
10.9M
    }
348
349
10.9M
    int id = i->index();
350
10.9M
    Prog::Inst* ip = prog_->inst(id);
351
352
10.9M
    switch (ip->opcode()) {
353
0
      default:
354
        // Should only see the values handled below.
355
0
        ABSL_LOG(DFATAL) << "Unhandled " << ip->opcode() << " in step";
356
0
        break;
357
358
10.8M
      case kInstByteRange:
359
10.8M
        AddToThreadq(nextq, ip->out(), c, context, p, t);
360
10.8M
        break;
361
362
2.44k
      case kInstAltMatch:
363
2.44k
        if (i != runq->begin())
364
2.26k
          break;
365
        // The match is ours if we want it.
366
189
        if (ip->greedy(prog_) || longest_) {
367
189
          CopyCapture(match_, t->capture);
368
189
          matched_ = true;
369
370
189
          Decref(t);
371
6.91k
          for (++i; i != runq->end(); ++i) {
372
6.73k
            if (i->value() != NULL)
373
1.87k
              Decref(i->value());
374
6.73k
          }
375
189
          runq->clear();
376
189
          if (ip->greedy(prog_))
377
151
            return ip->out1();
378
38
          return ip->out();
379
189
        }
380
0
        break;
381
382
25.9k
      case kInstMatch: {
383
        // Avoid invoking undefined behavior (arithmetic on a null pointer)
384
        // by storing p instead of p-1. (What would the latter even mean?!)
385
        // This complements the special case in NFA::Search().
386
25.9k
        if (p == NULL) {
387
0
          CopyCapture(match_, t->capture);
388
0
          match_[1] = p;
389
0
          matched_ = true;
390
0
          break;
391
0
        }
392
393
25.9k
        if (endmatch_ && p-1 != etext_)
394
8.89k
          break;
395
396
17.0k
        if (longest_) {
397
          // Leftmost-longest mode: save this match only if
398
          // it is either farther to the left or at the same
399
          // point but longer than an existing match.
400
17.0k
          if (!matched_ || t->capture[0] < match_[0] ||
401
15.6k
              (t->capture[0] == match_[0] && p-1 > match_[1])) {
402
14.0k
            CopyCapture(match_, t->capture);
403
14.0k
            match_[1] = p-1;
404
14.0k
            matched_ = true;
405
14.0k
          }
406
17.0k
        } else {
407
          // Leftmost-biased mode: this match is by definition
408
          // better than what we've already found (see next line).
409
0
          CopyCapture(match_, t->capture);
410
0
          match_[1] = p-1;
411
0
          matched_ = true;
412
413
          // Cut off the threads that can only find matches
414
          // worse than the one we just found: don't run the
415
          // rest of the current Threadq.
416
0
          Decref(t);
417
0
          for (++i; i != runq->end(); ++i) {
418
0
            if (i->value() != NULL)
419
0
              Decref(i->value());
420
0
          }
421
0
          runq->clear();
422
0
          return 0;
423
0
        }
424
17.0k
        break;
425
17.0k
      }
426
10.9M
    }
427
10.9M
    Decref(t);
428
10.9M
  }
429
38.1k
  runq->clear();
430
38.1k
  return 0;
431
38.3k
}
432
433
0
std::string NFA::FormatCapture(const char** capture) {
434
0
  std::string s;
435
0
  for (int i = 0; i < ncapture_; i+=2) {
436
0
    if (capture[i] == NULL)
437
0
      s += "(?,?)";
438
0
    else if (capture[i+1] == NULL)
439
0
      s += absl::StrFormat("(%d,?)",
440
0
                           capture[i] - btext_);
441
0
    else
442
0
      s += absl::StrFormat("(%d,%d)",
443
0
                           capture[i] - btext_,
444
0
                           capture[i+1] - btext_);
445
0
  }
446
0
  return s;
447
0
}
448
449
bool NFA::Search(absl::string_view text, absl::string_view context,
450
                 bool anchored, bool longest, absl::string_view* submatch,
451
1.46k
                 int nsubmatch) {
452
1.46k
  if (start_ == 0)
453
0
    return false;
454
455
1.46k
  if (context.data() == NULL)
456
0
    context = text;
457
458
  // Sanity check: make sure that text lies within context.
459
1.46k
  if (BeginPtr(text) < BeginPtr(context) || EndPtr(text) > EndPtr(context)) {
460
0
    ABSL_LOG(DFATAL) << "context does not contain text";
461
0
    return false;
462
0
  }
463
464
1.46k
  if (prog_->anchor_start() && BeginPtr(context) != BeginPtr(text))
465
0
    return false;
466
1.46k
  if (prog_->anchor_end() && EndPtr(context) != EndPtr(text))
467
0
    return false;
468
1.46k
  anchored |= prog_->anchor_start();
469
1.46k
  if (prog_->anchor_end()) {
470
152
    longest = true;
471
152
    endmatch_ = true;
472
152
  }
473
474
1.46k
  if (nsubmatch < 0) {
475
0
    ABSL_LOG(DFATAL) << "Bad args: nsubmatch=" << nsubmatch;
476
0
    return false;
477
0
  }
478
479
  // Save search parameters.
480
1.46k
  ncapture_ = 2*nsubmatch;
481
1.46k
  longest_ = longest;
482
483
1.46k
  if (nsubmatch == 0) {
484
    // We need to maintain match[0], both to distinguish the
485
    // longest match (if longest is true) and also to tell
486
    // whether we've seen any matches at all.
487
0
    ncapture_ = 2;
488
0
  }
489
490
1.46k
  match_ = new const char*[ncapture_];
491
1.46k
  memset(match_, 0, ncapture_*sizeof match_[0]);
492
1.46k
  matched_ = false;
493
494
  // For debugging prints.
495
1.46k
  btext_ = context.data();
496
  // For convenience.
497
1.46k
  etext_ = text.data() + text.size();
498
499
1.46k
  if (ExtraDebug)
500
0
    absl::FPrintF(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n",
501
0
                  text, context, anchored, longest);
502
503
  // Set up search.
504
1.46k
  Threadq* runq = &q0_;
505
1.46k
  Threadq* nextq = &q1_;
506
1.46k
  runq->clear();
507
1.46k
  nextq->clear();
508
509
  // Loop over the text, stepping the machine.
510
38.3k
  for (const char* p = text.data();; p++) {
511
38.3k
    if (ExtraDebug) {
512
0
      int c = 0;
513
0
      if (p == btext_)
514
0
        c = '^';
515
0
      else if (p > etext_)
516
0
        c = '$';
517
0
      else if (p < etext_)
518
0
        c = p[0] & 0xFF;
519
520
0
      absl::FPrintF(stderr, "%c:", c);
521
0
      for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
522
0
        Thread* t = i->value();
523
0
        if (t == NULL)
524
0
          continue;
525
0
        absl::FPrintF(stderr, " %d%s", i->index(), FormatCapture(t->capture));
526
0
      }
527
0
      absl::FPrintF(stderr, "\n");
528
0
    }
529
530
    // This is a no-op the first time around the loop because runq is empty.
531
38.3k
    int id = Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p);
532
38.3k
    ABSL_DCHECK_EQ(runq->size(), 0);
533
38.3k
    using std::swap;
534
38.3k
    swap(nextq, runq);
535
38.3k
    nextq->clear();
536
38.3k
    if (id != 0) {
537
      // We're done: full match ahead.
538
189
      p = etext_;
539
775
      for (;;) {
540
775
        Prog::Inst* ip = prog_->inst(id);
541
775
        switch (ip->opcode()) {
542
0
          default:
543
0
            ABSL_LOG(DFATAL) << "Unexpected opcode in short circuit: "
544
0
                             << ip->opcode();
545
0
            break;
546
547
468
          case kInstCapture:
548
468
            if (ip->cap() < ncapture_)
549
242
              match_[ip->cap()] = p;
550
468
            id = ip->out();
551
468
            continue;
552
553
118
          case kInstNop:
554
118
            id = ip->out();
555
118
            continue;
556
557
189
          case kInstMatch:
558
189
            match_[1] = p;
559
189
            matched_ = true;
560
189
            break;
561
775
        }
562
189
        break;
563
775
      }
564
189
      break;
565
189
    }
566
567
38.1k
    if (p > etext_)
568
1.27k
      break;
569
570
    // Start a new thread if there have not been any matches.
571
    // (No point in starting a new thread if there have been
572
    // matches, since it would be to the right of the match
573
    // we already found.)
574
36.8k
    if (!matched_ && (!anchored || p == text.data())) {
575
      // Try to use prefix accel (e.g. memchr) to skip ahead.
576
      // The search must be unanchored and there must be zero
577
      // possible matches already.
578
1.46k
      if (!anchored && runq->size() == 0 &&
579
0
          p < etext_ && prog_->can_prefix_accel()) {
580
0
        p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext_ - p));
581
0
        if (p == NULL)
582
0
          p = etext_;
583
0
      }
584
585
1.46k
      Thread* t = AllocThread();
586
1.46k
      CopyCapture(t->capture, match_);
587
1.46k
      t->capture[0] = p;
588
1.46k
      AddToThreadq(runq, start_, p < etext_ ? p[0] & 0xFF : -1, context, p,
589
1.46k
                   t);
590
1.46k
      Decref(t);
591
1.46k
    }
592
593
    // If all the threads have died, stop early.
594
36.8k
    if (runq->size() == 0) {
595
0
      if (ExtraDebug)
596
0
        absl::FPrintF(stderr, "dead\n");
597
0
      break;
598
0
    }
599
600
    // Avoid invoking undefined behavior (arithmetic on a null pointer)
601
    // by simply not continuing the loop.
602
    // This complements the special case in NFA::Step().
603
36.8k
    if (p == NULL) {
604
0
      (void) Step(runq, nextq, -1, context, p);
605
0
      ABSL_DCHECK_EQ(runq->size(), 0);
606
0
      using std::swap;
607
0
      swap(nextq, runq);
608
0
      nextq->clear();
609
0
      break;
610
0
    }
611
36.8k
  }
612
613
1.46k
  for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
614
0
    if (i->value() != NULL)
615
0
      Decref(i->value());
616
0
  }
617
618
1.46k
  if (matched_) {
619
4.39k
    for (int i = 0; i < nsubmatch; i++)
620
2.93k
      submatch[i] = absl::string_view(
621
2.93k
          match_[2 * i],
622
2.93k
          static_cast<size_t>(match_[2 * i + 1] - match_[2 * i]));
623
1.46k
    if (ExtraDebug)
624
0
      absl::FPrintF(stderr, "match (%d,%d)\n",
625
0
                    match_[0] - btext_,
626
0
                    match_[1] - btext_);
627
1.46k
    return true;
628
1.46k
  }
629
0
  return false;
630
1.46k
}
631
632
bool Prog::SearchNFA(absl::string_view text, absl::string_view context,
633
                     Anchor anchor, MatchKind kind, absl::string_view* match,
634
1.46k
                     int nmatch) {
635
1.46k
  if (ExtraDebug)
636
0
    Dump();
637
638
1.46k
  NFA nfa(this);
639
1.46k
  absl::string_view sp;
640
1.46k
  if (kind == kFullMatch) {
641
1.46k
    anchor = kAnchored;
642
1.46k
    if (nmatch == 0) {
643
0
      match = &sp;
644
0
      nmatch = 1;
645
0
    }
646
1.46k
  }
647
1.46k
  if (!nfa.Search(text, context, anchor == kAnchored, kind != kFirstMatch, match, nmatch))
648
0
    return false;
649
1.46k
  if (kind == kFullMatch && EndPtr(match[0]) != EndPtr(text))
650
0
    return false;
651
1.46k
  return true;
652
1.46k
}
653
654
// For each instruction i in the program reachable from the start, compute the
655
// number of instructions reachable from i by following only empty transitions
656
// and record that count as fanout[i].
657
//
658
// fanout holds the results and is also the work queue for the outer iteration.
659
// reachable holds the reached nodes for the inner iteration.
660
18.8k
void Prog::Fanout(SparseArray<int>* fanout) {
661
18.8k
  ABSL_DCHECK_EQ(fanout->max_size(), size());
662
18.8k
  SparseSet reachable(size());
663
18.8k
  fanout->clear();
664
18.8k
  fanout->set_new(start(), 0);
665
4.10M
  for (SparseArray<int>::iterator i = fanout->begin(); i != fanout->end(); ++i) {
666
4.08M
    int* count = &i->value();
667
4.08M
    reachable.clear();
668
4.08M
    reachable.insert(i->index());
669
66.9M
    for (SparseSet::iterator j = reachable.begin(); j != reachable.end(); ++j) {
670
62.8M
      int id = *j;
671
62.8M
      Prog::Inst* ip = inst(id);
672
62.8M
      switch (ip->opcode()) {
673
0
        default:
674
0
          ABSL_LOG(DFATAL) << "unhandled " << ip->opcode()
675
0
                           << " in Prog::Fanout()";
676
0
          break;
677
678
26.0M
        case kInstByteRange:
679
26.0M
          if (!ip->last())
680
19.1M
            reachable.insert(id+1);
681
682
26.0M
          (*count)++;
683
26.0M
          if (!fanout->has_index(ip->out())) {
684
4.06M
            fanout->set_new(ip->out(), 0);
685
4.06M
          }
686
26.0M
          break;
687
688
4.75k
        case kInstAltMatch:
689
4.75k
          ABSL_DCHECK(!ip->last());
690
4.75k
          reachable.insert(id+1);
691
4.75k
          break;
692
693
1.79M
        case kInstCapture:
694
2.61M
        case kInstEmptyWidth:
695
36.7M
        case kInstNop:
696
36.7M
          if (!ip->last())
697
19.9M
            reachable.insert(id+1);
698
699
36.7M
          reachable.insert(ip->out());
700
36.7M
          break;
701
702
107k
        case kInstMatch:
703
107k
          if (!ip->last())
704
34.4k
            reachable.insert(id+1);
705
107k
          break;
706
707
3.70k
        case kInstFail:
708
3.70k
          break;
709
62.8M
      }
710
62.8M
    }
711
4.08M
  }
712
18.8k
}
713
714
}  // namespace re2