Coverage Report

Created: 2024-04-26 11:14

/src/re2/re2/nfa.cc
Line
Count
Source (jump to first uncovered line)
1
// Copyright 2006-2007 The RE2 Authors.  All Rights Reserved.
2
// Use of this source code is governed by a BSD-style
3
// license that can be found in the LICENSE file.
4
5
// Tested by search_test.cc.
6
//
7
// Prog::SearchNFA, an NFA search.
8
// This is an actual NFA like the theorists talk about,
9
// not the pseudo-NFA found in backtracking regexp implementations.
10
//
11
// IMPLEMENTATION
12
//
13
// This algorithm is a variant of one that appeared in Rob Pike's sam editor,
14
// which is a variant of the one described in Thompson's 1968 CACM paper.
15
// See http://swtch.com/~rsc/regexp/ for various history.  The main feature
16
// over the DFA implementation is that it tracks submatch boundaries.
17
//
18
// When the choice of submatch boundaries is ambiguous, this particular
19
// implementation makes the same choices that traditional backtracking
20
// implementations (in particular, Perl and PCRE) do.
21
// Note that unlike in Perl and PCRE, this algorithm *cannot* take exponential
22
// time in the length of the input.
23
//
24
// Like Thompson's original machine and like the DFA implementation, this
25
// implementation notices a match only once it is one byte past it.
26
27
#include <stdio.h>
28
#include <string.h>
29
#include <algorithm>
30
#include <deque>
31
#include <string>
32
#include <utility>
33
#include <vector>
34
35
#include "util/logging.h"
36
#include "util/strutil.h"
37
#include "re2/pod_array.h"
38
#include "re2/prog.h"
39
#include "re2/regexp.h"
40
#include "re2/sparse_array.h"
41
#include "re2/sparse_set.h"
42
43
namespace re2 {
44
45
static const bool ExtraDebug = false;
46
47
class NFA {
48
 public:
49
  NFA(Prog* prog);
50
  ~NFA();
51
52
  // Searches for a matching string.
53
  //   * If anchored is true, only considers matches starting at offset.
54
  //     Otherwise finds lefmost match at or after offset.
55
  //   * If longest is true, returns the longest match starting
56
  //     at the chosen start point.  Otherwise returns the so-called
57
  //     left-biased match, the one traditional backtracking engines
58
  //     (like Perl and PCRE) find.
59
  // Records submatch boundaries in submatch[1..nsubmatch-1].
60
  // Submatch[0] is the entire match.  When there is a choice in
61
  // which text matches each subexpression, the submatch boundaries
62
  // are chosen to match what a backtracking implementation would choose.
63
  bool Search(const StringPiece& text, const StringPiece& context,
64
              bool anchored, bool longest,
65
              StringPiece* submatch, int nsubmatch);
66
67
 private:
68
  struct Thread {
69
    union {
70
      int ref;
71
      Thread* next;  // when on free list
72
    };
73
    const char** capture;
74
  };
75
76
  // State for explicit stack in AddToThreadq.
77
  struct AddState {
78
    int id;     // Inst to process
79
    Thread* t;  // if not null, set t0 = t before processing id
80
  };
81
82
  // Threadq is a list of threads.  The list is sorted by the order
83
  // in which Perl would explore that particular state -- the earlier
84
  // choices appear earlier in the list.
85
  typedef SparseArray<Thread*> Threadq;
86
87
  inline Thread* AllocThread();
88
  inline Thread* Incref(Thread* t);
89
  inline void Decref(Thread* t);
90
91
  // Follows all empty arrows from id0 and enqueues all the states reached.
92
  // Enqueues only the ByteRange instructions that match byte c.
93
  // context is used (with p) for evaluating empty-width specials.
94
  // p is the current input position, and t0 is the current thread.
95
  void AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
96
                    const char* p, Thread* t0);
97
98
  // Run runq on byte c, appending new states to nextq.
99
  // Updates matched_ and match_ as new, better matches are found.
100
  // context is used (with p) for evaluating empty-width specials.
101
  // p is the position of byte c in the input string for AddToThreadq;
102
  // p-1 will be used when processing Match instructions.
103
  // Frees all the threads on runq.
104
  // If there is a shortcut to the end, returns that shortcut.
105
  int Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
106
           const char* p);
107
108
  // Returns text version of capture information, for debugging.
109
  std::string FormatCapture(const char** capture);
110
111
67.1k
  void CopyCapture(const char** dst, const char** src) {
112
67.1k
    memmove(dst, src, ncapture_*sizeof src[0]);
113
67.1k
  }
114
115
  Prog* prog_;                // underlying program
116
  int start_;                 // start instruction in program
117
  int ncapture_;              // number of submatches to track
118
  bool longest_;              // whether searching for longest match
119
  bool endmatch_;             // whether match must end at text.end()
120
  const char* btext_;         // beginning of text (for FormatSubmatch)
121
  const char* etext_;         // end of text (for endmatch_)
122
  Threadq q0_, q1_;           // pre-allocated for Search.
123
  PODArray<AddState> stack_;  // pre-allocated for AddToThreadq
124
  std::deque<Thread> arena_;  // thread arena
125
  Thread* freelist_;          // thread freelist
126
  const char** match_;        // best match so far
127
  bool matched_;              // any match so far?
128
129
  NFA(const NFA&) = delete;
130
  NFA& operator=(const NFA&) = delete;
131
};
132
133
17.5k
NFA::NFA(Prog* prog) {
134
17.5k
  prog_ = prog;
135
17.5k
  start_ = prog_->start();
136
17.5k
  ncapture_ = 0;
137
17.5k
  longest_ = false;
138
17.5k
  endmatch_ = false;
139
17.5k
  btext_ = NULL;
140
17.5k
  etext_ = NULL;
141
17.5k
  q0_.resize(prog_->size());
142
17.5k
  q1_.resize(prog_->size());
143
  // See NFA::AddToThreadq() for why this is so.
144
17.5k
  int nstack = 2*prog_->inst_count(kInstCapture) +
145
17.5k
               prog_->inst_count(kInstEmptyWidth) +
146
17.5k
               prog_->inst_count(kInstNop) + 1;  // + 1 for start inst
147
17.5k
  stack_ = PODArray<AddState>(nstack);
148
17.5k
  freelist_ = NULL;
149
17.5k
  match_ = NULL;
150
17.5k
  matched_ = false;
151
17.5k
}
152
153
17.5k
NFA::~NFA() {
154
17.5k
  delete[] match_;
155
17.5k
  for (const Thread& t : arena_)
156
17.5k
    delete[] t.capture;
157
17.5k
}
158
159
17.5k
NFA::Thread* NFA::AllocThread() {
160
17.5k
  Thread* t = freelist_;
161
17.5k
  if (t != NULL) {
162
0
    freelist_ = t->next;
163
0
    t->ref = 1;
164
    // We don't need to touch t->capture because
165
    // the caller will immediately overwrite it.
166
0
    return t;
167
0
  }
168
17.5k
  arena_.emplace_back();
169
17.5k
  t = &arena_.back();
170
17.5k
  t->ref = 1;
171
17.5k
  t->capture = new const char*[ncapture_];
172
17.5k
  return t;
173
17.5k
}
174
175
62.2M
NFA::Thread* NFA::Incref(Thread* t) {
176
62.2M
  DCHECK(t != NULL);
177
62.2M
  t->ref++;
178
62.2M
  return t;
179
62.2M
}
180
181
62.2M
void NFA::Decref(Thread* t) {
182
62.2M
  DCHECK(t != NULL);
183
62.2M
  t->ref--;
184
62.2M
  if (t->ref > 0)
185
62.2M
    return;
186
17.5k
  DCHECK_EQ(t->ref, 0);
187
17.5k
  t->next = freelist_;
188
17.5k
  freelist_ = t;
189
17.5k
}
190
191
// Follows all empty arrows from id0 and enqueues all the states reached.
192
// Enqueues only the ByteRange instructions that match byte c.
193
// context is used (with p) for evaluating empty-width specials.
194
// p is the current input position, and t0 is the current thread.
195
void NFA::AddToThreadq(Threadq* q, int id0, int c, const StringPiece& context,
196
61.9M
                       const char* p, Thread* t0) {
197
61.9M
  if (id0 == 0)
198
0
    return;
199
200
  // Use stack_ to hold our stack of instructions yet to process.
201
  // It was preallocated as follows:
202
  //   two entries per Capture;
203
  //   one entry per EmptyWidth; and
204
  //   one entry per Nop.
205
  // This reflects the maximum number of stack pushes that each can
206
  // perform. (Each instruction can be processed at most once.)
207
61.9M
  AddState* stk = stack_.data();
208
61.9M
  int nstk = 0;
209
210
61.9M
  stk[nstk++] = {id0, NULL};
211
236M
  while (nstk > 0) {
212
174M
    DCHECK_LE(nstk, stack_.size());
213
174M
    AddState a = stk[--nstk];
214
215
441M
  Loop:
216
441M
    if (a.t != NULL) {
217
      // t0 was a thread that we allocated and copied in order to
218
      // record the capture, so we must now decref it.
219
0
      Decref(t0);
220
0
      t0 = a.t;
221
0
    }
222
223
441M
    int id = a.id;
224
441M
    if (id == 0)
225
1.05k
      continue;
226
441M
    if (q->has_index(id)) {
227
106M
      if (ExtraDebug)
228
0
        fprintf(stderr, "  [%d%s]\n", id, FormatCapture(t0->capture).c_str());
229
106M
      continue;
230
106M
    }
231
232
    // Create entry in q no matter what.  We might fill it in below,
233
    // or we might not.  Even if not, it is necessary to have it,
234
    // so that we don't revisit id0 during the recursion.
235
334M
    q->set_new(id, NULL);
236
334M
    Thread** tp = &q->get_existing(id);
237
334M
    int j;
238
334M
    Thread* t;
239
334M
    Prog::Inst* ip = prog_->inst(id);
240
334M
    switch (ip->opcode()) {
241
0
    default:
242
0
      LOG(DFATAL) << "unhandled " << ip->opcode() << " in AddToThreadq";
243
0
      break;
244
245
0
    case kInstFail:
246
0
      break;
247
248
24.6k
    case kInstAltMatch:
249
      // Save state; will pick up at next byte.
250
24.6k
      t = Incref(t0);
251
24.6k
      *tp = t;
252
253
24.6k
      DCHECK(!ip->last());
254
24.6k
      a = {id+1, NULL};
255
24.6k
      goto Loop;
256
257
121M
    case kInstNop:
258
121M
      if (!ip->last())
259
105M
        stk[nstk++] = {id+1, NULL};
260
261
      // Continue on.
262
121M
      a = {ip->out(), NULL};
263
121M
      goto Loop;
264
265
43.4M
    case kInstCapture:
266
43.4M
      if (!ip->last())
267
2.07M
        stk[nstk++] = {id+1, NULL};
268
269
43.4M
      if ((j=ip->cap()) < ncapture_) {
270
        // Push a dummy whose only job is to restore t0
271
        // once we finish exploring this possibility.
272
0
        stk[nstk++] = {0, t0};
273
274
        // Record capture.
275
0
        t = AllocThread();
276
0
        CopyCapture(t->capture, t0->capture);
277
0
        t->capture[j] = p;
278
0
        t0 = t;
279
0
      }
280
43.4M
      a = {ip->out(), NULL};
281
43.4M
      goto Loop;
282
283
162M
    case kInstByteRange:
284
162M
      if (!ip->Matches(c))
285
100M
        goto Next;
286
287
      // Save state; will pick up at next byte.
288
61.9M
      t = Incref(t0);
289
61.9M
      *tp = t;
290
61.9M
      if (ExtraDebug)
291
0
        fprintf(stderr, " + %d%s\n", id, FormatCapture(t0->capture).c_str());
292
293
61.9M
      if (ip->hint() == 0)
294
28.7M
        break;
295
33.2M
      a = {id+ip->hint(), NULL};
296
33.2M
      goto Loop;
297
298
188k
    case kInstMatch:
299
      // Save state; will pick up at next byte.
300
188k
      t = Incref(t0);
301
188k
      *tp = t;
302
188k
      if (ExtraDebug)
303
0
        fprintf(stderr, " ! %d%s\n", id, FormatCapture(t0->capture).c_str());
304
305
100M
    Next:
306
100M
      if (ip->last())
307
34.3M
        break;
308
66.5M
      a = {id+1, NULL};
309
66.5M
      goto Loop;
310
311
6.69M
    case kInstEmptyWidth:
312
6.69M
      if (!ip->last())
313
5.20M
        stk[nstk++] = {id+1, NULL};
314
315
      // Continue on if we have all the right flag bits.
316
6.69M
      if (ip->empty() & ~Prog::EmptyFlags(context, p))
317
5.04M
        break;
318
1.64M
      a = {ip->out(), NULL};
319
1.64M
      goto Loop;
320
334M
    }
321
334M
  }
322
61.9M
}
323
324
// Run runq on byte c, appending new states to nextq.
325
// Updates matched_ and match_ as new, better matches are found.
326
// context is used (with p) for evaluating empty-width specials.
327
// p is the position of byte c in the input string for AddToThreadq;
328
// p-1 will be used when processing Match instructions.
329
// Frees all the threads on runq.
330
// If there is a shortcut to the end, returns that shortcut.
331
int NFA::Step(Threadq* runq, Threadq* nextq, int c, const StringPiece& context,
332
274k
              const char* p) {
333
274k
  nextq->clear();
334
335
333M
  for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
336
333M
    Thread* t = i->value();
337
333M
    if (t == NULL)
338
271M
      continue;
339
340
62.1M
    if (longest_) {
341
      // Can skip any threads started after our current best match.
342
62.1M
      if (matched_ && match_[0] < t->capture[0]) {
343
0
        Decref(t);
344
0
        continue;
345
0
      }
346
62.1M
    }
347
348
62.1M
    int id = i->index();
349
62.1M
    Prog::Inst* ip = prog_->inst(id);
350
351
62.1M
    switch (ip->opcode()) {
352
0
      default:
353
        // Should only see the values handled below.
354
0
        LOG(DFATAL) << "Unhandled " << ip->opcode() << " in step";
355
0
        break;
356
357
61.9M
      case kInstByteRange:
358
61.9M
        AddToThreadq(nextq, ip->out(), c, context, p, t);
359
61.9M
        break;
360
361
24.2k
      case kInstAltMatch:
362
24.2k
        if (i != runq->begin())
363
23.1k
          break;
364
        // The match is ours if we want it.
365
1.08k
        if (ip->greedy(prog_) || longest_) {
366
1.08k
          CopyCapture(match_, t->capture);
367
1.08k
          matched_ = true;
368
369
1.08k
          Decref(t);
370
871k
          for (++i; i != runq->end(); ++i) {
371
870k
            if (i->value() != NULL)
372
76.3k
              Decref(i->value());
373
870k
          }
374
1.08k
          runq->clear();
375
1.08k
          if (ip->greedy(prog_))
376
821
            return ip->out1();
377
260
          return ip->out();
378
1.08k
        }
379
0
        break;
380
381
187k
      case kInstMatch: {
382
        // Avoid invoking undefined behavior (arithmetic on a null pointer)
383
        // by storing p instead of p-1. (What would the latter even mean?!)
384
        // This complements the special case in NFA::Search().
385
187k
        if (p == NULL) {
386
0
          CopyCapture(match_, t->capture);
387
0
          match_[1] = p;
388
0
          matched_ = true;
389
0
          break;
390
0
        }
391
392
187k
        if (endmatch_ && p-1 != etext_)
393
56
          break;
394
395
187k
        if (longest_) {
396
          // Leftmost-longest mode: save this match only if
397
          // it is either farther to the left or at the same
398
          // point but longer than an existing match.
399
187k
          if (!matched_ || t->capture[0] < match_[0] ||
400
187k
              (t->capture[0] == match_[0] && p-1 > match_[1])) {
401
48.5k
            CopyCapture(match_, t->capture);
402
48.5k
            match_[1] = p-1;
403
48.5k
            matched_ = true;
404
48.5k
          }
405
187k
        } else {
406
          // Leftmost-biased mode: this match is by definition
407
          // better than what we've already found (see next line).
408
0
          CopyCapture(match_, t->capture);
409
0
          match_[1] = p-1;
410
0
          matched_ = true;
411
412
          // Cut off the threads that can only find matches
413
          // worse than the one we just found: don't run the
414
          // rest of the current Threadq.
415
0
          Decref(t);
416
0
          for (++i; i != runq->end(); ++i) {
417
0
            if (i->value() != NULL)
418
0
              Decref(i->value());
419
0
          }
420
0
          runq->clear();
421
0
          return 0;
422
0
        }
423
187k
        break;
424
187k
      }
425
62.1M
    }
426
62.1M
    Decref(t);
427
62.1M
  }
428
273k
  runq->clear();
429
273k
  return 0;
430
274k
}
431
432
0
std::string NFA::FormatCapture(const char** capture) {
433
0
  std::string s;
434
0
  for (int i = 0; i < ncapture_; i+=2) {
435
0
    if (capture[i] == NULL)
436
0
      s += "(?,?)";
437
0
    else if (capture[i+1] == NULL)
438
0
      s += StringPrintf("(%td,?)",
439
0
                        capture[i] - btext_);
440
0
    else
441
0
      s += StringPrintf("(%td,%td)",
442
0
                        capture[i] - btext_,
443
0
                        capture[i+1] - btext_);
444
0
  }
445
0
  return s;
446
0
}
447
448
bool NFA::Search(const StringPiece& text, const StringPiece& const_context,
449
            bool anchored, bool longest,
450
17.5k
            StringPiece* submatch, int nsubmatch) {
451
17.5k
  if (start_ == 0)
452
0
    return false;
453
454
17.5k
  StringPiece context = const_context;
455
17.5k
  if (context.data() == NULL)
456
0
    context = text;
457
458
  // Sanity check: make sure that text lies within context.
459
17.5k
  if (BeginPtr(text) < BeginPtr(context) || EndPtr(text) > EndPtr(context)) {
460
0
    LOG(DFATAL) << "context does not contain text";
461
0
    return false;
462
0
  }
463
464
17.5k
  if (prog_->anchor_start() && BeginPtr(context) != BeginPtr(text))
465
0
    return false;
466
17.5k
  if (prog_->anchor_end() && EndPtr(context) != EndPtr(text))
467
0
    return false;
468
17.5k
  anchored |= prog_->anchor_start();
469
17.5k
  if (prog_->anchor_end()) {
470
626
    longest = true;
471
626
    endmatch_ = true;
472
626
  }
473
474
17.5k
  if (nsubmatch < 0) {
475
0
    LOG(DFATAL) << "Bad args: nsubmatch=" << nsubmatch;
476
0
    return false;
477
0
  }
478
479
  // Save search parameters.
480
17.5k
  ncapture_ = 2*nsubmatch;
481
17.5k
  longest_ = longest;
482
483
17.5k
  if (nsubmatch == 0) {
484
    // We need to maintain match[0], both to distinguish the
485
    // longest match (if longest is true) and also to tell
486
    // whether we've seen any matches at all.
487
0
    ncapture_ = 2;
488
0
  }
489
490
17.5k
  match_ = new const char*[ncapture_];
491
17.5k
  memset(match_, 0, ncapture_*sizeof match_[0]);
492
17.5k
  matched_ = false;
493
494
  // For debugging prints.
495
17.5k
  btext_ = context.data();
496
  // For convenience.
497
17.5k
  etext_ = text.data() + text.size();
498
499
17.5k
  if (ExtraDebug)
500
0
    fprintf(stderr, "NFA::Search %s (context: %s) anchored=%d longest=%d\n",
501
0
            std::string(text).c_str(), std::string(context).c_str(), anchored, longest);
502
503
  // Set up search.
504
17.5k
  Threadq* runq = &q0_;
505
17.5k
  Threadq* nextq = &q1_;
506
17.5k
  runq->clear();
507
17.5k
  nextq->clear();
508
509
  // Loop over the text, stepping the machine.
510
274k
  for (const char* p = text.data();; p++) {
511
274k
    if (ExtraDebug) {
512
0
      int c = 0;
513
0
      if (p == btext_)
514
0
        c = '^';
515
0
      else if (p > etext_)
516
0
        c = '$';
517
0
      else if (p < etext_)
518
0
        c = p[0] & 0xFF;
519
520
0
      fprintf(stderr, "%c:", c);
521
0
      for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
522
0
        Thread* t = i->value();
523
0
        if (t == NULL)
524
0
          continue;
525
0
        fprintf(stderr, " %d%s", i->index(), FormatCapture(t->capture).c_str());
526
0
      }
527
0
      fprintf(stderr, "\n");
528
0
    }
529
530
    // This is a no-op the first time around the loop because runq is empty.
531
274k
    int id = Step(runq, nextq, p < etext_ ? p[0] & 0xFF : -1, context, p);
532
274k
    DCHECK_EQ(runq->size(), 0);
533
274k
    using std::swap;
534
274k
    swap(nextq, runq);
535
274k
    nextq->clear();
536
274k
    if (id != 0) {
537
      // We're done: full match ahead.
538
1.08k
      p = etext_;
539
2.13k
      for (;;) {
540
2.13k
        Prog::Inst* ip = prog_->inst(id);
541
2.13k
        switch (ip->opcode()) {
542
0
          default:
543
0
            LOG(DFATAL) << "Unexpected opcode in short circuit: " << ip->opcode();
544
0
            break;
545
546
87
          case kInstCapture:
547
87
            if (ip->cap() < ncapture_)
548
0
              match_[ip->cap()] = p;
549
87
            id = ip->out();
550
87
            continue;
551
552
962
          case kInstNop:
553
962
            id = ip->out();
554
962
            continue;
555
556
1.08k
          case kInstMatch:
557
1.08k
            match_[1] = p;
558
1.08k
            matched_ = true;
559
1.08k
            break;
560
2.13k
        }
561
1.08k
        break;
562
2.13k
      }
563
1.08k
      break;
564
1.08k
    }
565
566
273k
    if (p > etext_)
567
3.37k
      break;
568
569
    // Start a new thread if there have not been any matches.
570
    // (No point in starting a new thread if there have been
571
    // matches, since it would be to the right of the match
572
    // we already found.)
573
269k
    if (!matched_ && (!anchored || p == text.data())) {
574
      // Try to use prefix accel (e.g. memchr) to skip ahead.
575
      // The search must be unanchored and there must be zero
576
      // possible matches already.
577
17.5k
      if (!anchored && runq->size() == 0 &&
578
17.5k
          p < etext_ && prog_->can_prefix_accel()) {
579
0
        p = reinterpret_cast<const char*>(prog_->PrefixAccel(p, etext_ - p));
580
0
        if (p == NULL)
581
0
          p = etext_;
582
0
      }
583
584
17.5k
      Thread* t = AllocThread();
585
17.5k
      CopyCapture(t->capture, match_);
586
17.5k
      t->capture[0] = p;
587
17.5k
      AddToThreadq(runq, start_, p < etext_ ? p[0] & 0xFF : -1, context, p,
588
17.5k
                   t);
589
17.5k
      Decref(t);
590
17.5k
    }
591
592
    // If all the threads have died, stop early.
593
269k
    if (runq->size() == 0) {
594
13.0k
      if (ExtraDebug)
595
0
        fprintf(stderr, "dead\n");
596
13.0k
      break;
597
13.0k
    }
598
599
    // Avoid invoking undefined behavior (arithmetic on a null pointer)
600
    // by simply not continuing the loop.
601
    // This complements the special case in NFA::Step().
602
256k
    if (p == NULL) {
603
0
      (void) Step(runq, nextq, -1, context, p);
604
0
      DCHECK_EQ(runq->size(), 0);
605
0
      using std::swap;
606
0
      swap(nextq, runq);
607
0
      nextq->clear();
608
0
      break;
609
0
    }
610
256k
  }
611
612
17.5k
  for (Threadq::iterator i = runq->begin(); i != runq->end(); ++i) {
613
0
    if (i->value() != NULL)
614
0
      Decref(i->value());
615
0
  }
616
617
17.5k
  if (matched_) {
618
15.8k
    for (int i = 0; i < nsubmatch; i++)
619
7.94k
      submatch[i] =
620
7.94k
          StringPiece(match_[2 * i],
621
7.94k
                      static_cast<size_t>(match_[2 * i + 1] - match_[2 * i]));
622
7.94k
    if (ExtraDebug)
623
0
      fprintf(stderr, "match (%td,%td)\n",
624
0
              match_[0] - btext_,
625
0
              match_[1] - btext_);
626
7.94k
    return true;
627
7.94k
  }
628
9.56k
  return false;
629
17.5k
}
630
631
bool
632
Prog::SearchNFA(const StringPiece& text, const StringPiece& context,
633
                Anchor anchor, MatchKind kind,
634
17.5k
                StringPiece* match, int nmatch) {
635
17.5k
  if (ExtraDebug)
636
0
    Dump();
637
638
17.5k
  NFA nfa(this);
639
17.5k
  StringPiece sp;
640
17.5k
  if (kind == kFullMatch) {
641
17.5k
    anchor = kAnchored;
642
17.5k
    if (nmatch == 0) {
643
17.5k
      match = &sp;
644
17.5k
      nmatch = 1;
645
17.5k
    }
646
17.5k
  }
647
17.5k
  if (!nfa.Search(text, context, anchor == kAnchored, kind != kFirstMatch, match, nmatch))
648
9.56k
    return false;
649
7.94k
  if (kind == kFullMatch && EndPtr(match[0]) != EndPtr(text))
650
5.60k
    return false;
651
2.33k
  return true;
652
7.94k
}
653
654
// For each instruction i in the program reachable from the start, compute the
655
// number of instructions reachable from i by following only empty transitions
656
// and record that count as fanout[i].
657
//
658
// fanout holds the results and is also the work queue for the outer iteration.
659
// reachable holds the reached nodes for the inner iteration.
660
0
void Prog::Fanout(SparseArray<int>* fanout) {
661
0
  DCHECK_EQ(fanout->max_size(), size());
662
0
  SparseSet reachable(size());
663
0
  fanout->clear();
664
0
  fanout->set_new(start(), 0);
665
0
  for (SparseArray<int>::iterator i = fanout->begin(); i != fanout->end(); ++i) {
666
0
    int* count = &i->value();
667
0
    reachable.clear();
668
0
    reachable.insert(i->index());
669
0
    for (SparseSet::iterator j = reachable.begin(); j != reachable.end(); ++j) {
670
0
      int id = *j;
671
0
      Prog::Inst* ip = inst(id);
672
0
      switch (ip->opcode()) {
673
0
        default:
674
0
          LOG(DFATAL) << "unhandled " << ip->opcode() << " in Prog::Fanout()";
675
0
          break;
676
677
0
        case kInstByteRange:
678
0
          if (!ip->last())
679
0
            reachable.insert(id+1);
680
681
0
          (*count)++;
682
0
          if (!fanout->has_index(ip->out())) {
683
0
            fanout->set_new(ip->out(), 0);
684
0
          }
685
0
          break;
686
687
0
        case kInstAltMatch:
688
0
          DCHECK(!ip->last());
689
0
          reachable.insert(id+1);
690
0
          break;
691
692
0
        case kInstCapture:
693
0
        case kInstEmptyWidth:
694
0
        case kInstNop:
695
0
          if (!ip->last())
696
0
            reachable.insert(id+1);
697
698
0
          reachable.insert(ip->out());
699
0
          break;
700
701
0
        case kInstMatch:
702
0
          if (!ip->last())
703
0
            reachable.insert(id+1);
704
0
          break;
705
706
0
        case kInstFail:
707
0
          break;
708
0
      }
709
0
    }
710
0
  }
711
0
}
712
713
}  // namespace re2