Coverage Report

Created: 2026-06-15 07:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/CMake/Source/kwsys/RegularExpression.cxx
Line
Count
Source
1
/* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
2
   file Copyright.txt or https://cmake.org/licensing#kwsys for details.  */
3
//
4
// Copyright (C) 1991 Texas Instruments Incorporated.
5
//
6
// Permission is granted to any individual or institution to use, copy, modify
7
// and distribute this software, provided that this complete copyright and
8
// permission notice is maintained, intact, in all copies and supporting
9
// documentation.
10
//
11
// Texas Instruments Incorporated provides this software "as is" without
12
// express or implied warranty.
13
//
14
//
15
// Created: MNF 06/13/89  Initial Design and Implementation
16
// Updated: LGO 08/09/89  Inherit from Generic
17
// Updated: MBN 09/07/89  Added conditional exception handling
18
// Updated: MBN 12/15/89  Sprinkled "const" qualifiers all over the place!
19
// Updated: DLS 03/22/91  New lite version
20
//
21
22
#include "kwsysPrivate.h"
23
#include KWSYS_HEADER(RegularExpression.hxx)
24
25
// Work-around CMake dependency scanning limitation.  This must
26
// duplicate the above list of headers.
27
#if 0
28
#  include "RegularExpression.hxx.in"
29
#endif
30
31
#include <cstdio>
32
#include <cstring>
33
34
namespace KWSYS_NAMESPACE {
35
36
// RegularExpression -- Copies the given regular expression.
37
RegularExpression::RegularExpression(RegularExpression const& rxp)
38
0
{
39
0
  if (!rxp.program) {
40
0
    this->program = nullptr;
41
0
    return;
42
0
  }
43
0
  int ind;
44
0
  this->progsize = rxp.progsize;            // Copy regular expression size
45
0
  this->program = new char[this->progsize]; // Allocate storage
46
0
  for (ind = this->progsize; ind-- != 0;)   // Copy regular expression
47
0
    this->program[ind] = rxp.program[ind];
48
  // Copy pointers into last successful "find" operation
49
0
  this->regmatch = rxp.regmatch;
50
0
  this->regmust = rxp.regmust; // Copy field
51
0
  if (rxp.regmust) {
52
0
    char* dum = rxp.program;
53
0
    ind = 0;
54
0
    while (dum != rxp.regmust) {
55
0
      ++dum;
56
0
      ++ind;
57
0
    }
58
0
    this->regmust = this->program + ind;
59
0
  }
60
0
  this->regstart = rxp.regstart; // Copy starting index
61
0
  this->reganch = rxp.reganch;   // Copy remaining private data
62
0
  this->regmlen = rxp.regmlen;   // Copy remaining private data
63
0
  this->regnpar = rxp.regnpar;
64
0
}
65
66
// operator= -- Copies the given regular expression.
67
RegularExpression& RegularExpression::operator=(RegularExpression const& rxp)
68
0
{
69
0
  if (this == &rxp) {
70
0
    return *this;
71
0
  }
72
0
  if (!rxp.program) {
73
0
    this->program = nullptr;
74
0
    return *this;
75
0
  }
76
0
  int ind;
77
0
  this->progsize = rxp.progsize; // Copy regular expression size
78
0
  delete[] this->program;
79
0
  this->program = new char[this->progsize]; // Allocate storage
80
0
  for (ind = this->progsize; ind-- != 0;)   // Copy regular expression
81
0
    this->program[ind] = rxp.program[ind];
82
  // Copy pointers into last successful "find" operation
83
0
  this->regmatch = rxp.regmatch;
84
0
  this->regmust = rxp.regmust; // Copy field
85
0
  if (rxp.regmust) {
86
0
    char* dum = rxp.program;
87
0
    ind = 0;
88
0
    while (dum != rxp.regmust) {
89
0
      ++dum;
90
0
      ++ind;
91
0
    }
92
0
    this->regmust = this->program + ind;
93
0
  }
94
0
  this->regstart = rxp.regstart; // Copy starting index
95
0
  this->reganch = rxp.reganch;   // Copy remaining private data
96
0
  this->regmlen = rxp.regmlen;   // Copy remaining private data
97
0
  this->regnpar = rxp.regnpar;
98
99
0
  return *this;
100
0
}
101
102
// operator== -- Returns true if two regular expressions have the same
103
// compiled program for pattern matching.
104
bool RegularExpression::operator==(RegularExpression const& rxp) const
105
0
{
106
0
  if (this != &rxp) {         // Same address?
107
0
    int ind = this->progsize; // Get regular expression size
108
0
    if (ind != rxp.progsize)  // If different size regexp
109
0
      return false;           // Return failure
110
0
    while (ind-- != 0)        // Else while still characters
111
0
      if (this->program[ind] != rxp.program[ind]) // If regexp are different
112
0
        return false;                             // Return failure
113
0
  }
114
0
  return true; // Else same, return success
115
0
}
116
117
// deep_equal -- Returns true if have the same compiled regular expressions
118
// and the same start and end pointers.
119
120
bool RegularExpression::deep_equal(RegularExpression const& rxp) const
121
0
{
122
0
  int ind = this->progsize;                     // Get regular expression size
123
0
  if (ind != rxp.progsize)                      // If different size regexp
124
0
    return false;                               // Return failure
125
0
  while (ind-- != 0)                            // Else while still characters
126
0
    if (this->program[ind] != rxp.program[ind]) // If regexp are different
127
0
      return false;                             // Return failure
128
  // Else if same start/end ptrs, return true
129
0
  return (this->regmatch.start() == rxp.regmatch.start() &&
130
0
          this->regmatch.end() == rxp.regmatch.end());
131
0
}
132
133
// The remaining code in this file is derived from the regular expression code
134
// whose copyright statement appears below.  It has been changed to work
135
// with the class concepts of C++ and COOL.
136
137
/*
138
 * compile and find
139
 *
140
 *      Copyright (c) 1986 by University of Toronto.
141
 *      Written by Henry Spencer.  Not derived from licensed software.
142
 *
143
 *      Permission is granted to anyone to use this software for any
144
 *      purpose on any computer system, and to redistribute it freely,
145
 *      subject to the following restrictions:
146
 *
147
 *      1. The author is not responsible for the consequences of use of
148
 *              this software, no matter how awful, even if they arise
149
 *              from defects in it.
150
 *
151
 *      2. The origin of this software must not be misrepresented, either
152
 *              by explicit claim or by omission.
153
 *
154
 *      3. Altered versions must be plainly marked as such, and must not
155
 *              be misrepresented as being the original software.
156
 *
157
 * Beware that some of this code is subtly aware of the way operator
158
 * precedence is structured in regular expressions.  Serious changes in
159
 * regular-expression syntax might require a total rethink.
160
 */
161
162
/*
163
 * The "internal use only" fields in regexp.h are present to pass info from
164
 * compile to execute that permits the execute phase to run lots faster on
165
 * simple cases.  They are:
166
 *
167
 * regstart     char that must begin a match; '\0' if none obvious
168
 * reganch      is the match anchored (at beginning-of-line only)?
169
 * regmust      string (pointer into program) that match must include, or
170
 *              nullptr
 * regmlen      length of regmust string
171
 *
172
 * Regstart and reganch permit very fast decisions on suitable starting points
173
 * for a match, cutting down the work a lot.  Regmust permits fast rejection
174
 * of lines that cannot possibly match.  The regmust tests are costly enough
175
 * that compile() supplies a regmust only if the r.e. contains something
176
 * potentially expensive (at present, the only such thing detected is * or +
177
 * at the start of the r.e., which can involve a lot of backup).  Regmlen is
178
 * supplied because the test in find() needs it and compile() is computing
179
 * it anyway.
180
 */
181
182
/*
183
 * Structure for regexp "program".  This is essentially a linear encoding
184
 * of a nondeterministic finite-state machine (aka syntax charts or
185
 * "railroad normal form" in parsing technology).  Each node is an opcode
186
 * plus a "next" pointer, possibly plus an operand.  "Next" pointers of
187
 * all nodes except BRANCH implement concatenation; a "next" pointer with
188
 * a BRANCH on both ends of it is connecting two alternatives.  (Here we
189
 * have one of the subtle syntax dependencies:  an individual BRANCH (as
190
 * opposed to a collection of them) is never concatenated with anything
191
 * because of operator precedence.)  The operand of some types of node is
192
 * a literal string; for others, it is a node leading into a sub-FSM.  In
193
 * particular, the operand of a BRANCH node is the first node of the branch.
194
 * (NB this is *not* a tree structure:  the tail of the branch connects
195
 * to the thing following the set of BRANCHes.)  The opcodes are:
196
 */
197
198
// Opcode table.  Columns: name, value, operand kind, meaning.
#define END 0     // no   End of program.
#define BOL 1     // no   Match "" at beginning of line.
#define EOL 2     // no   Match "" at end of line.
#define ANY 3     // no   Match any one character.
#define ANYOF 4   // str  Match any character in this string.
#define ANYBUT 5  // str  Match any character not in this string.
#define BRANCH 6  // node Match this alternative, or the next...
#define BACK 7    // no   Match "", "next" ptr points backward.
#define EXACTLY 8 // str  Match this string.
#define NOTHING 9 // no   Match empty string.
#define STAR 10   // node Match this (simple) thing 0 or more times.
#define PLUS 11   // node Match this (simple) thing 1 or more times.
#define OPEN 20   // no   Mark this point in input as start of #n.
                  //      OPEN+1 is number 1, etc.
#define CLOSE 52  // no   Analogous to OPEN.
224
225
/*
226
 * Opcode notes:
227
 *
228
 * BRANCH       The set of branches constituting a single choice are hooked
229
 *              together with their "next" pointers, since precedence prevents
230
 *              anything being concatenated to any individual branch.  The
231
 *              "next" pointer of the last BRANCH in a choice points to the
232
 *              thing following the whole choice.  This is also where the
233
 *              final "next" pointer of each individual branch points; each
234
 *              branch starts with the operand node of a BRANCH node.
235
 *
236
 * BACK         Normal "next" pointers all implicitly point forward; BACK
237
 *              exists to make loop structures possible.
238
 *
239
 * STAR,PLUS    '?', and complex '*' and '+', are implemented as circular
240
 *              BRANCH structures using BACK.  Simple cases (one character
241
 *              per match) are implemented with STAR and PLUS for speed
242
 *              and to minimize recursive plunges.
243
 *
244
 * OPEN,CLOSE   ...are numbered at compile time.
245
 */
246
247
/*
248
 * A node is one char of opcode followed by two chars of "next" pointer.
249
 * "Next" pointers are stored as two 8-bit pieces, high order first.  The
250
 * value is a positive offset from the opcode of the node containing it.
251
 * An operand, if any, simply follows the node.  (Note that much of the
252
 * code generation knows about this implicit relationship.)
253
 *
254
 * Using two bytes for the "next" pointer is vast overkill for most things,
255
 * but allows patterns to get big without disasters.
256
 */
257
258
2.68G
#define OP(p) (*(p))
259
1.13G
#define NEXT(p) (((*((p) + 1) & 0377) << 8) + (*((p) + 2) & 0377))
260
459M
#define OPERAND(p) ((p) + 3)
261
262
unsigned char const MAGIC = 0234;
263
/*
264
 * Utility definitions.
265
 */
266
267
82.0k
#define UCHARAT(p) (reinterpret_cast<const unsigned char*>(p))[0]
268
269
93.9k
#define ISMULT(c) ((c) == '*' || (c) == '+' || (c) == '?')
270
23.2k
#define META "^$.[()|?+*\\"
271
272
/*
273
 * Flags to be passed up and down.
274
 */
275
213k
#define HASWIDTH 01 // Known never to match null string.
276
47.9k
#define SIMPLE 02   // Simple enough to be STAR/PLUS operand.
277
73.7k
#define SPSTART 04  // Starts with * or +.
278
114k
#define WORST 0     // Worst case.
279
280
/////////////////////////////////////////////////////////////////////////
281
//
282
//  COMPILE AND ASSOCIATED FUNCTIONS
283
//
284
/////////////////////////////////////////////////////////////////////////
285
286
/*
287
 * Read only utility variables.
288
 */
289
static char regdummy;
290
static char* const regdummyptr = &regdummy;
291
292
/*
 * Compiler state shared by the recursive-descent routines that
 * RegularExpression::compile() drives.  The same object is used for both
 * compile passes; regcode == regdummyptr selects the size-counting pass.
 */
class RegExpCompile
{
public:
  char const* regparse; // Next unread character of the pattern.
  int regnpar;          // Number to give the next "(" group.
  char* regcode;        // Where the next byte is emitted; regdummyptr = don't.
  long regsize;         // Size of the program, counted when not emitting.

  // Grammar levels, outermost first.  Each reports its flags via `flagp`.
  char* reg(int paren, int* flagp);
  char* regbranch(int* flagp);
  char* regpiece(int* flagp);
  char* regatom(int* flagp);

  // Emission helpers.
  char* regnode(char op);
  void regc(char b);
  void reginsert(char op, char* opnd);
  static void regtail(char* p, char const* val);
  static void regoptail(char* p, char const* val);
};
313
314
static char const* regnext(char const*);
315
static char* regnext(char*);
316
317
#ifdef STRCSPN
318
static int strcspn();
319
#endif
320
321
/*
322
 * We can't allocate space until we know how big the compiled form will be,
323
 * but we can't compile it (and thus know how big it is) until we've got a
324
 * place to put the code.  So we cheat:  we compile it twice, once with code
325
 * generation turned off and size counting turned on, and once "for real".
326
 * This also means that we don't allocate space until we are sure that the
327
 * thing really will compile successfully, and we never have to move the
328
 * code and thus invalidate pointers into it.  (Note that it has to be in
329
 * one piece because free() must be able to free it all.)
330
 *
331
 * Beware that the optimization-preparation code in here knows about some
332
 * of the structure of the compiled regexp.
333
 */
334
335
// compile -- compile a regular expression into internal code
336
// for later pattern matching.
337
338
bool RegularExpression::compile(char const* exp)
339
2.02k
{
340
2.02k
  char const* scan;
341
2.02k
  char const* longest;
342
2.02k
  int flags;
343
344
2.02k
  if (!exp) {
345
    // RAISE Error, SYM(RegularExpression), SYM(No_Expr),
346
0
    printf("RegularExpression::compile(): No expression supplied.\n");
347
0
    return false;
348
0
  }
349
350
  // First pass: determine size, legality.
351
2.02k
  RegExpCompile comp;
352
2.02k
  comp.regparse = exp;
353
2.02k
  comp.regnpar = 1;
354
2.02k
  comp.regsize = 0L;
355
2.02k
  comp.regcode = regdummyptr;
356
2.02k
  comp.regc(static_cast<char>(MAGIC));
357
2.02k
  if (!comp.reg(0, &flags)) {
358
178
    printf("RegularExpression::compile(): Error in compile.\n");
359
178
    return false;
360
178
  }
361
1.84k
  this->regmatch.clear();
362
363
  // Small enough for pointer-storage convention?
364
1.84k
  if (comp.regsize >= 65535L) {
365
    // RAISE Error, SYM(RegularExpression), SYM(Expr_Too_Big),
366
3
    printf("RegularExpression::compile(): Expression too big.\n");
367
3
    return false;
368
3
  }
369
370
  // Allocate space.
371
  // #ifndef _WIN32
372
1.84k
  delete[] this->program;
373
  // #endif
374
1.84k
  this->program = new char[comp.regsize];
375
1.84k
  this->progsize = static_cast<int>(comp.regsize);
376
1.84k
  this->regnpar = comp.regnpar;
377
378
1.84k
  if (!this->program) {
379
    // RAISE Error, SYM(RegularExpression), SYM(Out_Of_Memory),
380
0
    printf("RegularExpression::compile(): Out of memory.\n");
381
0
    return false;
382
0
  }
383
384
#ifdef __clang_analyzer__ /* Convince it that the program is initialized.  */
385
  memset(this->program, 0, comp.regsize);
386
#endif
387
388
  // Second pass: emit code.
389
1.84k
  comp.regparse = exp;
390
1.84k
  comp.regnpar = 1;
391
1.84k
  comp.regcode = this->program;
392
1.84k
  comp.regc(static_cast<char>(MAGIC));
393
1.84k
  comp.reg(0, &flags);
394
395
  // Dig out information for optimizations.
396
1.84k
  this->regstart = '\0'; // Worst-case defaults.
397
1.84k
  this->reganch = 0;
398
1.84k
  this->regmust = nullptr;
399
1.84k
  this->regmlen = 0;
400
1.84k
  scan = this->program + 1;       // First BRANCH.
401
1.84k
  if (OP(regnext(scan)) == END) { // Only one top-level choice.
402
1.72k
    scan = OPERAND(scan);
403
404
    // Starting-point info.
405
1.72k
    if (OP(scan) == EXACTLY)
406
322
      this->regstart = *OPERAND(scan);
407
1.39k
    else if (OP(scan) == BOL)
408
39
      this->reganch++;
409
410
    //
411
    // If there's something expensive in the r.e., find the longest
412
    // literal string that must appear and make it the regmust.  Resolve
413
    // ties in favor of later strings, since the regstart check works
414
    // with the beginning of the r.e. and avoiding duplication
415
    // strengthens checking.  Not a strong reason, but sufficient in the
416
    // absence of others.
417
    //
418
1.72k
    if (flags & SPSTART) {
419
674
      longest = nullptr;
420
674
      size_t len = 0;
421
9.99k
      for (; scan; scan = regnext(scan))
422
9.31k
        if (OP(scan) == EXACTLY && strlen(OPERAND(scan)) >= len) {
423
881
          longest = OPERAND(scan);
424
881
          len = strlen(OPERAND(scan));
425
881
        }
426
674
      this->regmust = longest;
427
674
      this->regmlen = len;
428
674
    }
429
1.72k
  }
430
1.84k
  return true;
431
1.84k
}
432
433
/*
434
 - reg - regular expression, i.e. main body or parenthesized thing
435
 *
436
 * Caller must absorb opening parenthesis.
437
 *
438
 * Combining parenthesis handling with the base level of regular expression
439
 * is a trifle forced, but the need to tie the tails of the branches to what
440
 * follows makes it hard to avoid.
441
 */
442
char* RegExpCompile::reg(int paren, int* flagp)
443
18.7k
{
444
18.7k
  char* ret;
445
18.7k
  char* br;
446
18.7k
  char* ender;
447
18.7k
  int parno = 0;
448
18.7k
  int flags;
449
450
18.7k
  *flagp = HASWIDTH; // Tentatively.
451
452
  // Make an OPEN node, if parenthesized.
453
18.7k
  if (paren) {
454
14.9k
    if (regnpar >= RegularExpressionMatch::NSUBEXP) {
455
      // RAISE Error, SYM(RegularExpression), SYM(Too_Many_Parens),
456
4
      printf("RegularExpression::compile(): Too many parentheses.\n");
457
4
      return nullptr;
458
4
    }
459
14.8k
    parno = regnpar;
460
14.8k
    regnpar++;
461
14.8k
    ret = regnode(static_cast<char>(OPEN + parno));
462
14.8k
  } else
463
3.86k
    ret = nullptr;
464
465
  // Pick up the branches, linking them together.
466
18.7k
  br = regbranch(&flags);
467
18.7k
  if (!br)
468
355
    return (nullptr);
469
18.4k
  if (ret)
470
14.6k
    regtail(ret, br); // OPEN -> first.
471
3.72k
  else
472
3.72k
    ret = br;
473
18.4k
  if (!(flags & HASWIDTH))
474
11.7k
    *flagp &= ~HASWIDTH;
475
18.4k
  *flagp |= flags & SPSTART;
476
28.0k
  while (*regparse == '|') {
477
9.74k
    regparse++;
478
9.74k
    br = regbranch(&flags);
479
9.74k
    if (!br)
480
101
      return (nullptr);
481
9.64k
    regtail(ret, br); // BRANCH -> BRANCH.
482
9.64k
    if (!(flags & HASWIDTH))
483
5.34k
      *flagp &= ~HASWIDTH;
484
9.64k
    *flagp |= flags & SPSTART;
485
9.64k
  }
486
487
  // Make a closing node, and hook it on the end.
488
18.3k
  ender = regnode(static_cast<char>((paren) ? CLOSE + parno : END));
489
18.3k
  regtail(ret, ender);
490
491
  // Hook the tails of the branches to the closing node.
492
57.6k
  for (br = ret; br; br = regnext(br))
493
39.3k
    regoptail(br, ender);
494
495
  // Check for proper termination.
496
18.3k
  if (paren && *regparse++ != ')') {
497
    // RAISE Error, SYM(RegularExpression), SYM(Unmatched_Parens),
498
43
    printf("RegularExpression::compile(): Unmatched parentheses.\n");
499
43
    return nullptr;
500
18.2k
  } else if (!paren && *regparse != '\0') {
501
17
    if (*regparse == ')') {
502
      // RAISE Error, SYM(RegularExpression), SYM(Unmatched_Parens),
503
17
      printf("RegularExpression::compile(): Unmatched parentheses.\n");
504
17
      return nullptr;
505
17
    } else {
506
      // RAISE Error, SYM(RegularExpression), SYM(Internal_Error),
507
0
      printf("RegularExpression::compile(): Internal error.\n");
508
0
      return nullptr;
509
0
    }
510
    // NOTREACHED
511
17
  }
512
18.2k
  return (ret);
513
18.3k
}
514
515
/*
516
 - regbranch - one alternative of an | operator
517
 *
518
 * Implements the concatenation operator.
519
 */
520
char* RegExpCompile::regbranch(int* flagp)
521
28.5k
{
522
28.5k
  char* ret;
523
28.5k
  char* chain;
524
28.5k
  char* latest;
525
28.5k
  int flags;
526
527
28.5k
  *flagp = WORST; // Tentatively.
528
529
28.5k
  ret = regnode(BRANCH);
530
28.5k
  chain = nullptr;
531
95.2k
  while (*regparse != '\0' && *regparse != '|' && *regparse != ')') {
532
67.1k
    latest = regpiece(&flags);
533
67.1k
    if (!latest)
534
456
      return (nullptr);
535
66.7k
    *flagp |= flags & HASWIDTH;
536
66.7k
    if (!chain) // First piece.
537
15.7k
      *flagp |= flags & SPSTART;
538
50.9k
    else
539
50.9k
      regtail(chain, latest);
540
66.7k
    chain = latest;
541
66.7k
  }
542
28.0k
  if (!chain) // Loop ran zero times.
543
12.3k
    regnode(NOTHING);
544
545
28.0k
  return (ret);
546
28.5k
}
547
548
/*
549
 - regpiece - something followed by possible [*+?]
550
 *
551
 * Note that the branching code sequences used for ? and the general cases
552
 * of * and + are somewhat optimized:  they use the same NOTHING node as
553
 * both the endmarker for their branch list and the body of the last branch.
554
 * It might seem that this node could be dispensed with entirely, but the
555
 * endmarker role is not redundant.
556
 */
557
char* RegExpCompile::regpiece(int* flagp)
558
67.1k
{
559
67.1k
  char* ret;
560
67.1k
  char op;
561
67.1k
  char* next;
562
67.1k
  int flags;
563
564
67.1k
  ret = regatom(&flags);
565
67.1k
  if (!ret)
566
441
    return (nullptr);
567
568
66.7k
  op = *regparse;
569
66.7k
  if (!ISMULT(op)) {
570
47.6k
    *flagp = flags;
571
47.6k
    return (ret);
572
47.6k
  }
573
574
19.1k
  if (!(flags & HASWIDTH) && op != '?') {
575
    // RAISE Error, SYM(RegularExpression), SYM(Empty_Operand),
576
4
    printf("RegularExpression::compile() : *+ operand could be empty.\n");
577
4
    return nullptr;
578
4
  }
579
19.1k
  *flagp = (op != '+') ? (WORST | SPSTART) : (WORST | HASWIDTH);
580
581
19.1k
  if (op == '*' && (flags & SIMPLE))
582
5.25k
    reginsert(STAR, ret);
583
13.8k
  else if (op == '*') {
584
    // Emit x* as (x&|), where & means "self".
585
1.05k
    reginsert(BRANCH, ret);         // Either x
586
1.05k
    regoptail(ret, regnode(BACK));  // and loop
587
1.05k
    regoptail(ret, ret);            // back
588
1.05k
    regtail(ret, regnode(BRANCH));  // or
589
1.05k
    regtail(ret, regnode(NOTHING)); // null.
590
12.7k
  } else if (op == '+' && (flags & SIMPLE))
591
4.81k
    reginsert(PLUS, ret);
592
7.97k
  else if (op == '+') {
593
    // Emit x+ as x(&|), where & means "self".
594
605
    next = regnode(BRANCH); // Either
595
605
    regtail(ret, next);
596
605
    regtail(regnode(BACK), ret);    // loop back
597
605
    regtail(next, regnode(BRANCH)); // or
598
605
    regtail(ret, regnode(NOTHING)); // null.
599
7.37k
  } else if (op == '?') {
600
    // Emit x? as (x|)
601
7.37k
    reginsert(BRANCH, ret);        // Either x
602
7.37k
    regtail(ret, regnode(BRANCH)); // or
603
7.37k
    next = regnode(NOTHING);       // null.
604
7.37k
    regtail(ret, next);
605
7.37k
    regoptail(ret, next);
606
7.37k
  }
607
19.1k
  regparse++;
608
19.1k
  if (ISMULT(*regparse)) {
609
    // RAISE Error, SYM(RegularExpression), SYM(Nested_Operand),
610
11
    printf("RegularExpression::compile(): Nested *?+.\n");
611
11
    return nullptr;
612
11
  }
613
19.0k
  return (ret);
614
19.1k
}
615
616
/*
617
 - regatom - the lowest level
618
 *
619
 * Optimization:  gobbles an entire sequence of ordinary characters so that
620
 * it can turn them into a single node, which is smaller to store and
621
 * faster to run.  Backslashed characters are exceptions, each becoming a
622
 * separate node; the code is simpler that way and it's not worth fixing.
623
 */
624
char* RegExpCompile::regatom(int* flagp)
625
67.1k
{
626
67.1k
  char* ret;
627
67.1k
  int flags;
628
629
67.1k
  *flagp = WORST; // Tentatively.
630
631
67.1k
  switch (*regparse++) {
632
4.83k
    case '^':
633
4.83k
      ret = regnode(BOL);
634
4.83k
      break;
635
4.04k
    case '$':
636
4.04k
      ret = regnode(EOL);
637
4.04k
      break;
638
5.27k
    case '.':
639
5.27k
      ret = regnode(ANY);
640
5.27k
      *flagp |= HASWIDTH | SIMPLE;
641
5.27k
      break;
642
13.2k
    case '[': {
643
13.2k
      int rxpclass;
644
13.2k
      int rxpclassend;
645
646
13.2k
      if (*regparse == '^') { // Complement of range.
647
1.77k
        ret = regnode(ANYBUT);
648
1.77k
        regparse++;
649
1.77k
      } else
650
11.4k
        ret = regnode(ANYOF);
651
13.2k
      if (*regparse == ']' || *regparse == '-')
652
3.07k
        regc(*regparse++);
653
158k
      while (*regparse != '\0' && *regparse != ']') {
654
145k
        if (*regparse == '-') {
655
38.0k
          regparse++;
656
38.0k
          if (*regparse == ']' || *regparse == '\0')
657
1.04k
            regc('-');
658
37.0k
          else {
659
37.0k
            rxpclass = UCHARAT(regparse - 2) + 1;
660
37.0k
            rxpclassend = UCHARAT(regparse);
661
37.0k
            if (rxpclass > rxpclassend + 1) {
662
              // RAISE Error, SYM(RegularExpression), SYM(Invalid_Range),
663
11
              printf("RegularExpression::compile(): Invalid range in [].\n");
664
11
              return nullptr;
665
11
            }
666
6.05M
            for (; rxpclass <= rxpclassend; rxpclass++)
667
6.01M
              regc(static_cast<char>(rxpclass));
668
37.0k
            regparse++;
669
37.0k
          }
670
38.0k
        } else
671
107k
          regc(*regparse++);
672
145k
      }
673
13.2k
      regc('\0');
674
13.2k
      if (*regparse != ']') {
675
        // RAISE Error, SYM(RegularExpression), SYM(Unmatched_Bracket),
676
70
        printf("RegularExpression::compile(): Unmatched [].\n");
677
70
        return nullptr;
678
70
      }
679
13.1k
      regparse++;
680
13.1k
      *flagp |= HASWIDTH | SIMPLE;
681
13.1k
    } break;
682
14.9k
    case '(':
683
14.9k
      ret = reg(1, &flags);
684
14.9k
      if (!ret)
685
342
        return (nullptr);
686
14.5k
      *flagp |= flags & (HASWIDTH | SPSTART);
687
14.5k
      break;
688
0
    case '\0':
689
0
    case '|':
690
0
    case ')':
691
      // RAISE Error, SYM(RegularExpression), SYM(Internal_Error),
692
0
      printf("RegularExpression::compile(): Internal error.\n"); // Never here
693
0
      return nullptr;
694
4
    case '?':
695
7
    case '+':
696
8
    case '*':
697
      // RAISE Error, SYM(RegularExpression), SYM(No_Operand),
698
8
      printf("RegularExpression::compile(): ?+* follows nothing.\n");
699
8
      return nullptr;
700
1.64k
    case '\\':
701
1.64k
      if (*regparse == '\0') {
702
        // RAISE Error, SYM(RegularExpression), SYM(Trailing_Backslash),
703
10
        printf("RegularExpression::compile(): Trailing backslash.\n");
704
10
        return nullptr;
705
10
      }
706
1.63k
      ret = regnode(EXACTLY);
707
1.63k
      regc(*regparse++);
708
1.63k
      regc('\0');
709
1.63k
      *flagp |= HASWIDTH | SIMPLE;
710
1.63k
      break;
711
23.2k
    default: {
712
23.2k
      int len;
713
23.2k
      char ender;
714
715
23.2k
      regparse--;
716
23.2k
      len = int(strcspn(regparse, META));
717
23.2k
      if (len <= 0) {
718
        // RAISE Error, SYM(RegularExpression), SYM(Internal_Error),
719
0
        printf("RegularExpression::compile(): Internal error.\n");
720
0
        return nullptr;
721
0
      }
722
23.2k
      ender = *(regparse + len);
723
23.2k
      if (len > 1 && ISMULT(ender))
724
2.68k
        len--; // Back off clear of ?+* operand.
725
23.2k
      *flagp |= HASWIDTH;
726
23.2k
      if (len == 1)
727
16.1k
        *flagp |= SIMPLE;
728
23.2k
      ret = regnode(EXACTLY);
729
195k
      while (len > 0) {
730
172k
        regc(*regparse++);
731
172k
        len--;
732
172k
      }
733
23.2k
      regc('\0');
734
23.2k
    } break;
735
67.1k
  }
736
66.7k
  return (ret);
737
67.1k
}
738
739
/*
740
 - regnode - emit a node
741
   Location.
742
 */
743
char* RegExpCompile::regnode(char op)
744
146k
{
745
146k
  char* ret;
746
146k
  char* ptr;
747
748
146k
  ret = regcode;
749
146k
  if (ret == regdummyptr) {
750
74.8k
    regsize += 3;
751
74.8k
    return (ret);
752
74.8k
  }
753
754
71.7k
  ptr = ret;
755
71.7k
  *ptr++ = op;
756
71.7k
  *ptr++ = '\0'; // Null "next" pointer.
757
71.7k
  *ptr++ = '\0';
758
71.7k
  regcode = ptr;
759
760
71.7k
  return (ret);
761
146k
}
762
763
/*
764
 - regc - emit (if appropriate) a byte of code
765
 */
766
void RegExpCompile::regc(char b)
767
6.34M
{
768
6.34M
  if (regcode != regdummyptr)
769
3.02M
    *regcode++ = b;
770
3.31M
  else
771
3.31M
    regsize++;
772
6.34M
}
773
774
/*
775
 - reginsert - insert an operator in front of already-emitted operand
776
 *
777
 * Means relocating the operand.
778
 */
779
void RegExpCompile::reginsert(char op, char* opnd)
780
18.4k
{
781
18.4k
  char* src;
782
18.4k
  char* dst;
783
18.4k
  char* place;
784
785
18.4k
  if (regcode == regdummyptr) {
786
9.35k
    regsize += 3;
787
9.35k
    return;
788
9.35k
  }
789
790
9.13k
  src = regcode;
791
9.13k
  regcode += 3;
792
9.13k
  dst = regcode;
793
3.88M
  while (src > opnd)
794
3.87M
    *--dst = *--src;
795
796
9.13k
  place = opnd; // Op node, where operand used to be.
797
9.13k
  *place++ = op;
798
9.13k
  *place++ = '\0';
799
9.13k
  *place = '\0';
800
9.13k
}
801
802
/*
803
 - regtail - set the next-pointer at the end of a node chain
804
 */
805
void RegExpCompile::regtail(char* p, char const* val)
806
131k
{
807
131k
  char* scan;
808
131k
  char* temp;
809
131k
  int offset;
810
811
131k
  if (p == regdummyptr)
812
57.2k
    return;
813
814
  // Find last node.
815
74.1k
  scan = p;
816
415k
  for (;;) {
817
415k
    temp = regnext(scan);
818
415k
    if (!temp)
819
74.1k
      break;
820
341k
    scan = temp;
821
341k
  }
822
823
74.1k
  if (OP(scan) == BACK)
824
823
    offset = int(scan - val);
825
73.2k
  else
826
73.2k
    offset = int(val - scan);
827
74.1k
  *(scan + 1) = static_cast<char>((offset >> 8) & 0377);
828
74.1k
  *(scan + 2) = static_cast<char>(offset & 0377);
829
74.1k
}
830
831
/*
832
 - regoptail - regtail on operand of first argument; nop if operandless
833
 */
834
void RegExpCompile::regoptail(char* p, char const* val)
835
48.8k
{
836
  // "Operandless" and "op != BRANCH" are synonymous in practice.
837
48.8k
  if (!p || p == regdummyptr || OP(p) != BRANCH)
838
30.3k
    return;
839
18.5k
  regtail(OPERAND(p), val);
840
18.5k
}
841
842
////////////////////////////////////////////////////////////////////////
843
//
844
//  find and friends
845
//
846
////////////////////////////////////////////////////////////////////////
847
848
/*
 * Matcher state used by RegularExpression::find().
 */
class RegExpFind
{
public:
  char const* reginput;   // Current position in the input string.
  char const* regbol;     // Beginning of input, for ^ check.
  char const** regstartp; // Pointer to startp array.
  char const** regendp;   // Pointer to endp array.
  char const* regreject; // Reject matches ending here, for NONEMPTY_AT_OFFSET.

  // NOTE(review): parameter names are descriptive guesses; the definitions
  // are outside this view -- confirm against them.
  int regtry(char const* string, char const** start, char const** end,
             char const* prog);
  int regmatch(char const* prog);
  int regrepeat(char const* p);
};
864
865
// find -- Matches the regular expression to the given string.
866
// Returns true if found, and sets start and end indexes accordingly.
867
bool RegularExpression::find(char const* string,
868
                             RegularExpressionMatch& rmatch,
869
                             std::string::size_type offset,
870
                             unsigned options) const
871
7.92k
{
872
7.92k
  char const* s;
873
874
7.92k
  rmatch.clear();
875
7.92k
  rmatch.searchstring = string;
876
877
7.92k
  if (!this->program) {
878
0
    return false;
879
0
  }
880
881
  // Check validity of program.
882
7.92k
  if (UCHARAT(this->program) != MAGIC) {
883
    // RAISE Error, SYM(RegularExpression), SYM(Internal_Error),
884
0
    printf(
885
0
      "RegularExpression::find(): Compiled regular expression corrupted.\n");
886
0
    return false;
887
0
  }
888
889
  // If there is a "must appear" string, look for it.
890
7.92k
  if (this->regmust) {
891
1.00k
    s = string + offset;
892
2.67k
    while ((s = strchr(s, this->regmust[0]))) {
893
2.02k
      if (strncmp(s, this->regmust, this->regmlen) == 0)
894
361
        break; // Found it.
895
1.66k
      s++;
896
1.66k
    }
897
1.00k
    if (!s) // Not present.
898
647
      return false;
899
1.00k
  }
900
901
7.28k
  RegExpFind regFind;
902
7.28k
  s = string + offset;
903
904
  // Mark beginning of line for ^ .
905
7.28k
  regFind.regbol = (options & BOL_AT_OFFSET) ? s : string;
906
7.28k
  regFind.regreject = (options & NONEMPTY_AT_OFFSET) ? s : nullptr;
907
908
  // Simplest case:  anchored match need be tried only once.
909
7.28k
  if (this->reganch)
910
2.55k
    return (regFind.regtry(s, rmatch.startp, rmatch.endp, this->program) != 0);
911
912
  // Messy cases:  unanchored match.
913
4.72k
  if (this->regstart != '\0')
914
    // We know what char it must start with.
915
3.31k
    while ((s = strchr(s, this->regstart))) {
916
2.56k
      if (regFind.regtry(s, rmatch.startp, rmatch.endp, this->program))
917
171
        return true;
918
2.39k
      s++;
919
2.39k
    }
920
3.80k
  else
921
    // We don't -- general case.
922
75.7k
    do {
923
75.7k
      if (regFind.regtry(s, rmatch.startp, rmatch.endp, this->program))
924
2.33k
        return true;
925
75.7k
    } while (*s++ != '\0');
926
927
  // Failure.
928
2.22k
  return false;
929
4.72k
}
930
931
/*
932
 - regtry - try match at specific point
933
   0 failure, 1 success
934
 */
935
int RegExpFind::regtry(char const* string, char const** start,
936
                       char const** end, char const* prog)
937
80.9k
{
938
80.9k
  int i;
939
80.9k
  char const** sp1;
940
80.9k
  char const** ep;
941
942
80.9k
  reginput = string;
943
80.9k
  regstartp = start;
944
80.9k
  regendp = end;
945
946
80.9k
  sp1 = start;
947
80.9k
  ep = end;
948
2.67M
  for (i = RegularExpressionMatch::NSUBEXP; i > 0; i--) {
949
2.58M
    *sp1++ = nullptr;
950
2.58M
    *ep++ = nullptr;
951
2.58M
  }
952
80.9k
  if (regmatch(prog + 1)) {
953
2.56k
    start[0] = string;
954
2.56k
    end[0] = reginput;
955
2.56k
    return (1);
956
2.56k
  } else
957
78.3k
    return (0);
958
80.9k
}
959
960
/*
961
 - regmatch - main matching routine
962
 *
963
 * Conceptually the strategy is simple:  check to see whether the current
964
 * node matches, call self recursively to see whether the rest matches,
965
 * and then act accordingly.  In practice we make some effort to avoid
966
 * recursion, in particular by going through "ordinary" nodes (that don't
967
 * need to know whether the rest of the match failed) by a loop instead of
968
 * by recursion.
969
 * 0 failure, 1 success
970
 */
971
int RegExpFind::regmatch(char const* prog)
972
536M
{
973
536M
  char const* scan; // Current node.
974
536M
  char const* next; // Next node.
975
976
536M
  scan = prog;
977
978
838M
  while (scan) {
979
980
838M
    next = regnext(scan);
981
982
838M
    switch (OP(scan)) {
983
49.1M
      case BOL:
984
49.1M
        if (reginput != regbol)
985
45.9M
          return (0);
986
3.22M
        break;
987
101M
      case EOL:
988
101M
        if (*reginput != '\0')
989
272k
          return (0);
990
100M
        break;
991
100M
      case ANY:
992
23.5M
        if (*reginput == '\0')
993
21.2M
          return (0);
994
2.34M
        reginput++;
995
2.34M
        break;
996
92.4M
      case EXACTLY: {
997
92.4M
        size_t len;
998
92.4M
        char const* opnd;
999
1000
92.4M
        opnd = OPERAND(scan);
1001
        // Inline the first character, for speed.
1002
92.4M
        if (*opnd != *reginput)
1003
92.1M
          return (0);
1004
264k
        len = strlen(opnd);
1005
264k
        if (len > 1 && strncmp(opnd, reginput, len) != 0)
1006
78.6k
          return (0);
1007
185k
        reginput += len;
1008
185k
      } break;
1009
11.8M
      case ANYOF:
1010
11.8M
        if (*reginput == '\0' || !strchr(OPERAND(scan), *reginput))
1011
10.5M
          return (0);
1012
1.28M
        reginput++;
1013
1.28M
        break;
1014
10.0k
      case ANYBUT:
1015
10.0k
        if (*reginput == '\0' || strchr(OPERAND(scan), *reginput))
1016
2.59k
          return (0);
1017
7.47k
        reginput++;
1018
7.47k
        break;
1019
144M
      case NOTHING:
1020
144M
        break;
1021
6.35M
      case BACK:
1022
6.35M
        break;
1023
6.38M
      case OPEN + 1:
1024
12.7M
      case OPEN + 2:
1025
19.0M
      case OPEN + 3:
1026
25.4M
      case OPEN + 4:
1027
31.7M
      case OPEN + 5:
1028
38.0M
      case OPEN + 6:
1029
44.4M
      case OPEN + 7:
1030
44.5M
      case OPEN + 8:
1031
44.6M
      case OPEN + 9:
1032
44.6M
      case OPEN + 10:
1033
44.6M
      case OPEN + 11:
1034
44.7M
      case OPEN + 12:
1035
44.7M
      case OPEN + 13:
1036
44.8M
      case OPEN + 14:
1037
44.8M
      case OPEN + 15:
1038
44.9M
      case OPEN + 16:
1039
45.0M
      case OPEN + 17:
1040
45.2M
      case OPEN + 18:
1041
45.4M
      case OPEN + 19:
1042
45.7M
      case OPEN + 20:
1043
46.3M
      case OPEN + 21:
1044
47.1M
      case OPEN + 22:
1045
48.0M
      case OPEN + 23:
1046
49.3M
      case OPEN + 24:
1047
51.3M
      case OPEN + 25:
1048
54.4M
      case OPEN + 26:
1049
59.9M
      case OPEN + 27:
1050
67.2M
      case OPEN + 28:
1051
78.1M
      case OPEN + 29:
1052
93.4M
      case OPEN + 30:
1053
106M
      case OPEN + 31:
1054
106M
      case OPEN + 32: {
1055
106M
        int no;
1056
106M
        char const* save;
1057
1058
106M
        no = OP(scan) - OPEN;
1059
106M
        save = reginput;
1060
1061
106M
        if (regmatch(next)) {
1062
1063
          //
1064
          // Don't set startp if some later invocation of the
1065
          // same parentheses already has.
1066
          //
1067
388k
          if (!regstartp[no])
1068
8.57k
            regstartp[no] = save;
1069
388k
          return (1);
1070
388k
        } else
1071
106M
          return (0);
1072
106M
      }
1073
      //              break;
1074
6.36M
      case CLOSE + 1:
1075
12.7M
      case CLOSE + 2:
1076
19.0M
      case CLOSE + 3:
1077
25.3M
      case CLOSE + 4:
1078
31.7M
      case CLOSE + 5:
1079
40.1M
      case CLOSE + 6:
1080
46.4M
      case CLOSE + 7:
1081
46.6M
      case CLOSE + 8:
1082
46.6M
      case CLOSE + 9:
1083
46.6M
      case CLOSE + 10:
1084
46.7M
      case CLOSE + 11:
1085
46.7M
      case CLOSE + 12:
1086
46.7M
      case CLOSE + 13:
1087
46.8M
      case CLOSE + 14:
1088
46.9M
      case CLOSE + 15:
1089
47.0M
      case CLOSE + 16:
1090
47.1M
      case CLOSE + 17:
1091
47.4M
      case CLOSE + 18:
1092
47.7M
      case CLOSE + 19:
1093
48.3M
      case CLOSE + 20:
1094
49.1M
      case CLOSE + 21:
1095
50.0M
      case CLOSE + 22:
1096
51.3M
      case CLOSE + 23:
1097
53.3M
      case CLOSE + 24:
1098
56.4M
      case CLOSE + 25:
1099
61.9M
      case CLOSE + 26:
1100
69.8M
      case CLOSE + 27:
1101
82.5M
      case CLOSE + 28:
1102
98.2M
      case CLOSE + 29:
1103
114M
      case CLOSE + 30:
1104
128M
      case CLOSE + 31:
1105
128M
      case CLOSE + 32: {
1106
128M
        int no;
1107
128M
        char const* save;
1108
1109
128M
        no = OP(scan) - CLOSE;
1110
128M
        save = reginput;
1111
1112
128M
        if (regmatch(next)) {
1113
1114
          //
1115
          // Don't set endp if some later invocation of the
1116
          // same parentheses already has.
1117
          //
1118
388k
          if (!regendp[no])
1119
8.57k
            regendp[no] = save;
1120
388k
          return (1);
1121
388k
        } else
1122
127M
          return (0);
1123
128M
      }
1124
      //              break;
1125
165M
      case BRANCH: {
1126
1127
165M
        char const* save;
1128
1129
165M
        if (OP(next) != BRANCH) // No choice.
1130
43.4M
          next = OPERAND(scan); // Avoid recursion.
1131
121M
        else {
1132
291M
          do {
1133
291M
            save = reginput;
1134
291M
            if (regmatch(OPERAND(scan)))
1135
397k
              return (1);
1136
291M
            reginput = save;
1137
291M
            scan = regnext(scan);
1138
291M
          } while (scan && OP(scan) == BRANCH);
1139
121M
          return (0);
1140
          // NOTREACHED
1141
121M
        }
1142
165M
      } break;
1143
43.4M
      case STAR:
1144
9.24M
      case PLUS: {
1145
9.24M
        char nextch;
1146
9.24M
        int no;
1147
9.24M
        char const* save;
1148
9.24M
        int min_no;
1149
1150
        //
1151
        // Lookahead to avoid useless match attempts when we know
1152
        // what character comes next.
1153
        //
1154
9.24M
        nextch = '\0';
1155
9.24M
        if (OP(next) == EXACTLY)
1156
2.23M
          nextch = *OPERAND(next);
1157
9.24M
        min_no = (OP(scan) == STAR) ? 0 : 1;
1158
9.24M
        save = reginput;
1159
9.24M
        no = regrepeat(OPERAND(scan));
1160
28.6M
        while (no >= min_no) {
1161
          // If it could work, try it.
1162
19.3M
          if (nextch == '\0' || *reginput == nextch)
1163
9.48M
            if (regmatch(next))
1164
3.82k
              return (1);
1165
          // Couldn't or didn't -- back up.
1166
19.3M
          no--;
1167
19.3M
          reginput = save + no;
1168
19.3M
        }
1169
9.24M
        return (0);
1170
9.24M
      }
1171
      //              break;
1172
2.56k
      case END:
1173
2.56k
        if (reginput == regreject)
1174
0
          return (0); // Can't end a match here
1175
2.56k
        return (1);   // Success!
1176
1177
0
      default:
1178
        // RAISE Error, SYM(RegularExpression), SYM(Internal_Error),
1179
0
        printf(
1180
0
          "RegularExpression::find(): Internal error -- memory corrupted.\n");
1181
0
        return 0;
1182
838M
    }
1183
301M
    scan = next;
1184
301M
  }
1185
1186
  //
1187
  //  We get here only if there's trouble -- normally "case END" is the
1188
  //  terminating point.
1189
  //
1190
  // RAISE Error, SYM(RegularExpression), SYM(Internal_Error),
1191
0
  printf("RegularExpression::find(): Internal error -- corrupted pointers.\n");
1192
0
  return (0);
1193
536M
}
1194
1195
/*
1196
 - regrepeat - repeatedly match something simple, report how many
1197
 */
1198
int RegExpFind::regrepeat(char const* p)
1199
9.24M
{
1200
9.24M
  int count = 0;
1201
9.24M
  char const* scan;
1202
9.24M
  char const* opnd;
1203
1204
9.24M
  scan = reginput;
1205
9.24M
  opnd = OPERAND(p);
1206
9.24M
  switch (OP(p)) {
1207
4.22M
    case ANY:
1208
4.22M
      count = int(strlen(scan));
1209
4.22M
      scan += count;
1210
4.22M
      break;
1211
2.27M
    case EXACTLY:
1212
2.30M
      while (*opnd == *scan) {
1213
28.3k
        count++;
1214
28.3k
        scan++;
1215
28.3k
      }
1216
2.27M
      break;
1217
2.71M
    case ANYOF:
1218
6.02M
      while (*scan != '\0' && strchr(opnd, *scan)) {
1219
3.30M
        count++;
1220
3.30M
        scan++;
1221
3.30M
      }
1222
2.71M
      break;
1223
26.3k
    case ANYBUT:
1224
32.0k
      while (*scan != '\0' && !strchr(opnd, *scan)) {
1225
5.69k
        count++;
1226
5.69k
        scan++;
1227
5.69k
      }
1228
26.3k
      break;
1229
0
    default: // Oh dear.  Called inappropriately.
1230
      // RAISE Error, SYM(RegularExpression), SYM(Internal_Error),
1231
0
      printf("cm RegularExpression::find(): Internal error.\n");
1232
0
      return 0;
1233
9.24M
  }
1234
9.24M
  reginput = scan;
1235
9.24M
  return (count);
1236
9.24M
}
1237
1238
/*
1239
 - regnext - dig the "next" pointer out of a node
1240
 */
1241
static char const* regnext(char const* p)
1242
1.12G
{
1243
1.12G
  int offset;
1244
1245
1.12G
  if (p == regdummyptr)
1246
0
    return (nullptr);
1247
1248
1.12G
  offset = NEXT(p);
1249
1.12G
  if (offset == 0)
1250
3.23k
    return (nullptr);
1251
1252
1.12G
  if (OP(p) == BACK)
1253
6.35M
    return (p - offset);
1254
1.12G
  else
1255
1.12G
    return (p + offset);
1256
1.12G
}
1257
1258
static char* regnext(char* p)
1259
455k
{
1260
455k
  int offset;
1261
1262
455k
  if (p == regdummyptr)
1263
9.26k
    return (nullptr);
1264
1265
446k
  offset = NEXT(p);
1266
446k
  if (offset == 0)
1267
83.1k
    return (nullptr);
1268
1269
362k
  if (OP(p) == BACK)
1270
0
    return (p - offset);
1271
362k
  else
1272
362k
    return (p + offset);
1273
362k
}
1274
1275
} // namespace KWSYS_NAMESPACE