Coverage Report

Created: 2026-03-12 06:35

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/CMake/build-dir/Source/cmsys/RegularExpression.hxx
Line
Count
Source
1
/* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
2
   file Copyright.txt or https://cmake.org/licensing#kwsys for details.  */
3
// Original Copyright notice:
4
// Copyright (C) 1991 Texas Instruments Incorporated.
5
//
6
// Permission is granted to any individual or institution to use, copy, modify,
7
// and distribute this software, provided that this complete copyright and
8
// permission notice is maintained, intact, in all copies and supporting
9
// documentation.
10
//
11
// Texas Instruments Incorporated provides this software "as is" without
12
// express or implied warranty.
13
//
14
// Created: MNF 06/13/89  Initial Design and Implementation
15
// Updated: LGO 08/09/89  Inherit from Generic
16
// Updated: MBN 09/07/89  Added conditional exception handling
17
// Updated: MBN 12/15/89  Sprinkled "const" qualifiers all over the place!
18
// Updated: DLS 03/22/91  New lite version
19
//
20
21
#ifndef cmsys_RegularExpression_hxx
22
#define cmsys_RegularExpression_hxx
23
24
#include <cmsys/Configure.h>
25
#include <cmsys/Configure.hxx>
26
27
#include <string>
28
29
namespace cmsys {
30
31
// Forward declaration
32
class RegularExpression;
33
34
/** \class RegularExpressionMatch
35
 * \brief Stores the pattern matches of a RegularExpression
36
 */
37
class cmsys_EXPORT RegularExpressionMatch
38
{
39
public:
40
  RegularExpressionMatch();
41
42
  bool isValid() const;
43
  void clear();
44
45
  std::string::size_type start(int n = 0) const;
46
  std::string::size_type end(int n = 0) const;
47
  std::string match(int n = 0) const;
48
49
  enum
50
  {
51
    NSUBEXP = 32
52
  };
53
54
private:
55
  friend class RegularExpression;
56
  char const* startp[NSUBEXP];
57
  char const* endp[NSUBEXP];
58
  char const* searchstring;
59
};
60
61
#ifdef _MSC_VER
62
#  pragma warning(push)
63
#  if _MSC_VER < 1900
64
#    pragma warning(disable : 4351) /* new behavior */
65
#  endif
66
#endif
67
68
/**
69
 * \brief Creates an invalid match object
70
 */
71
inline RegularExpressionMatch::RegularExpressionMatch()
72
2.06k
  : startp{}
73
2.06k
  , endp{}
74
2.06k
  , searchstring{}
75
2.06k
{
76
2.06k
}
77
78
#ifdef _MSC_VER
79
#  pragma warning(pop)
80
#endif
81
82
/**
83
 * \brief Returns true if the match pointers are valid
84
 */
85
inline bool RegularExpressionMatch::isValid() const
86
0
{
87
0
  return (this->startp[0]);
88
0
}
89
90
/**
91
 * \brief Resets to the (invalid) construction state.
92
 */
93
inline void RegularExpressionMatch::clear()
94
9.85k
{
95
9.85k
  startp[0] = nullptr;
96
9.85k
  endp[0] = nullptr;
97
9.85k
  searchstring = nullptr;
98
9.85k
}
99
100
/**
101
 * \brief Returns the start index of nth submatch.
102
 *        start(0) is the start of the full match.
103
 */
104
inline std::string::size_type RegularExpressionMatch::start(int n) const
105
0
{
106
0
  if (!this->startp[n]) {
107
0
    return std::string::npos;
108
0
  }
109
0
  return static_cast<std::string::size_type>(this->startp[n] -
110
0
                                             this->searchstring);
111
0
}
112
113
/**
114
 * \brief Returns the end index of nth submatch.
115
 *        end(0) is the end of the full match.
116
 */
117
inline std::string::size_type RegularExpressionMatch::end(int n) const
118
0
{
119
0
  if (!this->endp[n]) {
120
0
    return std::string::npos;
121
0
  }
122
0
  return static_cast<std::string::size_type>(this->endp[n] -
123
0
                                             this->searchstring);
124
0
}
125
126
/**
127
 * \brief Returns the nth submatch as a string.
128
 */
129
inline std::string RegularExpressionMatch::match(int n) const
130
0
{
131
0
  if (!this->startp[n]) {
132
0
    return std::string();
133
0
  } else {
134
0
    return std::string(
135
0
      this->startp[n],
136
0
      static_cast<std::string::size_type>(this->endp[n] - this->startp[n]));
137
0
  }
138
0
}
139
140
/** \class RegularExpression
141
 * \brief Implements pattern matching with regular expressions.
142
 *
143
 * This is the header file for the regular expression class.  An object of
144
 * this class contains a regular expression, in a special "compiled" format.
145
 * This compiled format consists of several slots all kept as the objects
146
 * private data.  The RegularExpression class provides a convenient way to
147
 * represent regular expressions.  It makes it easy to search for the same
148
 * regular expression in many different strings without having to compile a
149
 * string to regular expression format more than necessary.
150
 *
151
 * This class implements pattern matching via regular expressions.
152
 * A regular expression allows a programmer to specify  complex
153
 * patterns  that  can  be searched for and matched against the
154
 * character string of a string object. In its simplest form, a
155
 * regular  expression  is  a  sequence  of  characters used to
156
 * search for exact character matches. However, many times  the
157
 * exact  sequence to be found is not known, or only a match at
158
 * the beginning or end of a string is desired. The RegularExpression regu-
159
 * lar  expression  class implements regular expression pattern
160
 * matching as is found and implemented in many  UNIX  commands
161
 * and utilities.
162
 *
163
 * Example: The perl code
164
 *
165
 *    $filename =~ m"([a-z]+)\.cc";
166
 *    print $1;
167
 *
168
 * Is written as follows in C++
169
 *
170
 *    RegularExpression re("([a-z]+)\\.cc");
171
 *    re.find(filename);
172
 *    cerr << re.match(1);
173
 *
174
 *
175
 * The regular expression class provides a convenient mechanism
176
 * for  specifying  and  manipulating  regular expressions. The
177
 * regular expression object allows specification of such  pat-
178
 * terns  by using the following regular expression metacharac-
179
 * ters:
180
 *
181
 *  ^        Matches at beginning of a line
182
 *
183
 *  $        Matches at end of a line
184
 *
185
 * .         Matches any single character
186
 *
187
 * [ ]       Matches any character(s) inside the brackets
188
 *
189
 * [^ ]      Matches any character(s) not inside the brackets
190
 *
191
 *  -        Matches any character in range on either side of a dash
192
 *
193
 *  *        Matches preceding pattern zero or more times
194
 *
195
 *  +        Matches preceding pattern one or more times
196
 *
197
 *  ?        Matches preceding pattern zero or once only
198
 *
199
 * ()        Saves a matched expression and uses it in a later match
200
 *
201
 * Note that more than one of these metacharacters can be  used
202
 * in  a  single  regular expression in order to create complex
203
 * search patterns. For example, the pattern [^ab1-9]  says  to
203
 * match  any  single character that is neither an "a", a "b",
204
 * nor  one  of  the  digits  in  the  series  one
205
 * through nine.
207
 *
208
 * There are four constructors for RegularExpression.  One just creates an
209
 * empty RegularExpression object.  Two create a RegularExpression object
210
 * and initialize it with a regular expression given either as a char* or
211
 * as a std::string.  The fourth takes a reference to a RegularExpression
212
 * object as an argument and creates an object initialized with the
213
 * information from the given RegularExpression object.
214
 *
215
 * The  find  member function  finds   the  first  occurrence  of  the regular
216
 * expression of that object in the string given to find as an argument.  Find
217
 * returns a boolean, and  if true,  mutates  the private  data appropriately.
218
 * Find sets pointers to the beginning and end of  the thing last  found, they
219
 * are pointers into the actual string  that was searched.   The start and end
220
 * member functions return indices  into the searched string that  correspond
221
 * to the beginning   and  end pointers  respectively.   The    compile member
222
 * function takes a char* and puts the  compiled version of the char* argument
223
 * into the object's private data fields.  The == and  != operators only check
224
 * to  see  if   the compiled  regular  expression   is the same, and  the
225
 * deep_equal function also checks  to see if the  start and end pointers are
226
 * the same.  The is_valid  function returns false if  program is set to
227
 * nullptr, (i.e. there is no valid compiled  expression).  The  set_invalid
228
 * function sets the program to nullptr (Warning:  this deletes the compiled
229
 * expression). The following examples may help clarify regular expression
230
 * usage:
231
 *
232
 *   *  The regular expression  "^hello" matches  a "hello"  only at  the
233
 *      beginning of a  line.  It would match "hello  there" but not "hi,
234
 *      hello there".
235
 *
236
 *   *  The regular expression "long$" matches a  "long"  only at the end
237
 *      of a line. It would match "so long\0", but not "long ago".
238
 *
239
 *   *  The regular expression "t..t..g"  will match anything that  has a
240
 *      "t" then any two characters, another "t", any  two characters and
241
 *      then a "g".   It will match  "testing", or "test again" but would
242
 *      not match "toasting"
243
 *
244
 *   *  The regular  expression "[1-9ab]" matches any  number one through
245
 *      nine, and the characters  "a" and  "b".  It would match "hello 1"
246
 *      or "begin", but would not match "no-match".
247
 *
248
 *   *  The  regular expression "[^1-9ab]"  matches any character that is
249
 *      not a number one  through nine, or  an "a" or "b".   It would NOT
250
 *      match "hello 1" or "begin", but would match "no-match".
251
 *
252
 *   *  The regular expression "br* " matches  something that begins with
253
 *      a "b", is followed by zero or more "r"s, and ends in a space.  It
254
 *      would match "brrrrr ", and "b ", but would not match "brrh ".
255
 *
256
 *   *  The regular expression "br+ " matches something  that begins with
257
 *      a "b", is followed by one or more "r"s, and ends in  a space.  It
258
 *      would match "brrrrr ",  and  "br ", but would not  match "b  " or
259
 *      "brrh ".
260
 *
261
 *   *  The regular expression "br? " matches  something that begins with
262
 *      a "b", is followed by zero or one "r"s, and ends in  a space.  It
263
 *      would  match  "br ", and "b  ", but would not match  "brrrr "  or
264
 *      "brrh ".
265
 *
266
 *   *  The regular expression "(..p)b" matches  something ending with pb
267
 *      and beginning with whatever the two characters before the first p
268
 *      encountered in the line were.  It would find "repb" in "rep drepa
269
 *      qrepb".  The regular expression "(..p)a"  would find "repa qrepb"
270
 *      in "rep drepa qrepb"
271
 *
272
 *   *  The regular expression "d(..p)" matches something ending  with p,
273
 *      beginning with d, and having  two characters  in between that are
274
 *      the same as the two characters before  the first p encountered in
275
 *      the line.  It would match "drepa qrepb" in "rep drepa qrepb".
276
 *
277
 * All methods of RegularExpression can be called simultaneously from
278
 * different threads but only if each invocation uses its own instance of
279
 * RegularExpression.
280
 */
281
class cmsys_EXPORT RegularExpression
282
{
283
public:
284
  enum Options : unsigned
285
  {
286
    // Match ^ at offset instead of the input start.
287
    BOL_AT_OFFSET = 1,
288
    // If an empty match is found at offset, continue searching.
289
    NONEMPTY_AT_OFFSET = 2,
290
  };
291
292
  /**
293
   * Instantiate RegularExpression with program=nullptr.
294
   */
295
  inline RegularExpression();
296
297
  /**
298
   * Instantiate RegularExpression with compiled char*.
299
   */
300
  inline RegularExpression(char const*);
301
302
  /**
303
   * Instantiate RegularExpression as a copy of another regular expression.
304
   */
305
  RegularExpression(RegularExpression const&);
306
307
  /**
308
   * Instantiate RegularExpression with compiled string.
309
   */
310
  inline RegularExpression(std::string const&);
311
312
  /**
313
   * Destructor.
314
   */
315
  inline ~RegularExpression();
316
317
  /**
318
   * Compile a regular expression into internal code
319
   * for later pattern matching.
320
   */
321
  bool compile(char const*);
322
323
  /**
324
   * Compile a regular expression into internal code
325
   * for later pattern matching.
326
   */
327
  inline bool compile(std::string const&);
328
329
  /**
330
   * Matches the regular expression to the given string.
331
   * Returns true if found, and sets start and end indexes
332
   * in the RegularExpressionMatch instance accordingly.
333
   *
334
   * This method is thread safe when called with different
335
   * RegularExpressionMatch instances.
336
   */
337
  bool find(char const*, RegularExpressionMatch&,
338
            std::string::size_type offset = 0, unsigned options = 0) const;
339
340
  /**
341
   * Matches the regular expression to the given string.
342
   * Returns true if found, and sets start and end indexes accordingly.
343
   */
344
  inline bool find(char const*, std::string::size_type offset = 0,
345
                   unsigned options = 0);
346
347
  /**
348
   * Matches the regular expression to the given std string.
349
   * Returns true if found, and sets start and end indexes accordingly.
350
   */
351
  inline bool find(std::string const&, std::string::size_type offset = 0,
352
                   unsigned options = 0);
353
354
  /**
355
   * Match indices
356
   */
357
  inline RegularExpressionMatch const& regMatch() const;
358
  inline std::string::size_type start(int n = 0) const;
359
  inline std::string::size_type end(int n = 0) const;
360
361
  /**
362
   * Match strings
363
   */
364
  inline std::string match(int n = 0) const;
365
366
  /**
367
   * Copy the given regular expression.
368
   */
369
  RegularExpression& operator=(RegularExpression const& rxp);
370
371
  /**
372
   * Returns true if two regular expressions have the same
373
   * compiled program for pattern matching.
374
   */
375
  bool operator==(RegularExpression const&) const;
376
377
  /**
378
   * Returns true if two regular expressions have different
379
   * compiled program for pattern matching.
380
   */
381
  inline bool operator!=(RegularExpression const&) const;
382
383
  /**
384
   * Returns true if have the same compiled regular expressions
385
   * and the same start and end pointers.
386
   */
387
  bool deep_equal(RegularExpression const&) const;
388
389
  /**
390
   * True if the compiled regexp is valid.
391
   */
392
  inline bool is_valid() const;
393
394
  /**
395
   * Marks the regular expression as invalid.
396
   */
397
  inline void set_invalid();
398
399
  /**
400
   * The number of capture groups.
401
   */
402
  inline int num_groups();
403
404
private:
405
  RegularExpressionMatch regmatch;
406
  char regstart;                  // Internal use only
407
  char reganch;                   // Internal use only
408
  char const* regmust;            // Internal use only
409
  std::string::size_type regmlen; // Internal use only
410
  char* program;
411
  int progsize;
412
  int regnpar;
413
};
414
415
/**
416
 * Create an empty regular expression.
417
 */
418
inline RegularExpression::RegularExpression()
419
2.04k
  : regstart{}
420
2.04k
  , reganch{}
421
2.04k
  , regmust{}
422
2.04k
  , program{ nullptr }
423
2.04k
  , progsize{}
424
2.04k
  , regnpar{}
425
2.04k
{
426
2.04k
}
427
428
/**
429
 * Creates a regular expression from string s, and
430
 * compiles s.
431
 */
432
inline RegularExpression::RegularExpression(char const* s)
433
18
  : regstart{}
434
18
  , reganch{}
435
18
  , regmust{}
436
18
  , program{ nullptr }
437
18
  , progsize{}
438
18
  , regnpar{}
439
18
{
440
18
  if (s) {
441
18
    this->compile(s);
442
18
  }
443
18
}
444
445
/**
446
 * Creates a regular expression from string s, and
447
 * compiles s.
448
 */
449
inline RegularExpression::RegularExpression(std::string const& s)
450
0
  : regstart{}
451
0
  , reganch{}
452
0
  , regmust{}
453
0
  , program{ nullptr }
454
0
  , progsize{}
455
0
  , regnpar{}
456
0
{
457
0
  this->compile(s);
458
0
}
459
460
/**
461
 * Destroys and frees space allocated for the regular expression.
462
 */
463
inline RegularExpression::~RegularExpression()
464
2.04k
{
465
  // #ifndef _WIN32
466
2.04k
  delete[] this->program;
467
  // #endif
468
2.04k
}
469
470
/**
471
 * Compile a regular expression into internal code
472
 * for later pattern matching.
473
 */
474
inline bool RegularExpression::compile(std::string const& s)
475
2.03k
{
476
2.03k
  return this->compile(s.c_str());
477
2.03k
}
478
479
/**
480
 * Matches the regular expression to the given std string.
481
 * Returns true if found, and sets start and end indexes accordingly.
482
 */
483
inline bool RegularExpression::find(char const* s,
484
                                    std::string::size_type offset,
485
                                    unsigned options)
486
3.70k
{
487
3.70k
  return this->find(s, this->regmatch, offset, options);
488
3.70k
}
489
490
/**
491
 * Matches the regular expression to the given std string.
492
 * Returns true if found, and sets start and end indexes accordingly.
493
 */
494
inline bool RegularExpression::find(std::string const& s,
495
                                    std::string::size_type offset,
496
                                    unsigned options)
497
4.26k
{
498
4.26k
  return this->find(s.c_str(), this->regmatch, offset, options);
499
4.26k
}
500
501
/**
502
 * Returns the internal match object
503
 */
504
inline RegularExpressionMatch const& RegularExpression::regMatch() const
505
0
{
506
0
  return this->regmatch;
507
0
}
508
509
/**
510
 * Return start index of nth submatch. start(0) is the start of the full match.
511
 */
512
inline std::string::size_type RegularExpression::start(int n) const
513
0
{
514
0
  return regmatch.start(n);
515
0
}
516
517
/**
518
 * Return end index of nth submatch. end(0) is the end of the full match.
519
 */
520
inline std::string::size_type RegularExpression::end(int n) const
521
0
{
522
0
  return regmatch.end(n);
523
0
}
524
525
/**
526
 * Return nth submatch as a string.
527
 */
528
inline std::string RegularExpression::match(int n) const
529
0
{
530
0
  return regmatch.match(n);
531
0
}
532
533
/**
534
 * Returns true if two regular expressions have different
535
 * compiled program for pattern matching.
536
 */
537
inline bool RegularExpression::operator!=(RegularExpression const& r) const
538
0
{
539
0
  return (!(*this == r));
540
0
}
541
542
/**
543
 * Returns true if a valid regular expression is compiled
544
 * and ready for pattern matching.
545
 */
546
inline bool RegularExpression::is_valid() const
547
0
{
548
0
  return (this->program);
549
0
}
550
551
inline void RegularExpression::set_invalid()
552
0
{
553
0
  // #ifndef _WIN32
554
0
  delete[] this->program;
555
0
  // #endif
556
0
  this->program = nullptr;
557
0
}
558
559
inline int RegularExpression::num_groups()
560
0
{
561
0
  return this->regnpar - 1;
562
0
}
563
564
} // namespace cmsys
565
566
#endif