/src/CMake/build-dir/Source/cmsys/RegularExpression.hxx
Line | Count | Source |
1 | | /* Distributed under the OSI-approved BSD 3-Clause License. See accompanying |
2 | | file Copyright.txt or https://cmake.org/licensing#kwsys for details. */ |
3 | | // Original Copyright notice: |
4 | | // Copyright (C) 1991 Texas Instruments Incorporated. |
5 | | // |
6 | | // Permission is granted to any individual or institution to use, copy, modify, |
7 | | // and distribute this software, provided that this complete copyright and |
8 | | // permission notice is maintained, intact, in all copies and supporting |
9 | | // documentation. |
10 | | // |
11 | | // Texas Instruments Incorporated provides this software "as is" without |
12 | | // express or implied warranty. |
13 | | // |
14 | | // Created: MNF 06/13/89 Initial Design and Implementation |
15 | | // Updated: LGO 08/09/89 Inherit from Generic |
16 | | // Updated: MBN 09/07/89 Added conditional exception handling |
17 | | // Updated: MBN 12/15/89 Sprinkled "const" qualifiers all over the place! |
18 | | // Updated: DLS 03/22/91 New lite version |
19 | | // |
20 | | |
21 | | #ifndef cmsys_RegularExpression_hxx |
22 | | #define cmsys_RegularExpression_hxx |
23 | | |
24 | | #include <cmsys/Configure.h> |
25 | | #include <cmsys/Configure.hxx> |
26 | | |
27 | | #include <string> |
28 | | |
29 | | namespace cmsys { |
30 | | |
31 | | // Forward declaration |
32 | | class RegularExpression; |
33 | | |
34 | | /** \class RegularExpressionMatch
35 | | * \brief Stores the pattern matches of a RegularExpression as pointers into the searched string; accessors take a submatch index n (0 = full match) that is not range-checked against NSUBEXP.
36 | | */
37 | | class cmsys_EXPORT RegularExpressionMatch
38 | | {
39 | | public:
40 | | RegularExpressionMatch();
41 | | 
42 | | bool isValid() const;
43 | | void clear();
44 | | 
45 | | std::string::size_type start(int n = 0) const;
46 | | std::string::size_type end(int n = 0) const;
47 | | std::string match(int n = 0) const;
48 | | 
49 | | enum
50 | | {
51 | | NSUBEXP = 32 // capacity of the startp/endp arrays below
52 | | };
53 | | 
54 | | private:
55 | | friend class RegularExpression;
56 | | char const* startp[NSUBEXP]; // startp[n]: start of submatch n, null when unset
57 | | char const* endp[NSUBEXP]; // endp[n]: end of submatch n, null when unset
58 | | char const* searchstring; // base pointer subtracted by start()/end() to form indices
59 | | };
60 | | |
61 | | #ifdef _MSC_VER |
62 | | # pragma warning(push) |
63 | | # if _MSC_VER < 1900 |
64 | | # pragma warning(disable : 4351) /* new behavior */ |
65 | | # endif |
66 | | #endif |
67 | | |
68 | | /**
69 | | * \brief Creates an invalid match object: all startp/endp entries and searchstring are value-initialized to null, so isValid() is false.
70 | | */
71 | | inline RegularExpressionMatch::RegularExpressionMatch()
72 | 2.06k | : startp{}
73 | 2.06k | , endp{}
74 | 2.06k | , searchstring{}
75 | 2.06k | {
76 | 2.06k | }
77 | | |
78 | | #ifdef _MSC_VER |
79 | | # pragma warning(pop) |
80 | | #endif |
81 | | |
82 | | /**
83 | | * \brief Returns true if a match is recorded, i.e. startp[0] (start of the full match) is non-null.
84 | | */
85 | | inline bool RegularExpressionMatch::isValid() const
86 | 0 | {
87 | 0 | return (this->startp[0]);
88 | 0 | }
89 | | |
90 | | /**
91 | | * \brief Marks the match invalid by nulling startp[0], endp[0] and searchstring; entries for n > 0 are left untouched, so this is not a full reset to the construction state.
92 | | */
93 | | inline void RegularExpressionMatch::clear()
94 | 9.85k | {
95 | 9.85k | startp[0] = nullptr;
96 | 9.85k | endp[0] = nullptr;
97 | 9.85k | searchstring = nullptr;
98 | 9.85k | }
99 | | |
100 | | /**
101 | | * \brief Returns the start index of nth submatch, or std::string::npos when startp[n] is null.
102 | | * start(0) is the start of the full match. The index is the pointer offset from searchstring; n is not range-checked.
103 | | */
104 | | inline std::string::size_type RegularExpressionMatch::start(int n) const
105 | 0 | {
106 | 0 | if (!this->startp[n]) {
107 | 0 | return std::string::npos;
108 | 0 | }
109 | 0 | return static_cast<std::string::size_type>(this->startp[n] -
110 | 0 | this->searchstring);
111 | 0 | }
112 | | |
113 | | /**
114 | | * \brief Returns the end index of nth submatch, or std::string::npos when endp[n] is null.
115 | | * end(0) is the end of the full match. The index is the pointer offset from searchstring; n is not range-checked.
116 | | */
117 | | inline std::string::size_type RegularExpressionMatch::end(int n) const
118 | 0 | {
119 | 0 | if (!this->endp[n]) {
120 | 0 | return std::string::npos;
121 | 0 | }
122 | 0 | return static_cast<std::string::size_type>(this->endp[n] -
123 | 0 | this->searchstring);
124 | 0 | }
125 | | |
126 | | /**
127 | | * \brief Returns the nth submatch as a string built from [startp[n], endp[n]); returns an empty string when startp[n] is null. n is not range-checked.
128 | | */
129 | | inline std::string RegularExpressionMatch::match(int n) const
130 | 0 | {
131 | 0 | if (!this->startp[n]) {
132 | 0 | return std::string();
133 | 0 | } else {
134 | 0 | return std::string(
135 | 0 | this->startp[n],
136 | 0 | static_cast<std::string::size_type>(this->endp[n] - this->startp[n]));
137 | 0 | }
138 | 0 | }
139 | | |
140 | | /** \class RegularExpression |
141 | | * \brief Implements pattern matching with regular expressions. |
142 | | * |
143 | | * This is the header file for the regular expression class. An object of |
144 | | * this class contains a regular expression, in a special "compiled" format. |
145 | | * This compiled format consists of several slots all kept as the objects |
146 | | * private data. The RegularExpression class provides a convenient way to |
147 | | * represent regular expressions. It makes it easy to search for the same |
148 | | * regular expression in many different strings without having to compile a |
149 | | * string to regular expression format more than necessary. |
150 | | * |
151 | | * This class implements pattern matching via regular expressions. |
152 | | * A regular expression allows a programmer to specify complex |
153 | | * patterns that can be searched for and matched against the |
154 | | * character string of a string object. In its simplest form, a |
155 | | * regular expression is a sequence of characters used to |
156 | | * search for exact character matches. However, many times the |
157 | | * exact sequence to be found is not known, or only a match at |
158 | | * the beginning or end of a string is desired. The RegularExpression
159 | | * class implements regular expression pattern
160 | | * matching as is found and implemented in many UNIX commands |
161 | | * and utilities. |
162 | | * |
163 | | * Example: The perl code |
164 | | * |
165 | | * $filename =~ m"([a-z]+)\.cc"; |
166 | | * print $1; |
167 | | * |
168 | | * Is written as follows in C++ |
169 | | * |
170 | | * RegularExpression re("([a-z]+)\\.cc"); |
171 | | * re.find(filename); |
172 | | * cerr << re.match(1); |
173 | | * |
174 | | * |
175 | | * The regular expression class provides a convenient mechanism |
176 | | * for specifying and manipulating regular expressions. The |
177 | | * regular expression object allows specification of such
178 | | * patterns by using the following regular expression
179 | | * metacharacters:
180 | | * |
181 | | * ^ Matches at beginning of a line |
182 | | * |
183 | | * $ Matches at end of a line |
184 | | * |
185 | | * . Matches any single character |
186 | | * |
187 | | * [ ] Matches any character(s) inside the brackets |
188 | | * |
189 | | * [^ ] Matches any character(s) not inside the brackets |
190 | | * |
191 | | * - Matches any character in range on either side of a dash |
192 | | * |
193 | | * * Matches preceding pattern zero or more times |
194 | | * |
195 | | * + Matches preceding pattern one or more times |
196 | | * |
197 | | * ? Matches preceding pattern zero or once only |
198 | | * |
199 | | * () Saves a matched expression and uses it in a later match |
200 | | * |
201 | | * Note that more than one of these metacharacters can be used |
202 | | * in a single regular expression in order to create complex |
203 | | * search patterns. For example, the pattern [^ab1-9] says to
204 | | * match any single character that is not an "a", a "b", or
205 | | * one of the digits one
206 | | * through nine.
207 | | * |
208 | | * There are four constructors for RegularExpression. One just creates an
209 | | * empty RegularExpression object. Two create a RegularExpression
210 | | * object and initialize it with a regular expression that is given in the
211 | | * form of a char* or a std::string. The fourth takes a reference to a RegularExpression
212 | | * object as an argument and creates an object initialized with the
213 | | * information from the given RegularExpression object.
214 | | * |
215 | | * The find member function finds the first occurrence of the regular |
216 | | * expression of that object in the string given to find as an argument. Find |
217 | | * returns a boolean, and if true, mutates the private data appropriately. |
218 | | * Find sets pointers to the beginning and end of the thing last found, they |
219 | | * are pointers into the actual string that was searched. The start and end |
220 | | * member functions return indices into the searched string that correspond |
221 | | * to the beginning and end pointers respectively. The compile member |
222 | | * function takes a char* and puts the compiled version of the char* argument |
223 | | * into the object's private data fields. The == and != operators only check
224 | | * to see if the compiled regular expression is the same, and the
225 | | * deep_equal function also checks to see if the start and end pointers are
226 | | * the same. The is_valid function returns false if program is set to |
227 | | * nullptr, (i.e. there is no valid compiled expression). The set_invalid |
228 | | * function sets the program to nullptr (Warning: this deletes the compiled |
229 | | * expression). The following examples may help clarify regular expression |
230 | | * usage: |
231 | | * |
232 | | * * The regular expression "^hello" matches a "hello" only at the |
233 | | * beginning of a line. It would match "hello there" but not "hi, |
234 | | * hello there". |
235 | | * |
236 | | * * The regular expression "long$" matches a "long" only at the end |
237 | | * of a line. It would match "so long\0", but not "long ago". |
238 | | * |
239 | | * * The regular expression "t..t..g" will match anything that has a |
240 | | * "t" then any two characters, another "t", any two characters and |
241 | | * then a "g". It will match "testing", or "test again" but would |
242 | | * not match "toasting" |
243 | | * |
244 | | * * The regular expression "[1-9ab]" matches any number one through |
245 | | * nine, and the characters "a" and "b". It would match "hello 1" |
246 | | * or "begin", but would not match "no-match". |
247 | | * |
248 | | * * The regular expression "[^1-9ab]" matches any character that is |
249 | | * not a number one through nine, or an "a" or "b". It would NOT |
250 | | * match "hello 1" or "begin", but would match "no-match". |
251 | | * |
252 | | * * The regular expression "br* " matches something that begins with |
253 | | * a "b", is followed by zero or more "r"s, and ends in a space. It |
254 | | * would match "brrrrr ", and "b ", but would not match "brrh ". |
255 | | * |
256 | | * * The regular expression "br+ " matches something that begins with |
257 | | * a "b", is followed by one or more "r"s, and ends in a space. It |
258 | | * would match "brrrrr ", and "br ", but would not match "b " or |
259 | | * "brrh ". |
260 | | * |
261 | | * * The regular expression "br? " matches something that begins with |
262 | | * a "b", is followed by zero or one "r"s, and ends in a space. It |
263 | | * would match "br ", and "b ", but would not match "brrrr " or |
264 | | * "brrh ". |
265 | | * |
266 | | * * The regular expression "(..p)b" matches something ending with pb |
267 | | * and beginning with whatever the two characters before the first p |
268 | | * encountered in the line were. It would find "repb" in "rep drepa |
269 | | * qrepb". The regular expression "(..p)a" would find "repa qrepb" |
270 | | * in "rep drepa qrepb" |
271 | | * |
272 | | * * The regular expression "d(..p)" matches something ending with p, |
273 | | * beginning with d, and having two characters in between that are |
274 | | * the same as the two characters before the first p encountered in |
275 | | * the line. It would match "drepa qrepb" in "rep drepa qrepb". |
276 | | * |
277 | | * All methods of RegularExpression can be called simultaneously from
278 | | * different threads but only if each invocation uses its own instance of
279 | | * RegularExpression.
280 | | */ |
281 | | class cmsys_EXPORT RegularExpression
282 | | {
283 | | public:
284 | | enum Options : unsigned
285 | | {
286 | | // Match ^ at offset instead of the input start.
287 | | BOL_AT_OFFSET = 1,
288 | | // If an empty match is found at offset, continue searching.
289 | | NONEMPTY_AT_OFFSET = 2,
290 | | };
291 | | 
292 | | /**
293 | | * Instantiate RegularExpression with program=nullptr.
294 | | */
295 | | inline RegularExpression();
296 | | 
297 | | /**
298 | | * Instantiate RegularExpression by compiling the given char* pattern (a null pointer leaves program=nullptr).
299 | | */
300 | | inline RegularExpression(char const*);
301 | | 
302 | | /**
303 | | * Instantiate RegularExpression as a copy of another regular expression.
304 | | */
305 | | RegularExpression(RegularExpression const&);
306 | | 
307 | | /**
308 | | * Instantiate RegularExpression by compiling the given std::string pattern.
309 | | */
310 | | inline RegularExpression(std::string const&);
311 | | 
312 | | /**
313 | | * Destructor.
314 | | */
315 | | inline ~RegularExpression();
316 | | 
317 | | /**
318 | | * Compile a regular expression into internal code
319 | | * for later pattern matching.
320 | | */
321 | | bool compile(char const*);
322 | | 
323 | | /**
324 | | * Compile a regular expression into internal code
325 | | * for later pattern matching.
326 | | */
327 | | inline bool compile(std::string const&);
328 | | 
329 | | /**
330 | | * Matches the regular expression to the given string.
331 | | * Returns true if found, and sets start and end indexes
332 | | * in the RegularExpressionMatch instance accordingly.
333 | | *
334 | | * This method is thread safe when called with different
335 | | * RegularExpressionMatch instances.
336 | | */
337 | | bool find(char const*, RegularExpressionMatch&,
338 | | std::string::size_type offset = 0, unsigned options = 0) const;
339 | | 
340 | | /**
341 | | * Matches the regular expression to the given C string.
342 | | * Returns true if found, and sets start and end indexes in the internal match object accordingly.
343 | | */
344 | | inline bool find(char const*, std::string::size_type offset = 0,
345 | | unsigned options = 0);
346 | | 
347 | | /**
348 | | * Matches the regular expression to the given std string.
349 | | * Returns true if found, and sets start and end indexes in the internal match object accordingly.
350 | | */
351 | | inline bool find(std::string const&, std::string::size_type offset = 0,
352 | | unsigned options = 0);
353 | | 
354 | | /**
355 | | * Match indices (read from the internal match object)
356 | | */
357 | | inline RegularExpressionMatch const& regMatch() const;
358 | | inline std::string::size_type start(int n = 0) const;
359 | | inline std::string::size_type end(int n = 0) const;
360 | | 
361 | | /**
362 | | * Match strings (read from the internal match object)
363 | | */
364 | | inline std::string match(int n = 0) const;
365 | | 
366 | | /**
367 | | * Copy the given regular expression.
368 | | */
369 | | RegularExpression& operator=(RegularExpression const& rxp);
370 | | 
371 | | /**
372 | | * Returns true if two regular expressions have the same
373 | | * compiled program for pattern matching.
374 | | */
375 | | bool operator==(RegularExpression const&) const;
376 | | 
377 | | /**
378 | | * Returns true if two regular expressions have different
379 | | * compiled programs for pattern matching.
380 | | */
381 | | inline bool operator!=(RegularExpression const&) const;
382 | | 
383 | | /**
384 | | * Returns true if both objects have the same compiled regular expression
385 | | * and the same start and end pointers.
386 | | */
387 | | bool deep_equal(RegularExpression const&) const;
388 | | 
389 | | /**
390 | | * True if the compiled regexp is valid (program is non-null).
391 | | */
392 | | inline bool is_valid() const;
393 | | 
394 | | /**
395 | | * Marks the regular expression as invalid; this deletes the compiled program.
396 | | */
397 | | inline void set_invalid();
398 | | 
399 | | /**
400 | | * The number of capture groups, computed as regnpar - 1.
401 | | */
402 | | inline int num_groups();
403 | | 
404 | | private:
405 | | RegularExpressionMatch regmatch;
406 | | char regstart; // Internal use only
407 | | char reganch; // Internal use only
408 | | char const* regmust; // Internal use only
409 | | std::string::size_type regmlen; // Internal use only; NOTE(review): not initialized by the inline constructors
410 | | char* program;
411 | | int progsize;
412 | | int regnpar;
413 | | };
414 | | |
415 | | /**
416 | | * Create an empty regular expression: program is nullptr, so is_valid() is false. NOTE(review): regmlen is absent from the initializer list and is left uninitialized.
417 | | */
418 | | inline RegularExpression::RegularExpression()
419 | 2.04k | : regstart{}
420 | 2.04k | , reganch{}
421 | 2.04k | , regmust{}
422 | 2.04k | , program{ nullptr }
423 | 2.04k | , progsize{}
424 | 2.04k | , regnpar{}
425 | 2.04k | {
426 | 2.04k | }
427 | | |
428 | | /**
429 | | * Creates a regular expression from the C string s, and
430 | | * compiles s when it is non-null; the bool returned by compile() is discarded. NOTE(review): regmlen is not initialized before compile() runs.
431 | | */
432 | | inline RegularExpression::RegularExpression(char const* s)
433 | 18 | : regstart{}
434 | 18 | , reganch{}
435 | 18 | , regmust{}
436 | 18 | , program{ nullptr }
437 | 18 | , progsize{}
438 | 18 | , regnpar{}
439 | 18 | {
440 | 18 | if (s) {
441 | 18 | this->compile(s);
442 | 18 | }
443 | 18 | }
444 | | |
445 | | /**
446 | | * Creates a regular expression from the std::string s, and
447 | | * compiles s; the bool returned by compile() is discarded. NOTE(review): regmlen is not initialized before compile() runs.
448 | | */
449 | | inline RegularExpression::RegularExpression(std::string const& s)
450 | 0 | : regstart{}
451 | 0 | , reganch{}
452 | 0 | , regmust{}
453 | 0 | , program{ nullptr }
454 | 0 | , progsize{}
455 | 0 | , regnpar{}
456 | 0 | {
457 | 0 | this->compile(s);
458 | 0 | }
459 | | |
460 | | /**
461 | | * Destroys the object and frees the compiled program with delete[] (no effect when program is nullptr).
462 | | */
463 | | inline RegularExpression::~RegularExpression()
464 | 2.04k | {
465 | | // #ifndef _WIN32 (guard is commented out, so the delete[] below is unconditional)
466 | 2.04k | delete[] this->program;
467 | | // #endif
468 | 2.04k | }
469 | | |
470 | | /**
471 | | * Compile a regular expression given as a std::string into internal code
472 | | * for later pattern matching; forwards s.c_str() to compile(char const*) and returns its result.
473 | | */
474 | | inline bool RegularExpression::compile(std::string const& s)
475 | 2.03k | {
476 | 2.03k | return this->compile(s.c_str());
477 | 2.03k | }
478 | | |
479 | | /**
480 | | * Matches the regular expression to the given C string.
481 | | * Returns true if found; the match is recorded in the internal regmatch member via the four-argument find().
482 | | */
483 | | inline bool RegularExpression::find(char const* s,
484 | | std::string::size_type offset,
485 | | unsigned options)
486 | 3.70k | {
487 | 3.70k | return this->find(s, this->regmatch, offset, options);
488 | 3.70k | }
489 | | |
490 | | /**
491 | | * Matches the regular expression to the given std string (passed on as s.c_str()).
492 | | * Returns true if found; the match is recorded in the internal regmatch member via the four-argument find().
493 | | */
494 | | inline bool RegularExpression::find(std::string const& s,
495 | | std::string::size_type offset,
496 | | unsigned options)
497 | 4.26k | {
498 | 4.26k | return this->find(s.c_str(), this->regmatch, offset, options);
499 | 4.26k | }
500 | | |
501 | | /**
502 | | * Returns a const reference to the internal match object (the one the non-const find() overloads write to)
503 | | */
504 | | inline RegularExpressionMatch const& RegularExpression::regMatch() const
505 | 0 | {
506 | 0 | return this->regmatch;
507 | 0 | }
508 | | |
509 | | /**
510 | | * Return start index of nth submatch. start(0) is the start of the full match. Delegates to regmatch.start(n).
511 | | */
512 | | inline std::string::size_type RegularExpression::start(int n) const
513 | 0 | {
514 | 0 | return regmatch.start(n);
515 | 0 | }
516 | | |
517 | | /**
518 | | * Return end index of nth submatch. end(0) is the end of the full match. Delegates to regmatch.end(n).
519 | | */
520 | | inline std::string::size_type RegularExpression::end(int n) const
521 | 0 | {
522 | 0 | return regmatch.end(n);
523 | 0 | }
524 | | |
525 | | /**
526 | | * Return nth submatch as a string. Delegates to regmatch.match(n).
527 | | */
528 | | inline std::string RegularExpression::match(int n) const
529 | 0 | {
530 | 0 | return regmatch.match(n);
531 | 0 | }
532 | | |
533 | | /**
534 | | * Returns true if two regular expressions have different
535 | | * compiled programs for pattern matching; defined as the negation of operator==.
536 | | */
537 | | inline bool RegularExpression::operator!=(RegularExpression const& r) const
538 | 0 | {
539 | 0 | return (!(*this == r));
540 | 0 | }
541 | | |
542 | | /**
543 | | * Returns true if a valid regular expression is compiled
544 | | * and ready for pattern matching, i.e. program is non-null.
545 | | */
546 | | inline bool RegularExpression::is_valid() const
547 | 0 | {
548 | 0 | return (this->program);
549 | 0 | }
550 | | |
551 | | inline void RegularExpression::set_invalid() // Discards the compiled program and marks the object invalid.
552 | 0 | {
553 | 0 | // #ifndef _WIN32 (guard is commented out, so the delete[] below is unconditional)
554 | 0 | delete[] this->program;
555 | 0 | // #endif
556 | 0 | this->program = nullptr; // is_valid() now returns false; the destructor's delete[] becomes a no-op
557 | 0 | }
558 | | |
559 | | inline int RegularExpression::num_groups() // Number of capture groups, computed as regnpar - 1.
560 | 0 | {
561 | 0 | return this->regnpar - 1; // evaluates to -1 while regnpar is 0, as the inline constructors leave it
562 | 0 | }
563 | | |
564 | | } // namespace cmsys |
565 | | |
566 | | #endif |