Coverage Report

Created: 2025-06-13 06:07

/src/poco/Foundation/include/Poco/RegularExpression.h
Line
Count
Source (jump to first uncovered line)
1
//
2
// RegularExpression.h
3
//
4
// Library: Foundation
5
// Package: RegExp
6
// Module:  RegularExpression
7
//
8
// Definitions of class RegularExpression.
9
//
10
// A wrapper class for Philip Hazel's PCRE - Perl Compatible Regular Expressions
11
// library (http://www.pcre.org).
12
//
13
// Copyright (c) 2004-2006, Applied Informatics Software Engineering GmbH.
14
// and Contributors.
15
//
16
// SPDX-License-Identifier: BSL-1.0
17
//
18
19
20
#ifndef Foundation_RegularExpression_INCLUDED
21
#define Foundation_RegularExpression_INCLUDED
22
23
24
#include "Poco/Foundation.h"
25
#include <vector>
26
#include <map>
27
28
29
namespace Poco {
30
31
32
class Foundation_API RegularExpression
33
  /// A class for working with regular expressions.
34
  /// Implemented using PCRE2, the Perl Compatible
35
  /// Regular Expressions library by Philip Hazel
36
  /// (see http://www.pcre.org).
37
{
38
public:
39
  enum Options
40
    /// Some of the following options can only be passed to the constructor;
41
    /// some can be passed only to matching functions, and some can be used
42
    /// everywhere.
43
    ///
44
    ///   * Options marked [ctor] can be passed to the constructor.
45
    ///   * Options marked [match] can be passed to match, extract, split and subst.
46
    ///   * Options marked [subst] can be passed to subst.
47
    ///
48
    /// See the PCRE documentation for more information.
49
  {
50
    RE_CASELESS        = 0x00000001, /// case insensitive matching (/i) [ctor]
51
    RE_MULTILINE       = 0x00000002, /// enable multi-line mode; affects ^ and $ (/m) [ctor]
52
    RE_DOTALL          = 0x00000004, /// dot matches all characters, including newline (/s) [ctor]
53
    RE_EXTENDED        = 0x00000008, /// totally ignore whitespace (/x) [ctor]
54
    RE_ANCHORED        = 0x00000010, /// treat pattern as if it starts with a ^ [ctor, match]
55
    RE_DOLLAR_ENDONLY  = 0x00000020, /// dollar matches end-of-string only, not last newline in string [ctor]
56
    RE_EXTRA           = 0x00000040, /// enable optional PCRE functionality [ctor]
57
    RE_NOTBOL          = 0x00000080, /// circumflex does not match beginning of string [match]
58
    RE_NOTEOL          = 0x00000100, /// $ does not match end of string [match]
59
    RE_UNGREEDY        = 0x00000200, /// make quantifiers ungreedy [ctor]
60
    RE_NOTEMPTY        = 0x00000400, /// empty string never matches [match]
61
    RE_UTF8            = 0x00000800, /// assume pattern and subject is UTF-8 encoded [ctor]
62
    RE_NO_AUTO_CAPTURE = 0x00001000, /// disable numbered capturing parentheses [ctor, match]
63
    RE_NO_UTF8_CHECK   = 0x00002000, /// do not check validity of UTF-8 code sequences [match]
64
    RE_FIRSTLINE       = 0x00040000, /// an unanchored pattern is required to match
65
                                     /// before or at the first newline in the subject string,
66
                                     /// though the matched text may continue over the newline [ctor]
67
    RE_DUPNAMES        = 0x00080000, /// names used to identify capturing subpatterns need not be unique [ctor]
68
    RE_NEWLINE_CR      = 0x00100000, /// assume newline is CR ('\r'), the default [ctor]
69
    RE_NEWLINE_LF      = 0x00200000, /// assume newline is LF ('\n') [ctor]
70
    RE_NEWLINE_CRLF    = 0x00300000, /// assume newline is CRLF ("\r\n") [ctor]
71
    RE_NEWLINE_ANY     = 0x00400000, /// assume newline is any valid Unicode newline character [ctor]
72
    RE_NEWLINE_ANYCRLF = 0x00500000, /// assume newline is any of CR, LF, CRLF [ctor]
73
    RE_GLOBAL          = 0x10000000, /// replace all occurrences (/g) [subst]
74
    RE_NO_VARS         = 0x20000000  /// treat dollar in replacement string as ordinary character [subst]
75
  };
76
77
  struct Match
78
  {
79
    std::string::size_type offset; /// zero based offset (std::string::npos if subexpr does not match)
80
    std::string::size_type length; /// length of substring
81
    std::string name;              /// name of group
82
  };
83
  using MatchVec = std::vector<Match>;
84
  using GroupMap = std::map<int, std::string>;
85
86
  RegularExpression(const std::string& pattern, int options = 0, bool study = true);
87
    /// Creates a regular expression and parses the given pattern.
88
    /// Note: the study argument is only provided for backwards compatibility
89
    /// and is ignored since POCO release 1.12, which uses PCRE2.
90
    /// For a description of the options, please see the PCRE documentation.
91
    /// Throws a RegularExpressionException if the pattern cannot be compiled.
92
93
  ~RegularExpression();
94
    /// Destroys the regular expression.
95
96
  int match(const std::string& subject, Match& mtch, int options = 0) const;
97
    /// Matches the given subject string against the pattern. Returns the position
98
    /// of the first captured substring in mtch.
99
    /// If no part of the subject matches the pattern, mtch.offset is std::string::npos and
100
    /// mtch.length is 0.
101
    /// Throws a RegularExpressionException in case of an error.
102
    /// Returns the number of matches.
103
104
  int match(const std::string& subject, std::string::size_type offset, Match& mtch, int options = 0) const;
105
    /// Matches the given subject string, starting at offset, against the pattern.
106
    /// Returns the position of the captured substring in mtch.
107
    /// If no part of the subject matches the pattern, mtch.offset is std::string::npos and
108
    /// mtch.length is 0.
109
    /// Throws a RegularExpressionException in case of an error.
110
    /// Returns the number of matches.
111
112
  int match(const std::string& subject, std::string::size_type offset, MatchVec& matches, int options = 0) const;
113
    /// Matches the given subject string against the pattern.
114
    /// The first entry in matches contains the position of the captured substring.
115
    /// The following entries identify matching subpatterns. See the PCRE documentation
116
    /// for a more detailed explanation.
117
    /// If no part of the subject matches the pattern, matches is empty.
118
    /// Throws a RegularExpressionException in case of an error.
119
    /// Returns the number of matches.
120
121
  bool match(const std::string& subject, std::string::size_type offset = 0) const;
122
    /// Returns true if and only if the subject matches the regular expression.
123
    ///
124
    /// Internally, this method sets the RE_ANCHORED and RE_NOTEMPTY options for
125
    /// matching, which means that the empty string will never match and
126
    /// the pattern is treated as if it starts with a ^.
127
128
  bool match(const std::string& subject, std::string::size_type offset, int options) const;
129
    /// Returns true if and only if the subject matches the regular expression.
130
131
  bool operator == (const std::string& subject) const;
132
    /// Returns true if and only if the subject matches the regular expression.
133
    ///
134
    /// Internally, this method sets the RE_ANCHORED and RE_NOTEMPTY options for
135
    /// matching, which means that the empty string will never match and
136
    /// the pattern is treated as if it starts with a ^.
137
138
  bool operator != (const std::string& subject) const;
139
    /// Returns true if and only if the subject does not match the regular expression.
140
    ///
141
    /// Internally, this method sets the RE_ANCHORED and RE_NOTEMPTY options for
142
    /// matching, which means that the empty string will never match and
143
    /// the pattern is treated as if it starts with a ^.
144
145
  int extract(const std::string& subject, std::string& str, int options = 0) const;
146
    /// Matches the given subject string against the pattern.
147
    /// Returns the captured string.
148
    /// Throws a RegularExpressionException in case of an error.
149
    /// Returns the number of matches.
150
151
  int extract(const std::string& subject, std::string::size_type offset, std::string& str, int options = 0) const;
152
    /// Matches the given subject string, starting at offset, against the pattern.
153
    /// Returns the captured string.
154
    /// Throws a RegularExpressionException in case of an error.
155
    /// Returns the number of matches.
156
157
  int split(const std::string& subject, std::vector<std::string>& strings, int options = 0) const;
158
    /// Matches the given subject string against the pattern.
159
    /// The first entry in strings is the captured substring.
160
    /// The following entries contain substrings matching subpatterns. See the PCRE documentation
161
    /// for a more detailed explanation.
162
    /// If no part of the subject matches the pattern, strings is empty.
163
    /// Throws a RegularExpressionException in case of an error.
164
    /// Returns the number of matches.
165
166
  int split(const std::string& subject, std::string::size_type offset, std::vector<std::string>& strings, int options = 0) const;
167
    /// Matches the given subject string against the pattern.
168
    /// The first entry in strings is the captured substring.
169
    /// The following entries contain substrings matching subpatterns. See the PCRE documentation
170
    /// for a more detailed explanation.
171
    /// If no part of the subject matches the pattern, strings is empty.
172
    /// Throws a RegularExpressionException in case of an error.
173
    /// Returns the number of matches.
174
175
  int subst(std::string& subject, const std::string& replacement, int options = 0) const;
176
    /// Substitute in subject all matches of the pattern with replacement.
177
    /// If RE_GLOBAL is specified as option, all matches are replaced. Otherwise,
178
    /// only the first match is replaced.
179
    /// Occurrences of $<n> (for example, $1, $2, ...) in replacement are replaced
180
    /// with the corresponding captured string. $0 is the captured substring.
181
    /// Returns the number of replaced occurrences.
182
183
  int subst(std::string& subject, std::string::size_type offset, const std::string& replacement, int options = 0) const;
184
    /// Substitute in subject all matches of the pattern with replacement,
185
    /// starting at offset.
186
    /// If RE_GLOBAL is specified as option, all matches are replaced. Otherwise,
187
    /// only the first match is replaced.
188
    /// Unless RE_NO_VARS is specified, occurrences of $<n> (for example, $0, $1, $2, ... $9)
189
    /// in replacement are replaced with the corresponding captured string.
190
    /// $0 is the captured substring. $1 ... $n are the substrings matching the subpatterns.
191
    /// Returns the number of replaced occurrences.
192
193
  static bool match(const std::string& subject, const std::string& pattern, int options = 0);
194
    /// Matches the given subject string against the regular expression given in pattern,
195
    /// using the given options.
196
197
protected:
198
  std::string::size_type substOne(std::string& subject, std::string::size_type offset, const std::string& replacement, int options) const;
199
  static int compileOptions(int options);
200
  static int matchOptions(int options);
201
202
private:
203
  // Note: to avoid a dependency on the pcre2.h header the following are
204
  // declared as void* and cast to the correct type in the implementation file.
205
  void* _pcre;  // Actual type is pcre2_code_8*
206
207
  GroupMap _groups;
208
209
  RegularExpression();
210
  RegularExpression(const RegularExpression&);
211
  RegularExpression& operator = (const RegularExpression&);
212
};
213
214
215
//
216
// inlines
217
//
218
inline int RegularExpression::match(const std::string& subject, Match& mtch, int options) const
219
0
{
220
0
  return match(subject, 0, mtch, options); // convenience form: forwards to the offset-taking overload with offset 0
221
0
}
222
223
224
inline int RegularExpression::split(const std::string& subject, std::vector<std::string>& strings, int options) const
225
0
{
226
0
  return split(subject, 0, strings, options); // convenience form: forwards to the offset-taking overload with offset 0
227
0
}
228
229
230
inline int RegularExpression::subst(std::string& subject, const std::string& replacement, int options) const
231
0
{
232
0
  return subst(subject, 0, replacement, options); // convenience form: forwards to the offset-taking overload with offset 0
233
0
}
234
235
236
inline bool RegularExpression::operator == (const std::string& subject) const
237
0
{
238
0
  return match(subject); // delegates to the bool match(subject, offset = 0) overload
239
0
}
240
241
242
inline bool RegularExpression::operator != (const std::string& subject) const
243
0
{
244
0
  return !match(subject); // logical negation of the bool match(subject, offset = 0) overload
245
0
}
246
247
248
} // namespace Poco
249
250
251
#endif // Foundation_RegularExpression_INCLUDED