Coverage Report

Created: 2026-02-10 06:42

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/trafficserver/include/tsutil/Regex.h
Line
Count
Source
1
/** @file
2
3
  PCRE2 regular expression wrappers (RegexMatches, RegexMatchContext, Regex) and the multi-pattern DFA container.
4
5
  @section license License
6
7
  Licensed to the Apache Software Foundation (ASF) under one
8
  or more contributor license agreements.  See the NOTICE file
9
  distributed with this work for additional information
10
  regarding copyright ownership.  The ASF licenses this file
11
  to you under the Apache License, Version 2.0 (the
12
  "License"); you may not use this file except in compliance
13
  with the License.  You may obtain a copy of the License at
14
15
      http://www.apache.org/licenses/LICENSE-2.0
16
17
  Unless required by applicable law or agreed to in writing, software
18
  distributed under the License is distributed on an "AS IS" BASIS,
19
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20
  See the License for the specific language governing permissions and
21
  limitations under the License.
22
 */
23
24
#pragma once
25
26
#include <string_view>
27
#include <string>
28
#include <vector>
29
#include <memory>
30
31
/// @brief Match flags for regular expression evaluation.
32
///
33
/// @internal These values are copied from pcre2.h, to avoid having to include it.  The values are checked (with
34
/// static_assert) in Regex.cc against PCRE2 named constants, in case they change in future PCRE2 releases.
35
enum REFlags {
36
  RE_CASE_INSENSITIVE = 0x00000008u, ///< Ignore case (by default, matches are case sensitive). Presumably PCRE2_CASELESS - confirm in Regex.cc.
37
  RE_UNANCHORED       = 0x00000400u, ///< Unanchored (@a DFA defaults to anchored). NOTE(review): 0x400 is PCRE2_MULTILINE in pcre2.h - confirm the intended mapping in Regex.cc.
38
  RE_ANCHORED         = 0x80000000u, ///< Anchored (@a Regex defaults to unanchored). Presumably PCRE2_ANCHORED - confirm in Regex.cc.
39
  RE_NOTEMPTY         = 0x00000004u  ///< Not empty (by default, matches may match empty string). Documented below as a match-time flag for Regex::exec.
40
};
41
42
/// @brief Error codes returned by regular expression operations.
43
///
44
/// @internal As with REFlags, these values are copied from pcre2.h, to avoid having to include it.
45
enum REErrors {
46
  RE_ERROR_NOMATCH = -1, ///< No match found (presumably PCRE2_ERROR_NOMATCH - confirm in Regex.cc).
47
  RE_ERROR_NULL    = -51 ///< NULL code or subject was passed (presumably PCRE2_ERROR_NULL - confirm in Regex.cc).
48
};
49
50
/// @brief Wrapper for PCRE2 match data.
51
class RegexMatches
52
{
53
  friend class Regex;
54
55
public:
56
  /** Construct a new RegexMatches object.
57
   *
58
   * @param size The number of matches to allocate space for (defaults to DEFAULT_MATCHES).
59
   */
60
  RegexMatches(uint32_t size = DEFAULT_MATCHES); // NOTE(review): not explicit, so a uint32_t converts implicitly - confirm this is intended.
61
  ~RegexMatches(); // NOTE(review): copy/move are implicitly generated despite this destructor and the internal buffer - confirm copying is safe.
62
63
  /** Get the match at the given index.
64
   *
   * @param index Index of the match to retrieve (presumably 0 is the whole match, as in PCRE2 - confirm in Regex.cc).
65
   * @return A view of the match at the given index.
66
   */
67
  std::string_view operator[](size_t index) const;
68
  /** Get the ovector pointer for the capture groups.  Don't use this unless you know what you are doing.
69
   *
70
   * @return ovector pointer.
71
   */
72
  size_t *get_ovector_pointer();
73
  int32_t size() const; ///< NOTE(review): undocumented; presumably the match count recorded by the last Regex::exec - confirm in Regex.cc.
74
75
private:
76
  constexpr static uint32_t DEFAULT_MATCHES = 10; ///< Default constructor argument; also sizes @a _buffer.
77
  static void              *malloc(size_t size, void *caller); ///< NOTE(review): presumably a PCRE2 allocator callback backed by @a _buffer - confirm in Regex.cc.
78
  static void               free(void *p, void *caller); ///< NOTE(review): presumably the matching PCRE2 deallocator callback - confirm in Regex.cc.
79
  std::string_view          _subject; ///< NOTE(review): a view, not a copy - presumably the exec subject must outlive use of operator[]; confirm.
80
  char    _buffer[24 + 96 + 28 * DEFAULT_MATCHES]; // 24 bytes for the general context, 96 bytes overhead, 28 bytes per match (sized for DEFAULT_MATCHES matches).
81
  size_t  _buffer_bytes_used = 0; ///< Starts at 0; presumably tracks how much of @a _buffer has been handed out - confirm in Regex.cc.
82
  int32_t _size              = 0; ///< Starts at 0; presumably the value reported by size() - confirm in Regex.cc.
83
84
  /// @internal This effectively wraps a void* so that we can avoid requiring the pcre2.h include for the user of the Regex
85
  /// API (see Regex.cc).  Only the friend struct _MatchData can reach the wrapped pointer.
86
  struct _MatchData;
87
  class _MatchDataPtr
88
  {
89
    friend struct _MatchData;
90
91
  private:
92
    void *_ptr = nullptr; ///< Opaque pointer, null until set by _MatchData.
93
  };
94
  _MatchDataPtr _match_data;
95
};
96
97
/// @brief Wrapper for PCRE2 match context
98
///
99
/// @internal This instance is not tied to any Regex and can be used with one of the Regex::exec overloads.
100
class RegexMatchContext
101
{
102
  friend class Regex;
103
104
public:
105
  /** Construct a new RegexMatchContext object.
106
   */
107
  RegexMatchContext();
108
  ~RegexMatchContext();
109
110
  /// Copy construction and copy assignment make a deep copy using pcre2_match_context_copy.
111
  RegexMatchContext(RegexMatchContext const &orig);
112
  RegexMatchContext &operator=(RegexMatchContext const &orig);
113
114
  RegexMatchContext(RegexMatchContext &&)            = default; // NOTE(review): a defaulted move copies the raw _ptr without nulling the source; if the destructor frees it this is a double free - confirm in Regex.cc.
115
  RegexMatchContext &operator=(RegexMatchContext &&) = default; // NOTE(review): same concern; the previously held context may also leak - confirm in Regex.cc.
116
117
  /** Limits the amount of backtracking that can take place.
118
   * Any Regex::exec call that exceeds this limit fails with PCRE2_ERROR_MATCHLIMIT (-47).
119
   */
120
  void set_match_limit(uint32_t limit);
121
122
private:
123
  /// @internal This wraps a void* so as to avoid requiring a pcre2 include.
124
  struct _MatchContext;
125
  struct _MatchContextPtr {
126
    void *_ptr = nullptr; ///< Opaque pointer, null by default.
127
  };
128
129
  _MatchContextPtr _match_context;
130
};
131
132
/// @brief Wrapper for PCRE2 regular expression.
133
class Regex
134
{
135
public:
136
0
  Regex() = default; ///< Default-constructs with no compiled code (the wrapped pointer starts as nullptr).
137
  /** Deep copy constructor.
138
   *
139
   * Creates a new Regex object with a deep copy of the compiled pattern.
140
   * Uses pcre2_code_copy() to duplicate the compiled pattern without
141
   * requiring the original pattern string.
142
   *
143
   * @param other The Regex object to copy from.
144
   */
145
  Regex(Regex const &other);
146
  /** Deep copy assignment operator.
147
   *
148
   * Replaces the current compiled pattern with a deep copy of the other's pattern.
149
   *
150
   * @param other The Regex object to copy from.
151
   * @return Reference to this object.
152
   */
153
  Regex &operator=(Regex const &other);
154
  Regex(Regex &&that) noexcept;
155
  Regex &operator=(Regex &&other); // NOTE(review): not noexcept, unlike the move constructor - confirm whether this is intentional.
156
  ~Regex();
157
158
  /** Compile the @a pattern into a regular expression.
159
   *
160
   * @param pattern Source pattern for regular expression (NOTE(review): documented as null terminated, but passed as a std::string_view - confirm).
161
   * @param flags Compilation flags.
162
   * @return @c true if compiled successfully, @c false otherwise.
163
   *
164
   * @a flags should be the bitwise @c or of @c REFlags values.
165
   */
166
  bool compile(std::string_view pattern, uint32_t flags = 0);
167
168
  /** Compile the @a pattern into a regular expression, reporting error details.
169
   *
170
   * @param pattern Source pattern for regular expression (NOTE(review): documented as null terminated, but passed as a std::string_view - confirm).
171
   * @param error String to receive error message.
172
   * @param erroffset Reference to integer to receive error offset.
173
   * @param flags Compilation flags.
174
   * @return @c true if compiled successfully, @c false otherwise.
175
   *
176
   * @a flags should be the bitwise @c or of @c REFlags values.
177
   */
178
  bool compile(std::string_view pattern, std::string &error, int &erroffset, unsigned flags = 0); // NOTE(review): @a flags is unsigned here but uint32_t in the overload above.
179
180
  /** Execute the regular expression.
181
   *
182
   * @param subject String to match against.
183
   * @return @c true if the pattern matched, @c false if not.
184
   *
185
   * It is safe to call this method concurrently on the same instance of @a this.
186
   */
187
  bool exec(std::string_view subject) const;
188
189
  /** Execute the regular expression.
190
   *
191
   * @param subject String to match against.
192
   * @param flags Match flags (e.g., RE_NOTEMPTY).
193
   * @return @c true if the pattern matched, @c false if not.
194
   *
195
   * It is safe to call this method concurrently on the same instance of @a this.
196
   */
197
  bool exec(std::string_view subject, uint32_t flags) const;
198
199
  /** Execute the regular expression.
200
   *
201
   * @param subject String to match against.
202
   * @param matches Place to store the capture groups.
203
   * @return The number of capture groups. < 0 if an error occurred. 0 if the number of Matches is too small.
204
   *
205
   * It is safe to call this method concurrently on the same instance of @a this.
206
   *
207
   * NOTE(review): the original text here said "each capture group takes 3 elements of @a ovector, therefore @a ovecsize
208
   * must be a multiple of 3 ..."; this overload has no such parameters (capture storage is @a matches) - likely stale.
209
   */
210
  int exec(std::string_view subject, RegexMatches &matches) const;
211
212
  /** Execute the regular expression.
213
   *
214
   * @param subject String to match against.
215
   * @param matches Place to store the capture groups.
216
   * @param flags Match flags (e.g., RE_NOTEMPTY).
217
   * @param matchContext Optional match context (set matching limits); defaults to nullptr.
218
   * @return The number of capture groups. < 0 if an error occurred. 0 if the number of Matches is too small.
219
   *
220
   * It is safe to call this method concurrently on the same instance of @a this.
221
   *
222
   * NOTE(review): the original text here said "each capture group takes 3 elements of @a ovector, therefore @a ovecsize
223
   * must be a multiple of 3 ..."; this overload has no such parameters (capture storage is @a matches) - likely stale.
224
   */
225
  int exec(std::string_view subject, RegexMatches &matches, uint32_t flags,
226
           RegexMatchContext const *const matchContext = nullptr) const;
227
228
  /** Error string for exec failure.
229
   *
230
   * @param rc Return code from an exec call.
231
   */
232
  static std::string get_error_string(int rc);
233
234
  /// @return The number of capture groups in the compiled pattern, -1 for fail.
235
  int32_t get_capture_count() const;
236
237
  /// @return The number of the highest back reference in the compiled pattern, -1 for fail.
238
  int32_t get_backref_max() const;
239
240
  /// @return Is the compiled pattern empty?
241
  bool empty() const;
242
243
private:
244
  /// @internal This effectively wraps a void* so that we can avoid requiring the pcre2.h include for the user of the Regex
245
  /// API (see Regex.cc).  Only the friend struct _Code can reach the wrapped pointer.
246
  struct _Code;
247
  class _CodePtr
248
  {
249
    friend struct _Code;
250
251
  private:
252
    void *_ptr = nullptr; ///< Opaque pointer, null until set by _Code.
253
  };
254
  _CodePtr _code;
255
};
256
257
/** Deterministic Finite state Automata container.
258
 *
259
 * This contains a set of patterns (which may be of size 1) and matches if any of the patterns
260
 * match.
261
 */
262
class DFA
263
{
264
public:
265
0
  DFA() = default; ///< Constructs a DFA with an empty pattern set.
266
  ~DFA();
267
268
  /// Compile a single @a pattern. @return The number of patterns successfully compiled.
269
  int32_t compile(const std::string_view pattern, unsigned flags = 0);
270
  /// Compile @a npatterns patterns from the array @a patterns. @return The number of patterns successfully compiled.
271
  int32_t compile(const std::string_view *const patterns, int npatterns, unsigned flags = 0);
272
  /// Compile @a npatterns C-string patterns from the array @a patterns. @return The number of patterns successfully compiled.
273
  int32_t compile(const char *const *patterns, int npatterns, unsigned flags = 0);
274
275
  /** Match @a str against the internal patterns.
276
   *
277
   * @param str String to match.
278
   * @return Index of the matched pattern, -1 if no match.
279
   */
280
  int32_t match(std::string_view str) const;
281
282
private:
283
  struct Pattern {
284
0
    Pattern(Regex &&rxp, std::string &&s) : _re(std::move(rxp)), _p(std::move(s)) {} // Takes ownership of both arguments. NOTE(review): std::move is used but <utility> is not included directly.
285
    Regex       _re; ///< The compiled pattern.
286
    std::string _p;  ///< The original pattern.
287
  };
288
289
  /** Compile @a pattern and add it to the pattern set.
290
   *
291
   * @param pattern Regular expression to compile.
292
   * @param flags Regular expression compilation flags.
293
   * @return @c true if @a pattern was successfully compiled, @c false if not.
294
   */
295
  bool build(std::string_view pattern, unsigned flags = 0);
296
297
  std::vector<Pattern> _patterns; ///< The pattern set.
298
};