/src/php-src/ext/pcre/pcre2lib/pcre2_error.c
Line | Count | Source |
1 | | /************************************************* |
2 | | * Perl-Compatible Regular Expressions * |
3 | | *************************************************/ |
4 | | |
5 | | /* PCRE is a library of functions to support regular expressions whose syntax |
6 | | and semantics are as close as possible to those of the Perl 5 language. |
7 | | |
8 | | Written by Philip Hazel |
9 | | Original API code Copyright (c) 1997-2012 University of Cambridge |
10 | | New API code Copyright (c) 2016-2024 University of Cambridge |
11 | | |
12 | | ----------------------------------------------------------------------------- |
13 | | Redistribution and use in source and binary forms, with or without |
14 | | modification, are permitted provided that the following conditions are met: |
15 | | |
16 | | * Redistributions of source code must retain the above copyright notice, |
17 | | this list of conditions and the following disclaimer. |
18 | | |
19 | | * Redistributions in binary form must reproduce the above copyright |
20 | | notice, this list of conditions and the following disclaimer in the |
21 | | documentation and/or other materials provided with the distribution. |
22 | | |
23 | | * Neither the name of the University of Cambridge nor the names of its |
24 | | contributors may be used to endorse or promote products derived from |
25 | | this software without specific prior written permission. |
26 | | |
27 | | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
28 | | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
29 | | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
30 | | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
31 | | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
32 | | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
33 | | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
34 | | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
35 | | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
36 | | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
37 | | POSSIBILITY OF SUCH DAMAGE. |
38 | | ----------------------------------------------------------------------------- |
39 | | */ |
40 | | |
41 | | |
42 | | #ifdef HAVE_CONFIG_H |
43 | | #include "config.h" |
44 | | #endif |
45 | | |
46 | | #include "pcre2_internal.h" |
47 | | /* STRING() turns its argument into a string literal exactly as written. XSTRING() adds one level of indirection so that an argument which is itself a macro is expanded before it is stringified. */
48 | | #define STRING(a) # a |
49 | | #define XSTRING(s) STRING(s) |
50 | | |
51 | | /* The texts of compile-time error messages. Compile-time error numbers start |
52 | | at COMPILE_ERROR_BASE (100): the text for error number n is entry number n - COMPILE_ERROR_BASE, counting from zero. The numeric comments inside the table give the index of the entry that follows them. |
53 | | |
54 | | This used to be a table of strings, but in order to reduce the number of |
55 | | relocations needed when a shared library is loaded dynamically, it is now one |
56 | | long string. We cannot use a table of offsets, because the lengths of inserts |
57 | | such as XSTRING(MAX_NAME_SIZE) are not known. Instead, |
58 | | pcre2_get_error_message() counts through to the one it wants - this isn't a |
59 | | performance issue because these strings are used only when there is an error. |
60 | | |
61 | | Each substring ends with \0 to insert a null character. This includes the final |
62 | | substring, so that the whole string ends with \0\0, which can be detected when |
63 | | counting through. */ |
64 | | 
65 | | static const unsigned char compile_error_texts[] = |
66 | | "no error\0" |
67 | | "\\ at end of pattern\0" |
68 | | "\\c at end of pattern\0" |
69 | | "unrecognized character follows \\\0" |
70 | | "numbers out of order in {} quantifier\0" |
71 | | /* 5 */ |
72 | | "number too big in {} quantifier\0" |
73 | | "missing terminating ] for character class\0" |
74 | | "escape sequence is invalid in character class\0" |
75 | | "range out of order in character class\0" |
76 | | "quantifier does not follow a repeatable item\0" |
77 | | /* 10 */ |
78 | | "internal error: unexpected repeat\0" |
79 | | "unrecognized character after (? or (?-\0" |
80 | | "POSIX named classes are supported only within a class\0" |
81 | | "POSIX collating elements are not supported\0" |
82 | | "missing closing parenthesis\0" |
83 | | /* 15 */ |
84 | | "reference to non-existent subpattern\0" |
85 | | "pattern passed as NULL with non-zero length\0" |
86 | | "unrecognised compile-time option bit(s)\0" |
87 | | "missing ) after (?# comment\0" |
88 | | "parentheses are too deeply nested\0" |
89 | | /* 20 */ |
90 | | "regular expression is too large\0" |
91 | | "failed to allocate heap memory\0" |
92 | | "unmatched closing parenthesis\0" |
93 | | "internal error: code overflow\0" |
94 | | "missing closing parenthesis for condition\0" |
95 | | /* 25 */ |
96 | | "length of lookbehind assertion is not limited\0" |
97 | | "a relative value of zero is not allowed\0" |
98 | | "conditional subpattern contains more than two branches\0" |
99 | | "assertion expected after (?( or (?(?C)\0" |
100 | | "digit expected after (?+ or (?-\0" |
101 | | /* 30 */ |
102 | | "unknown POSIX class name\0" |
103 | | "internal error in pcre2_study(): should not occur\0" |
104 | | "this version of PCRE2 does not have Unicode support\0" |
105 | | "parentheses are too deeply nested (stack check)\0" |
106 | | "character code point value in \\x{} or \\o{} is too large\0" |
107 | | /* 35 */ |
108 | | "lookbehind is too complicated\0" |
109 | | "\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0" |
110 | | "PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u\0" |
111 | | "number after (?C is greater than 255\0" |
112 | | "closing parenthesis for (?C expected\0" |
113 | | /* 40 */ |
114 | | "invalid escape sequence in (*VERB) name\0" |
115 | | "unrecognized character after (?P\0" |
116 | | "syntax error in subpattern name (missing terminator?)\0" |
117 | | "two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0" |
118 | | "subpattern name must start with a non-digit\0" |
119 | | /* 45 */ |
120 | | "this version of PCRE2 does not have support for \\P, \\p, or \\X\0" |
121 | | "malformed \\P or \\p sequence\0" |
122 | | "unknown property after \\P or \\p\0" |
123 | | "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " code units)\0" |
124 | | "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0" |
125 | | /* 50 */ |
126 | | "invalid range in character class\0" |
127 | | "octal value is greater than \\377 in 8-bit non-UTF-8 mode\0" |
128 | | "internal error: overran compiling workspace\0" |
129 | | "internal error: previously-checked referenced subpattern not found\0" |
130 | | "DEFINE subpattern contains more than one branch\0" |
131 | | /* 55 */ |
132 | | "missing opening brace after \\o\0" |
133 | | "internal error: unknown newline setting\0" |
134 | | "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0" |
135 | | "(?R (recursive pattern call) must be followed by a closing parenthesis\0" |
136 | | /* "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" */ |
137 | | "obsolete error (should not occur)\0" /* Was the above */ |
138 | | /* 60 */ |
139 | | "(*VERB) not recognized or malformed\0" |
140 | | "subpattern number is too big\0" |
141 | | "subpattern name expected\0" |
142 | | "internal error: parsed pattern overflow\0" |
143 | | "non-octal character in \\o{} (closing brace missing?)\0" |
144 | | /* 65 */ |
145 | | "different names for subpatterns of the same number are not allowed\0" |
146 | | "(*MARK) must have an argument\0" |
147 | | "non-hex character in \\x{} (closing brace missing?)\0" |
148 | | #ifndef EBCDIC |
149 | | "\\c must be followed by a printable ASCII character\0" |
150 | | #else |
151 | | "\\c must be followed by a letter or one of [\\]^_?\0" |
152 | | #endif |
153 | | "\\k is not followed by a braced, angle-bracketed, or quoted name\0" |
154 | | /* 70 */ |
155 | | "internal error: unknown meta code in check_lookbehinds()\0" |
156 | | "\\N is not supported in a class\0" |
157 | | "callout string is too long\0" |
158 | | "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0" |
159 | | "using UTF is disabled by the application\0" |
160 | | /* 75 */ |
161 | | "using UCP is disabled by the application\0" |
162 | | "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" |
163 | | "character code point value in \\u.... sequence is too large\0" |
164 | | "digits missing in \\x{} or \\o{} or \\N{U+}\0" |
165 | | "syntax error or number too big in (?(VERSION condition\0" |
166 | | /* 80 */ |
167 | | "internal error: unknown opcode in auto_possessify()\0" |
168 | | "missing terminating delimiter for callout with string argument\0" |
169 | | "unrecognized string delimiter follows (?C\0" |
170 | | "using \\C is disabled by the application\0" |
171 | | "(?| and/or (?J: or (?x: parentheses are too deeply nested\0" |
172 | | /* 85 */ |
173 | | "using \\C is disabled in this PCRE2 library\0" |
174 | | "regular expression is too complicated\0" |
175 | | "lookbehind assertion is too long\0" |
176 | | "pattern string is longer than the limit set by the application\0" |
177 | | "internal error: unknown code in parsed pattern\0" |
178 | | /* 90 */ |
179 | | "internal error: bad code value in parsed_skip()\0" |
180 | | "PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0" |
181 | | "invalid option bits with PCRE2_LITERAL\0" |
182 | | "\\N{U+dddd} is supported only in Unicode (UTF) mode\0" |
183 | | "invalid hyphen in option setting\0" |
184 | | /* 95 */ |
185 | | "(*alpha_assertion) not recognized\0" |
186 | | "script runs require Unicode support, which this version of PCRE2 does not have\0" |
187 | | "too many capturing groups (maximum 65535)\0" |
188 | | "atomic assertion expected after (?( or (?(?C)\0" |
189 | | "\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0" |
190 | | /* 100 */ |
191 | | "branch too long in variable-length lookbehind assertion\0" |
192 | | "compiled pattern would be longer than the limit set by the application\0" |
193 | | ; |
194 | | |
195 | | /* Match-time and UTF error texts are in the same format. These error numbers are negative: the text for error number n is entry number -n, counting from zero, and the numeric comments in the table give the index of the entry that follows them. */ |
196 | | 
197 | | static const unsigned char match_error_texts[] = |
198 | | "no error\0" |
199 | | "no match\0" |
200 | | "partial match\0" |
201 | | "UTF-8 error: 1 byte missing at end\0" |
202 | | "UTF-8 error: 2 bytes missing at end\0" |
203 | | /* 5 */ |
204 | | "UTF-8 error: 3 bytes missing at end\0" |
205 | | "UTF-8 error: 4 bytes missing at end\0" |
206 | | "UTF-8 error: 5 bytes missing at end\0" |
207 | | "UTF-8 error: byte 2 top bits not 0x80\0" |
208 | | "UTF-8 error: byte 3 top bits not 0x80\0" |
209 | | /* 10 */ |
210 | | "UTF-8 error: byte 4 top bits not 0x80\0" |
211 | | "UTF-8 error: byte 5 top bits not 0x80\0" |
212 | | "UTF-8 error: byte 6 top bits not 0x80\0" |
213 | | "UTF-8 error: 5-byte character is not allowed (RFC 3629)\0" |
214 | | "UTF-8 error: 6-byte character is not allowed (RFC 3629)\0" |
215 | | /* 15 */ |
216 | | "UTF-8 error: code points greater than 0x10ffff are not defined\0" |
217 | | "UTF-8 error: code points 0xd800-0xdfff are not defined\0" |
218 | | "UTF-8 error: overlong 2-byte sequence\0" |
219 | | "UTF-8 error: overlong 3-byte sequence\0" |
220 | | "UTF-8 error: overlong 4-byte sequence\0" |
221 | | /* 20 */ |
222 | | "UTF-8 error: overlong 5-byte sequence\0" |
223 | | "UTF-8 error: overlong 6-byte sequence\0" |
224 | | "UTF-8 error: isolated byte with 0x80 bit set\0" |
225 | | "UTF-8 error: illegal byte (0xfe or 0xff)\0" |
226 | | "UTF-16 error: missing low surrogate at end\0" |
227 | | /* 25 */ |
228 | | "UTF-16 error: invalid low surrogate\0" |
229 | | "UTF-16 error: isolated low surrogate\0" |
230 | | "UTF-32 error: code points 0xd800-0xdfff are not defined\0" |
231 | | "UTF-32 error: code points greater than 0x10ffff are not defined\0" |
232 | | "bad data value\0" |
233 | | /* 30 */ |
234 | | "patterns do not all use the same character tables\0" |
235 | | "magic number missing\0" |
236 | | "pattern compiled in wrong mode: 8/16/32-bit error\0" |
237 | | "bad offset value\0" |
238 | | "bad option value\0" |
239 | | /* 35 */ |
240 | | "invalid replacement string\0" |
241 | | "bad offset into UTF string\0" |
242 | | "callout error code\0" /* Never returned by PCRE2 itself */ |
243 | | "invalid data in workspace for DFA restart\0" |
244 | | "too much recursion for DFA matching\0" |
245 | | /* 40 */ |
246 | | "backreference condition or recursion test is not supported for DFA matching\0" |
247 | | "function is not supported for DFA matching\0" |
248 | | "pattern contains an item that is not supported for DFA matching\0" |
249 | | "workspace size exceeded in DFA matching\0" |
250 | | "internal error - pattern overwritten?\0" |
251 | | /* 45 */ |
252 | | "bad JIT option\0" |
253 | | "JIT stack limit reached\0" |
254 | | "match limit exceeded\0" |
255 | | "no more memory\0" |
256 | | "unknown substring\0" |
257 | | /* 50 */ |
258 | | "non-unique substring name\0" |
259 | | "NULL argument passed with non-zero length\0" |
260 | | "nested recursion at the same subject position\0" |
261 | | "matching depth limit exceeded\0" |
262 | | "requested value is not available\0" |
263 | | /* 55 */ |
264 | | "requested value is not set\0" |
265 | | "offset limit set without PCRE2_USE_OFFSET_LIMIT\0" |
266 | | "bad escape sequence in replacement string\0" |
267 | | "expected closing curly bracket in replacement string\0" |
268 | | "bad substitution in replacement string\0" |
269 | | /* 60 */ |
270 | | "match with end before start or start moved backwards is not supported\0" |
271 | | "too many replacements (more than INT_MAX)\0" |
272 | | "bad serialized data\0" |
273 | | "heap limit exceeded\0" |
274 | | "invalid syntax\0" |
275 | | /* 65 */ |
276 | | "internal error - duplicate substitution match\0" |
277 | | "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0" |
278 | | "INTERNAL ERROR: invalid substring offset\0" |
279 | | ; |
280 | | |
281 | | |
282 | | /************************************************* |
283 | | * Return error message * |
284 | | *************************************************/ |
285 | | |
286 | | /* This function copies an error message into a buffer whose units are of an |
287 | | appropriate width. Error numbers are positive for compile-time errors, and |
288 | | negative for match-time errors (except for UTF errors), but the numbers are all |
289 | | distinct. |
290 | | |
291 | | Arguments: |
292 | | enumber error number |
293 | | buffer where to put the message (zero terminated) |
294 | | size size of the buffer in code units |
295 | | |
296 | | Returns: length of message if all is well |
297 | | negative on error |
298 | | */ |
299 | | |
300 | | PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION |
301 | | pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, PCRE2_SIZE size) |
302 | 566 | { |
303 | 566 | const unsigned char *message; |
304 | 566 | PCRE2_SIZE i; |
305 | 566 | int n; |
306 | | |
307 | 566 | if (size == 0) return PCRE2_ERROR_NOMEMORY; |
308 | | |
309 | 566 | if (enumber >= COMPILE_ERROR_BASE) /* Compile error */ |
310 | 540 | { |
311 | 540 | message = compile_error_texts; |
312 | 540 | n = enumber - COMPILE_ERROR_BASE; |
313 | 540 | } |
314 | 26 | else if (enumber < 0) /* Match or UTF error */ |
315 | 26 | { |
316 | 26 | message = match_error_texts; |
317 | 26 | n = -enumber; |
318 | 26 | } |
319 | 0 | else /* Invalid error number */ |
320 | 0 | { |
321 | 0 | message = (unsigned char *)"\0"; /* Empty message list */ |
322 | 0 | n = 1; |
323 | 0 | } |
324 | | |
325 | 9.31k | for (; n > 0; n--) |
326 | 8.74k | { |
327 | 307k | while (*message++ != CHAR_NUL) {}; |
328 | 8.74k | if (*message == CHAR_NUL) return PCRE2_ERROR_BADDATA; |
329 | 8.74k | } |
330 | | |
331 | 21.4k | for (i = 0; *message != 0; i++) |
332 | 20.9k | { |
333 | 20.9k | if (i >= size - 1) |
334 | 0 | { |
335 | 0 | buffer[i] = 0; /* Terminate partial message */ |
336 | 0 | return PCRE2_ERROR_NOMEMORY; |
337 | 0 | } |
338 | 20.9k | buffer[i] = *message++; |
339 | 20.9k | } |
340 | | |
341 | 566 | buffer[i] = 0; |
342 | 566 | return (int)i; |
343 | 566 | } |
344 | | |
345 | | /* End of pcre2_error.c */ |