Line | Count | Source (jump to first uncovered line) |
1 | | /* GRegex -- regular expression API wrapper around PCRE. |
2 | | * |
3 | | * Copyright (C) 1999, 2000 Scott Wimer |
4 | | * Copyright (C) 2004, Matthias Clasen <mclasen@redhat.com> |
5 | | * Copyright (C) 2005 - 2007, Marco Barisione <marco@barisione.org> |
6 | | * |
7 | | * This library is free software; you can redistribute it and/or |
8 | | * modify it under the terms of the GNU Lesser General Public |
9 | | * License as published by the Free Software Foundation; either |
10 | | * version 2.1 of the License, or (at your option) any later version. |
11 | | * |
12 | | * This library is distributed in the hope that it will be useful, |
13 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
15 | | * Lesser General Public License for more details. |
16 | | * |
17 | | * You should have received a copy of the GNU Lesser General Public License |
18 | | * along with this library; if not, see <http://www.gnu.org/licenses/>. |
19 | | */ |
20 | | |
21 | | #include "config.h" |
22 | | |
23 | | #include <string.h> |
24 | | |
25 | | #ifdef USE_SYSTEM_PCRE |
26 | | #include <pcre.h> |
27 | | #else |
28 | | #include "pcre/pcre.h" |
29 | | #endif |
30 | | |
31 | | #include "gtypes.h" |
32 | | #include "gregex.h" |
33 | | #include "glibintl.h" |
34 | | #include "glist.h" |
35 | | #include "gmessages.h" |
36 | | #include "gstrfuncs.h" |
37 | | #include "gatomic.h" |
38 | | #include "gthread.h" |
39 | | |
40 | | /** |
41 | | * SECTION:gregex |
42 | | * @title: Perl-compatible regular expressions |
43 | | * @short_description: matches strings against regular expressions |
44 | | * @see_also: [Regular expression syntax][glib-regex-syntax] |
45 | | * |
46 | | * The g_regex_*() functions implement regular |
47 | | * expression pattern matching using syntax and semantics similar to |
48 | | * Perl regular expressions. |
49 | | * |
50 | | * Some functions accept a @start_position argument, setting it differs |
51 | | * from just passing over a shortened string and setting #G_REGEX_MATCH_NOTBOL |
52 | | * in the case of a pattern that begins with any kind of lookbehind assertion. |
53 | | * For example, consider the pattern "\Biss\B" which finds occurrences of "iss" |
54 | | * in the middle of words. ("\B" matches only if the current position in the |
55 | | * subject is not a word boundary.) When applied to the string "Mississippi" |
56 | | * from the fourth byte, namely "issippi", it does not match, because "\B" is |
57 | | * always false at the start of the subject, which is deemed to be a word |
58 | | * boundary. However, if the entire string is passed, but with |
59 | | * @start_position set to 4, it finds the second occurrence of "iss" because |
60 | | * it is able to look behind the starting point to discover that it is |
61 | | * preceded by a letter. |
62 | | * |
63 | | * Note that, unless you set the #G_REGEX_RAW flag, all the strings passed |
64 | | * to these functions must be encoded in UTF-8. The lengths and the positions |
65 | | * inside the strings are in bytes and not in characters, so, for instance, |
66 | | * "\xc3\xa0" (i.e. "à") is two bytes long but it is treated as a |
67 | | * single character. If you set #G_REGEX_RAW the strings can be non-valid |
68 | | * UTF-8 strings and a byte is treated as a character, so "\xc3\xa0" is two |
69 | | * bytes and two characters long. |
70 | | * |
71 | | * When matching a pattern, "\n" matches only against a "\n" character in |
72 | | * the string, and "\r" matches only a "\r" character. To match any newline |
73 | | * sequence use "\R". This particular group matches either the two-character |
74 | | * sequence CR + LF ("\r\n"), or one of the single characters LF (linefeed, |
75 | | * U+000A, "\n"), VT (vertical tab, U+000B, "\v"), FF (formfeed, U+000C, "\f"), |
76 | | * CR (carriage return, U+000D, "\r"), NEL (next line, U+0085), LS (line |
77 | | * separator, U+2028), or PS (paragraph separator, U+2029). |
78 | | * |
79 | | * The behaviour of the dot, circumflex, and dollar metacharacters are |
80 | | * affected by newline characters, the default is to recognize any newline |
81 | | * character (the same characters recognized by "\R"). This can be changed |
82 | | * with #G_REGEX_NEWLINE_CR, #G_REGEX_NEWLINE_LF and #G_REGEX_NEWLINE_CRLF |
83 | | * compile options, and with #G_REGEX_MATCH_NEWLINE_ANY, |
84 | | * #G_REGEX_MATCH_NEWLINE_CR, #G_REGEX_MATCH_NEWLINE_LF and |
85 | | * #G_REGEX_MATCH_NEWLINE_CRLF match options. These settings are also |
86 | | * relevant when compiling a pattern if #G_REGEX_EXTENDED is set, and an |
87 | | * unescaped "#" outside a character class is encountered. This indicates |
88 | | * a comment that lasts until after the next newline. |
89 | | * |
90 | | * When setting the %G_REGEX_JAVASCRIPT_COMPAT flag, pattern syntax and pattern |
91 | | * matching is changed to be compatible with the way that regular expressions |
92 | | * work in JavaScript. More precisely, a lonely ']' character in the pattern |
93 | | * is a syntax error; the '\x' escape only allows 0 to 2 hexadecimal digits, and |
94 | | * you must use the '\u' escape sequence with 4 hex digits to specify a unicode |
95 | | * codepoint instead of '\x' or 'x{....}'. If '\x' or '\u' are not followed by |
96 | | * the specified number of hex digits, they match 'x' and 'u' literally; also |
97 | | * '\U' always matches 'U' instead of being an error in the pattern. Finally, |
98 | | * pattern matching is modified so that back references to an unset subpattern |
99 | | * group produces a match with the empty string instead of an error. See |
100 | | * pcreapi(3) for more information. |
101 | | * |
102 | | * Creating and manipulating the same #GRegex structure from different |
103 | | * threads is not a problem as #GRegex does not modify its internal |
104 | | * state between creation and destruction, on the other hand #GMatchInfo |
105 | | * is not threadsafe. |
106 | | * |
107 | | * The regular expressions low-level functionalities are obtained through |
108 | | * the excellent |
109 | | * [PCRE](http://www.pcre.org/) |
110 | | * library written by Philip Hazel. |
111 | | */ |
112 | | |
/* Union of every bit that is a valid GRegexCompileFlags value. */
#define G_REGEX_COMPILE_MASK \
  (G_REGEX_CASELESS        | G_REGEX_MULTILINE       | \
   G_REGEX_DOTALL          | G_REGEX_EXTENDED        | \
   G_REGEX_ANCHORED        | G_REGEX_DOLLAR_ENDONLY  | \
   G_REGEX_UNGREEDY        | G_REGEX_RAW             | \
   G_REGEX_NO_AUTO_CAPTURE | G_REGEX_OPTIMIZE        | \
   G_REGEX_FIRSTLINE       | G_REGEX_DUPNAMES        | \
   G_REGEX_NEWLINE_CR      | G_REGEX_NEWLINE_LF      | \
   G_REGEX_NEWLINE_CRLF    | G_REGEX_NEWLINE_ANYCRLF | \
   G_REGEX_BSR_ANYCRLF     | G_REGEX_JAVASCRIPT_COMPAT)
132 | | |
/* G_REGEX_COMPILE_NONPCRE_MASK: the GRegexCompileFlags bits that are not
 * passed through to PCRE.
 * G_REGEX_COMPILE_PCRE_MASK: every remaining valid compile flag, i.e. the
 * bits that are passed through to PCRE. */
#define G_REGEX_COMPILE_NONPCRE_MASK (G_REGEX_RAW | G_REGEX_OPTIMIZE)
#define G_REGEX_COMPILE_PCRE_MASK \
  (G_REGEX_COMPILE_MASK & ~G_REGEX_COMPILE_NONPCRE_MASK)
137 | | |
/* Union of every bit that is a valid GRegexMatchFlags value. */
#define G_REGEX_MATCH_MASK \
  (G_REGEX_MATCH_ANCHORED        | G_REGEX_MATCH_NOTBOL       | \
   G_REGEX_MATCH_NOTEOL          | G_REGEX_MATCH_NOTEMPTY     | \
   G_REGEX_MATCH_PARTIAL         | G_REGEX_MATCH_NEWLINE_CR   | \
   G_REGEX_MATCH_NEWLINE_LF      | G_REGEX_MATCH_NEWLINE_CRLF | \
   G_REGEX_MATCH_NEWLINE_ANY     | G_REGEX_MATCH_NEWLINE_ANYCRLF | \
   G_REGEX_MATCH_BSR_ANYCRLF     | G_REGEX_MATCH_BSR_ANY      | \
   G_REGEX_MATCH_PARTIAL_SOFT    | G_REGEX_MATCH_PARTIAL_HARD | \
   G_REGEX_MATCH_NOTEMPTY_ATSTART)
154 | | |
155 | | /* we rely on these flags having the same values */ |
156 | | G_STATIC_ASSERT (G_REGEX_CASELESS == PCRE_CASELESS); |
157 | | G_STATIC_ASSERT (G_REGEX_MULTILINE == PCRE_MULTILINE); |
158 | | G_STATIC_ASSERT (G_REGEX_DOTALL == PCRE_DOTALL); |
159 | | G_STATIC_ASSERT (G_REGEX_EXTENDED == PCRE_EXTENDED); |
160 | | G_STATIC_ASSERT (G_REGEX_ANCHORED == PCRE_ANCHORED); |
161 | | G_STATIC_ASSERT (G_REGEX_DOLLAR_ENDONLY == PCRE_DOLLAR_ENDONLY); |
162 | | G_STATIC_ASSERT (G_REGEX_UNGREEDY == PCRE_UNGREEDY); |
163 | | G_STATIC_ASSERT (G_REGEX_NO_AUTO_CAPTURE == PCRE_NO_AUTO_CAPTURE); |
164 | | G_STATIC_ASSERT (G_REGEX_FIRSTLINE == PCRE_FIRSTLINE); |
165 | | G_STATIC_ASSERT (G_REGEX_DUPNAMES == PCRE_DUPNAMES); |
166 | | G_STATIC_ASSERT (G_REGEX_NEWLINE_CR == PCRE_NEWLINE_CR); |
167 | | G_STATIC_ASSERT (G_REGEX_NEWLINE_LF == PCRE_NEWLINE_LF); |
168 | | G_STATIC_ASSERT (G_REGEX_NEWLINE_CRLF == PCRE_NEWLINE_CRLF); |
169 | | G_STATIC_ASSERT (G_REGEX_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF); |
170 | | G_STATIC_ASSERT (G_REGEX_BSR_ANYCRLF == PCRE_BSR_ANYCRLF); |
171 | | G_STATIC_ASSERT (G_REGEX_JAVASCRIPT_COMPAT == PCRE_JAVASCRIPT_COMPAT); |
172 | | |
173 | | G_STATIC_ASSERT (G_REGEX_MATCH_ANCHORED == PCRE_ANCHORED); |
174 | | G_STATIC_ASSERT (G_REGEX_MATCH_NOTBOL == PCRE_NOTBOL); |
175 | | G_STATIC_ASSERT (G_REGEX_MATCH_NOTEOL == PCRE_NOTEOL); |
176 | | G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY == PCRE_NOTEMPTY); |
177 | | G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL == PCRE_PARTIAL); |
178 | | G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CR == PCRE_NEWLINE_CR); |
179 | | G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_LF == PCRE_NEWLINE_LF); |
180 | | G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_CRLF == PCRE_NEWLINE_CRLF); |
181 | | G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANY == PCRE_NEWLINE_ANY); |
182 | | G_STATIC_ASSERT (G_REGEX_MATCH_NEWLINE_ANYCRLF == PCRE_NEWLINE_ANYCRLF); |
183 | | G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANYCRLF == PCRE_BSR_ANYCRLF); |
184 | | G_STATIC_ASSERT (G_REGEX_MATCH_BSR_ANY == PCRE_BSR_UNICODE); |
185 | | G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_SOFT == PCRE_PARTIAL_SOFT); |
186 | | G_STATIC_ASSERT (G_REGEX_MATCH_PARTIAL_HARD == PCRE_PARTIAL_HARD); |
187 | | G_STATIC_ASSERT (G_REGEX_MATCH_NOTEMPTY_ATSTART == PCRE_NOTEMPTY_ATSTART); |
188 | | |
189 | | /* These PCRE flags are unused or not exposed publicly in GRegexFlags, so |
190 | | * it should be ok to reuse them for different things. |
191 | | */ |
192 | | G_STATIC_ASSERT (G_REGEX_OPTIMIZE == PCRE_NO_UTF8_CHECK); |
193 | | G_STATIC_ASSERT (G_REGEX_RAW == PCRE_UTF8); |
194 | | |
/* Step one character forward (NEXT_CHAR) or backward (PREV_CHAR) from s.
 * When the regex was compiled with G_REGEX_RAW a character is a single byte,
 * so plain +/- 1 is used; otherwise the g_utf8_ helpers do the stepping. */
#define NEXT_CHAR(re, s) \
  ((((re)->compile_opts & G_REGEX_RAW) != 0) ? ((s) + 1) : g_utf8_next_char (s))
#define PREV_CHAR(re, s) \
  ((((re)->compile_opts & G_REGEX_RAW) != 0) ? ((s) - 1) : g_utf8_prev_char (s))
203 | | |
204 | | struct _GMatchInfo |
205 | | { |
206 | | gint ref_count; /* the ref count (atomic) */ |
207 | | GRegex *regex; /* the regex */ |
208 | | GRegexMatchFlags match_opts; /* options used at match time on the regex */ |
209 | | gint matches; /* number of matching sub patterns */ |
210 | | gint pos; /* position in the string where last match left off */ |
211 | | gint n_offsets; /* number of offsets */ |
212 | | gint *offsets; /* array of offsets paired 0,1 ; 2,3 ; 3,4 etc */ |
213 | | gint *workspace; /* workspace for pcre_dfa_exec() */ |
214 | | gint n_workspace; /* number of workspace elements */ |
215 | | const gchar *string; /* string passed to the match function */ |
216 | | gssize string_len; /* length of string, in bytes */ |
217 | | }; |
218 | | |
219 | | struct _GRegex |
220 | | { |
221 | | gint ref_count; /* the ref count for the immutable part (atomic) */ |
222 | | gchar *pattern; /* the pattern */ |
223 | | pcre *pcre_re; /* compiled form of the pattern */ |
224 | | GRegexCompileFlags compile_opts; /* options used at compile time on the pattern */ |
225 | | GRegexMatchFlags match_opts; /* options used at match time on the regex */ |
226 | | pcre_extra *extra; /* data stored when G_REGEX_OPTIMIZE is used */ |
227 | | }; |
228 | | |
/* Evaluates to TRUE when ret is a real PCRE failure code, i.e. it is below
 * PCRE_ERROR_NOMATCH and is not PCRE_ERROR_PARTIAL; FALSE otherwise. */
#define IS_PCRE_ERROR(ret) \
  (!((ret) >= PCRE_ERROR_NOMATCH || (ret) == PCRE_ERROR_PARTIAL))
231 | | |
232 | | typedef struct _InterpolationData InterpolationData; |
233 | | static gboolean interpolation_list_needs_match (GList *list); |
234 | | static gboolean interpolate_replacement (const GMatchInfo *match_info, |
235 | | GString *result, |
236 | | gpointer data); |
237 | | static GList *split_replacement (const gchar *replacement, |
238 | | GError **error); |
239 | | static void free_interpolation_data (InterpolationData *data); |
240 | | |
241 | | |
242 | | static const gchar * |
243 | | match_error (gint errcode) |
244 | 0 | { |
245 | 0 | switch (errcode) |
246 | 0 | { |
247 | 0 | case PCRE_ERROR_NOMATCH: |
248 | | /* not an error */ |
249 | 0 | break; |
250 | 0 | case PCRE_ERROR_NULL: |
251 | | /* NULL argument, this should not happen in GRegex */ |
252 | 0 | g_warning ("A NULL argument was passed to PCRE"); |
253 | 0 | break; |
254 | 0 | case PCRE_ERROR_BADOPTION: |
255 | 0 | return "bad options"; |
256 | 0 | case PCRE_ERROR_BADMAGIC: |
257 | 0 | return _("corrupted object"); |
258 | 0 | case PCRE_ERROR_UNKNOWN_OPCODE: |
259 | 0 | return N_("internal error or corrupted object"); |
260 | 0 | case PCRE_ERROR_NOMEMORY: |
261 | 0 | return _("out of memory"); |
262 | 0 | case PCRE_ERROR_NOSUBSTRING: |
263 | | /* not used by pcre_exec() */ |
264 | 0 | break; |
265 | 0 | case PCRE_ERROR_MATCHLIMIT: |
266 | 0 | return _("backtracking limit reached"); |
267 | 0 | case PCRE_ERROR_CALLOUT: |
268 | | /* callouts are not implemented */ |
269 | 0 | break; |
270 | 0 | case PCRE_ERROR_BADUTF8: |
271 | 0 | case PCRE_ERROR_BADUTF8_OFFSET: |
272 | | /* we do not check if strings are valid */ |
273 | 0 | break; |
274 | 0 | case PCRE_ERROR_PARTIAL: |
275 | | /* not an error */ |
276 | 0 | break; |
277 | 0 | case PCRE_ERROR_BADPARTIAL: |
278 | 0 | return _("the pattern contains items not supported for partial matching"); |
279 | 0 | case PCRE_ERROR_INTERNAL: |
280 | 0 | return _("internal error"); |
281 | 0 | case PCRE_ERROR_BADCOUNT: |
282 | | /* negative ovecsize, this should not happen in GRegex */ |
283 | 0 | g_warning ("A negative ovecsize was passed to PCRE"); |
284 | 0 | break; |
285 | 0 | case PCRE_ERROR_DFA_UITEM: |
286 | 0 | return _("the pattern contains items not supported for partial matching"); |
287 | 0 | case PCRE_ERROR_DFA_UCOND: |
288 | 0 | return _("back references as conditions are not supported for partial matching"); |
289 | 0 | case PCRE_ERROR_DFA_UMLIMIT: |
290 | | /* the match_field field is not used in GRegex */ |
291 | 0 | break; |
292 | 0 | case PCRE_ERROR_DFA_WSSIZE: |
293 | | /* handled expanding the workspace */ |
294 | 0 | break; |
295 | 0 | case PCRE_ERROR_DFA_RECURSE: |
296 | 0 | case PCRE_ERROR_RECURSIONLIMIT: |
297 | 0 | return _("recursion limit reached"); |
298 | 0 | case PCRE_ERROR_BADNEWLINE: |
299 | 0 | return _("invalid combination of newline flags"); |
300 | 0 | case PCRE_ERROR_BADOFFSET: |
301 | 0 | return _("bad offset"); |
302 | 0 | case PCRE_ERROR_SHORTUTF8: |
303 | 0 | return _("short utf8"); |
304 | 0 | case PCRE_ERROR_RECURSELOOP: |
305 | 0 | return _("recursion loop"); |
306 | 0 | default: |
307 | 0 | break; |
308 | 0 | } |
309 | 0 | return _("unknown error"); |
310 | 0 | } |
311 | | |
312 | | static void |
313 | | translate_compile_error (gint *errcode, const gchar **errmsg) |
314 | 0 | { |
315 | | /* Compile errors are created adding 100 to the error code returned |
316 | | * by PCRE. |
317 | | * If errcode is known we put the translatable error message in |
318 | | * erromsg. If errcode is unknown we put the generic |
319 | | * G_REGEX_ERROR_COMPILE error code in errcode and keep the |
320 | | * untranslated error message returned by PCRE. |
321 | | * Note that there can be more PCRE errors with the same GRegexError |
322 | | * and that some PCRE errors are useless for us. |
323 | | */ |
324 | 0 | *errcode += 100; |
325 | |
|
326 | 0 | switch (*errcode) |
327 | 0 | { |
328 | 0 | case G_REGEX_ERROR_STRAY_BACKSLASH: |
329 | 0 | *errmsg = _("\\ at end of pattern"); |
330 | 0 | break; |
331 | 0 | case G_REGEX_ERROR_MISSING_CONTROL_CHAR: |
332 | 0 | *errmsg = _("\\c at end of pattern"); |
333 | 0 | break; |
334 | 0 | case G_REGEX_ERROR_UNRECOGNIZED_ESCAPE: |
335 | 0 | *errmsg = _("unrecognized character following \\"); |
336 | 0 | break; |
337 | 0 | case G_REGEX_ERROR_QUANTIFIERS_OUT_OF_ORDER: |
338 | 0 | *errmsg = _("numbers out of order in {} quantifier"); |
339 | 0 | break; |
340 | 0 | case G_REGEX_ERROR_QUANTIFIER_TOO_BIG: |
341 | 0 | *errmsg = _("number too big in {} quantifier"); |
342 | 0 | break; |
343 | 0 | case G_REGEX_ERROR_UNTERMINATED_CHARACTER_CLASS: |
344 | 0 | *errmsg = _("missing terminating ] for character class"); |
345 | 0 | break; |
346 | 0 | case G_REGEX_ERROR_INVALID_ESCAPE_IN_CHARACTER_CLASS: |
347 | 0 | *errmsg = _("invalid escape sequence in character class"); |
348 | 0 | break; |
349 | 0 | case G_REGEX_ERROR_RANGE_OUT_OF_ORDER: |
350 | 0 | *errmsg = _("range out of order in character class"); |
351 | 0 | break; |
352 | 0 | case G_REGEX_ERROR_NOTHING_TO_REPEAT: |
353 | 0 | *errmsg = _("nothing to repeat"); |
354 | 0 | break; |
355 | 0 | case 111: /* internal error: unexpected repeat */ |
356 | 0 | *errcode = G_REGEX_ERROR_INTERNAL; |
357 | 0 | *errmsg = _("unexpected repeat"); |
358 | 0 | break; |
359 | 0 | case G_REGEX_ERROR_UNRECOGNIZED_CHARACTER: |
360 | 0 | *errmsg = _("unrecognized character after (? or (?-"); |
361 | 0 | break; |
362 | 0 | case G_REGEX_ERROR_POSIX_NAMED_CLASS_OUTSIDE_CLASS: |
363 | 0 | *errmsg = _("POSIX named classes are supported only within a class"); |
364 | 0 | break; |
365 | 0 | case G_REGEX_ERROR_UNMATCHED_PARENTHESIS: |
366 | 0 | *errmsg = _("missing terminating )"); |
367 | 0 | break; |
368 | 0 | case G_REGEX_ERROR_INEXISTENT_SUBPATTERN_REFERENCE: |
369 | 0 | *errmsg = _("reference to non-existent subpattern"); |
370 | 0 | break; |
371 | 0 | case G_REGEX_ERROR_UNTERMINATED_COMMENT: |
372 | 0 | *errmsg = _("missing ) after comment"); |
373 | 0 | break; |
374 | 0 | case G_REGEX_ERROR_EXPRESSION_TOO_LARGE: |
375 | 0 | *errmsg = _("regular expression is too large"); |
376 | 0 | break; |
377 | 0 | case G_REGEX_ERROR_MEMORY_ERROR: |
378 | 0 | *errmsg = _("failed to get memory"); |
379 | 0 | break; |
380 | 0 | case 122: /* unmatched parentheses */ |
381 | 0 | *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS; |
382 | 0 | *errmsg = _(") without opening ("); |
383 | 0 | break; |
384 | 0 | case 123: /* internal error: code overflow */ |
385 | 0 | *errcode = G_REGEX_ERROR_INTERNAL; |
386 | 0 | *errmsg = _("code overflow"); |
387 | 0 | break; |
388 | 0 | case 124: /* "unrecognized character after (?<\0 */ |
389 | 0 | *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER; |
390 | 0 | *errmsg = _("unrecognized character after (?<"); |
391 | 0 | break; |
392 | 0 | case G_REGEX_ERROR_VARIABLE_LENGTH_LOOKBEHIND: |
393 | 0 | *errmsg = _("lookbehind assertion is not fixed length"); |
394 | 0 | break; |
395 | 0 | case G_REGEX_ERROR_MALFORMED_CONDITION: |
396 | 0 | *errmsg = _("malformed number or name after (?("); |
397 | 0 | break; |
398 | 0 | case G_REGEX_ERROR_TOO_MANY_CONDITIONAL_BRANCHES: |
399 | 0 | *errmsg = _("conditional group contains more than two branches"); |
400 | 0 | break; |
401 | 0 | case G_REGEX_ERROR_ASSERTION_EXPECTED: |
402 | 0 | *errmsg = _("assertion expected after (?("); |
403 | 0 | break; |
404 | 0 | case 129: |
405 | 0 | *errcode = G_REGEX_ERROR_UNMATCHED_PARENTHESIS; |
406 | | /* translators: '(?R' and '(?[+-]digits' are both meant as (groups of) |
407 | | * sequences here, '(?-54' would be an example for the second group. |
408 | | */ |
409 | 0 | *errmsg = _("(?R or (?[+-]digits must be followed by )"); |
410 | 0 | break; |
411 | 0 | case G_REGEX_ERROR_UNKNOWN_POSIX_CLASS_NAME: |
412 | 0 | *errmsg = _("unknown POSIX class name"); |
413 | 0 | break; |
414 | 0 | case G_REGEX_ERROR_POSIX_COLLATING_ELEMENTS_NOT_SUPPORTED: |
415 | 0 | *errmsg = _("POSIX collating elements are not supported"); |
416 | 0 | break; |
417 | 0 | case G_REGEX_ERROR_HEX_CODE_TOO_LARGE: |
418 | 0 | *errmsg = _("character value in \\x{...} sequence is too large"); |
419 | 0 | break; |
420 | 0 | case G_REGEX_ERROR_INVALID_CONDITION: |
421 | 0 | *errmsg = _("invalid condition (?(0)"); |
422 | 0 | break; |
423 | 0 | case G_REGEX_ERROR_SINGLE_BYTE_MATCH_IN_LOOKBEHIND: |
424 | 0 | *errmsg = _("\\C not allowed in lookbehind assertion"); |
425 | 0 | break; |
426 | 0 | case 137: /* PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0 */ |
427 | | /* A number of Perl escapes are not handled by PCRE. |
428 | | * Therefore it explicitly raises ERR37. |
429 | | */ |
430 | 0 | *errcode = G_REGEX_ERROR_UNRECOGNIZED_ESCAPE; |
431 | 0 | *errmsg = _("escapes \\L, \\l, \\N{name}, \\U, and \\u are not supported"); |
432 | 0 | break; |
433 | 0 | case G_REGEX_ERROR_INFINITE_LOOP: |
434 | 0 | *errmsg = _("recursive call could loop indefinitely"); |
435 | 0 | break; |
436 | 0 | case 141: /* unrecognized character after (?P\0 */ |
437 | 0 | *errcode = G_REGEX_ERROR_UNRECOGNIZED_CHARACTER; |
438 | 0 | *errmsg = _("unrecognized character after (?P"); |
439 | 0 | break; |
440 | 0 | case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME_TERMINATOR: |
441 | 0 | *errmsg = _("missing terminator in subpattern name"); |
442 | 0 | break; |
443 | 0 | case G_REGEX_ERROR_DUPLICATE_SUBPATTERN_NAME: |
444 | 0 | *errmsg = _("two named subpatterns have the same name"); |
445 | 0 | break; |
446 | 0 | case G_REGEX_ERROR_MALFORMED_PROPERTY: |
447 | 0 | *errmsg = _("malformed \\P or \\p sequence"); |
448 | 0 | break; |
449 | 0 | case G_REGEX_ERROR_UNKNOWN_PROPERTY: |
450 | 0 | *errmsg = _("unknown property name after \\P or \\p"); |
451 | 0 | break; |
452 | 0 | case G_REGEX_ERROR_SUBPATTERN_NAME_TOO_LONG: |
453 | 0 | *errmsg = _("subpattern name is too long (maximum 32 characters)"); |
454 | 0 | break; |
455 | 0 | case G_REGEX_ERROR_TOO_MANY_SUBPATTERNS: |
456 | 0 | *errmsg = _("too many named subpatterns (maximum 10,000)"); |
457 | 0 | break; |
458 | 0 | case G_REGEX_ERROR_INVALID_OCTAL_VALUE: |
459 | 0 | *errmsg = _("octal value is greater than \\377"); |
460 | 0 | break; |
461 | 0 | case 152: /* internal error: overran compiling workspace */ |
462 | 0 | *errcode = G_REGEX_ERROR_INTERNAL; |
463 | 0 | *errmsg = _("overran compiling workspace"); |
464 | 0 | break; |
465 | 0 | case 153: /* internal error: previously-checked referenced subpattern not found */ |
466 | 0 | *errcode = G_REGEX_ERROR_INTERNAL; |
467 | 0 | *errmsg = _("previously-checked referenced subpattern not found"); |
468 | 0 | break; |
469 | 0 | case G_REGEX_ERROR_TOO_MANY_BRANCHES_IN_DEFINE: |
470 | 0 | *errmsg = _("DEFINE group contains more than one branch"); |
471 | 0 | break; |
472 | 0 | case G_REGEX_ERROR_INCONSISTENT_NEWLINE_OPTIONS: |
473 | 0 | *errmsg = _("inconsistent NEWLINE options"); |
474 | 0 | break; |
475 | 0 | case G_REGEX_ERROR_MISSING_BACK_REFERENCE: |
476 | 0 | *errmsg = _("\\g is not followed by a braced, angle-bracketed, or quoted name or " |
477 | 0 | "number, or by a plain number"); |
478 | 0 | break; |
479 | 0 | case G_REGEX_ERROR_INVALID_RELATIVE_REFERENCE: |
480 | 0 | *errmsg = _("a numbered reference must not be zero"); |
481 | 0 | break; |
482 | 0 | case G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_FORBIDDEN: |
483 | 0 | *errmsg = _("an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)"); |
484 | 0 | break; |
485 | 0 | case G_REGEX_ERROR_UNKNOWN_BACKTRACKING_CONTROL_VERB: |
486 | 0 | *errmsg = _("(*VERB) not recognized"); |
487 | 0 | break; |
488 | 0 | case G_REGEX_ERROR_NUMBER_TOO_BIG: |
489 | 0 | *errmsg = _("number is too big"); |
490 | 0 | break; |
491 | 0 | case G_REGEX_ERROR_MISSING_SUBPATTERN_NAME: |
492 | 0 | *errmsg = _("missing subpattern name after (?&"); |
493 | 0 | break; |
494 | 0 | case G_REGEX_ERROR_MISSING_DIGIT: |
495 | 0 | *errmsg = _("digit expected after (?+"); |
496 | 0 | break; |
497 | 0 | case G_REGEX_ERROR_INVALID_DATA_CHARACTER: |
498 | 0 | *errmsg = _("] is an invalid data character in JavaScript compatibility mode"); |
499 | 0 | break; |
500 | 0 | case G_REGEX_ERROR_EXTRA_SUBPATTERN_NAME: |
501 | 0 | *errmsg = _("different names for subpatterns of the same number are not allowed"); |
502 | 0 | break; |
503 | 0 | case G_REGEX_ERROR_BACKTRACKING_CONTROL_VERB_ARGUMENT_REQUIRED: |
504 | 0 | *errmsg = _("(*MARK) must have an argument"); |
505 | 0 | break; |
506 | 0 | case G_REGEX_ERROR_INVALID_CONTROL_CHAR: |
507 | 0 | *errmsg = _( "\\c must be followed by an ASCII character"); |
508 | 0 | break; |
509 | 0 | case G_REGEX_ERROR_MISSING_NAME: |
510 | 0 | *errmsg = _("\\k is not followed by a braced, angle-bracketed, or quoted name"); |
511 | 0 | break; |
512 | 0 | case G_REGEX_ERROR_NOT_SUPPORTED_IN_CLASS: |
513 | 0 | *errmsg = _("\\N is not supported in a class"); |
514 | 0 | break; |
515 | 0 | case G_REGEX_ERROR_TOO_MANY_FORWARD_REFERENCES: |
516 | 0 | *errmsg = _("too many forward references"); |
517 | 0 | break; |
518 | 0 | case G_REGEX_ERROR_NAME_TOO_LONG: |
519 | 0 | *errmsg = _("name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)"); |
520 | 0 | break; |
521 | 0 | case G_REGEX_ERROR_CHARACTER_VALUE_TOO_LARGE: |
522 | 0 | *errmsg = _("character value in \\u.... sequence is too large"); |
523 | 0 | break; |
524 | | |
525 | 0 | case 116: /* erroffset passed as NULL */ |
526 | | /* This should not happen as we never pass a NULL erroffset */ |
527 | 0 | g_warning ("erroffset passed as NULL"); |
528 | 0 | *errcode = G_REGEX_ERROR_COMPILE; |
529 | 0 | break; |
530 | 0 | case 117: /* unknown option bit(s) set */ |
531 | | /* This should not happen as we check options before passing them |
532 | | * to pcre_compile2() */ |
533 | 0 | g_warning ("unknown option bit(s) set"); |
534 | 0 | *errcode = G_REGEX_ERROR_COMPILE; |
535 | 0 | break; |
536 | 0 | case 132: /* this version of PCRE is compiled without UTF support */ |
537 | 0 | case 144: /* invalid UTF-8 string */ |
538 | 0 | case 145: /* support for \\P, \\p, and \\X has not been compiled */ |
539 | 0 | case 167: /* this version of PCRE is not compiled with Unicode property support */ |
540 | 0 | case 173: /* disallowed Unicode code point (>= 0xd800 && <= 0xdfff) */ |
541 | 0 | case 174: /* invalid UTF-16 string */ |
542 | | /* These errors should not happen as we are using an UTF-8 and UCP-enabled PCRE |
543 | | * and we do not check if strings are valid */ |
544 | 0 | case 170: /* internal error: unknown opcode in find_fixedlength() */ |
545 | 0 | *errcode = G_REGEX_ERROR_INTERNAL; |
546 | 0 | break; |
547 | | |
548 | 0 | default: |
549 | 0 | *errcode = G_REGEX_ERROR_COMPILE; |
550 | 0 | } |
551 | 0 | } |
552 | | |
553 | | /* GMatchInfo */ |
554 | | |
555 | | static GMatchInfo * |
556 | | match_info_new (const GRegex *regex, |
557 | | const gchar *string, |
558 | | gint string_len, |
559 | | gint start_position, |
560 | | gint match_options, |
561 | | gboolean is_dfa) |
562 | 0 | { |
563 | 0 | GMatchInfo *match_info; |
564 | |
|
565 | 0 | if (string_len < 0) |
566 | 0 | string_len = strlen (string); |
567 | |
|
568 | 0 | match_info = g_new0 (GMatchInfo, 1); |
569 | 0 | match_info->ref_count = 1; |
570 | 0 | match_info->regex = g_regex_ref ((GRegex *)regex); |
571 | 0 | match_info->string = string; |
572 | 0 | match_info->string_len = string_len; |
573 | 0 | match_info->matches = PCRE_ERROR_NOMATCH; |
574 | 0 | match_info->pos = start_position; |
575 | 0 | match_info->match_opts = match_options; |
576 | |
|
577 | 0 | if (is_dfa) |
578 | 0 | { |
579 | | /* These values should be enough for most cases, if they are not |
580 | | * enough g_regex_match_all_full() will expand them. */ |
581 | 0 | match_info->n_offsets = 24; |
582 | 0 | match_info->n_workspace = 100; |
583 | 0 | match_info->workspace = g_new (gint, match_info->n_workspace); |
584 | 0 | } |
585 | 0 | else |
586 | 0 | { |
587 | 0 | gint capture_count; |
588 | 0 | pcre_fullinfo (regex->pcre_re, regex->extra, |
589 | 0 | PCRE_INFO_CAPTURECOUNT, &capture_count); |
590 | 0 | match_info->n_offsets = (capture_count + 1) * 3; |
591 | 0 | } |
592 | |
|
593 | 0 | match_info->offsets = g_new0 (gint, match_info->n_offsets); |
594 | | /* Set an invalid position for the previous match. */ |
595 | 0 | match_info->offsets[0] = -1; |
596 | 0 | match_info->offsets[1] = -1; |
597 | |
|
598 | 0 | return match_info; |
599 | 0 | } |
600 | | |
601 | | /** |
602 | | * g_match_info_get_regex: |
603 | | * @match_info: a #GMatchInfo |
604 | | * |
605 | | * Returns #GRegex object used in @match_info. It belongs to Glib |
606 | | * and must not be freed. Use g_regex_ref() if you need to keep it |
607 | | * after you free @match_info object. |
608 | | * |
609 | | * Returns: #GRegex object used in @match_info |
610 | | * |
611 | | * Since: 2.14 |
612 | | */ |
613 | | GRegex * |
614 | | g_match_info_get_regex (const GMatchInfo *match_info) |
615 | 0 | { |
616 | 0 | g_return_val_if_fail (match_info != NULL, NULL); |
617 | 0 | return match_info->regex; |
618 | 0 | } |
619 | | |
620 | | /** |
621 | | * g_match_info_get_string: |
622 | | * @match_info: a #GMatchInfo |
623 | | * |
624 | | * Returns the string searched with @match_info. This is the |
625 | | * string passed to g_regex_match() or g_regex_replace() so |
626 | | * you may not free it before calling this function. |
627 | | * |
628 | | * Returns: the string searched with @match_info |
629 | | * |
630 | | * Since: 2.14 |
631 | | */ |
632 | | const gchar * |
633 | | g_match_info_get_string (const GMatchInfo *match_info) |
634 | 0 | { |
635 | 0 | g_return_val_if_fail (match_info != NULL, NULL); |
636 | 0 | return match_info->string; |
637 | 0 | } |
638 | | |
639 | | /** |
640 | | * g_match_info_ref: |
641 | | * @match_info: a #GMatchInfo |
642 | | * |
643 | | * Increases reference count of @match_info by 1. |
644 | | * |
645 | | * Returns: @match_info |
646 | | * |
647 | | * Since: 2.30 |
648 | | */ |
649 | | GMatchInfo * |
650 | | g_match_info_ref (GMatchInfo *match_info) |
651 | 0 | { |
652 | 0 | g_return_val_if_fail (match_info != NULL, NULL); |
653 | 0 | g_atomic_int_inc (&match_info->ref_count); |
654 | 0 | return match_info; |
655 | 0 | } |
656 | | |
657 | | /** |
658 | | * g_match_info_unref: |
659 | | * @match_info: a #GMatchInfo |
660 | | * |
661 | | * Decreases reference count of @match_info by 1. When reference count drops |
662 | | * to zero, it frees all the memory associated with the match_info structure. |
663 | | * |
664 | | * Since: 2.30 |
665 | | */ |
666 | | void |
667 | | g_match_info_unref (GMatchInfo *match_info) |
668 | 0 | { |
669 | 0 | if (g_atomic_int_dec_and_test (&match_info->ref_count)) |
670 | 0 | { |
671 | 0 | g_regex_unref (match_info->regex); |
672 | 0 | g_free (match_info->offsets); |
673 | 0 | g_free (match_info->workspace); |
674 | 0 | g_free (match_info); |
675 | 0 | } |
676 | 0 | } |
677 | | |
678 | | /** |
679 | | * g_match_info_free: |
680 | | * @match_info: (nullable): a #GMatchInfo, or %NULL |
681 | | * |
682 | | * If @match_info is not %NULL, calls g_match_info_unref(); otherwise does |
683 | | * nothing. |
684 | | * |
685 | | * Since: 2.14 |
686 | | */ |
687 | | void |
688 | | g_match_info_free (GMatchInfo *match_info) |
689 | 0 | { |
690 | 0 | if (match_info == NULL) |
691 | 0 | return; |
692 | | |
693 | 0 | g_match_info_unref (match_info); |
694 | 0 | } |
695 | | |
696 | | /** |
697 | | * g_match_info_next: |
698 | | * @match_info: a #GMatchInfo structure |
699 | | * @error: location to store the error occurring, or %NULL to ignore errors |
700 | | * |
701 | | * Scans for the next match using the same parameters of the previous |
702 | | * call to g_regex_match_full() or g_regex_match() that returned |
703 | | * @match_info. |
704 | | * |
705 | | * The match is done on the string passed to the match function, so you |
706 | | * cannot free it before calling this function. |
707 | | * |
708 | | * Returns: %TRUE if the string matched, %FALSE otherwise |
709 | | * |
710 | | * Since: 2.14 |
711 | | */ |
712 | | gboolean |
713 | | g_match_info_next (GMatchInfo *match_info, |
714 | | GError **error) |
715 | 0 | { |
716 | 0 | gint prev_match_start; |
717 | 0 | gint prev_match_end; |
718 | |
|
719 | 0 | g_return_val_if_fail (match_info != NULL, FALSE); |
720 | 0 | g_return_val_if_fail (error == NULL || *error == NULL, FALSE); |
721 | 0 | g_return_val_if_fail (match_info->pos >= 0, FALSE); |
722 | | |
723 | 0 | prev_match_start = match_info->offsets[0]; |
724 | 0 | prev_match_end = match_info->offsets[1]; |
725 | |
|
726 | 0 | if (match_info->pos > match_info->string_len) |
727 | 0 | { |
728 | | /* we have reached the end of the string */ |
729 | 0 | match_info->pos = -1; |
730 | 0 | match_info->matches = PCRE_ERROR_NOMATCH; |
731 | 0 | return FALSE; |
732 | 0 | } |
733 | | |
734 | 0 | match_info->matches = pcre_exec (match_info->regex->pcre_re, |
735 | 0 | match_info->regex->extra, |
736 | 0 | match_info->string, |
737 | 0 | match_info->string_len, |
738 | 0 | match_info->pos, |
739 | 0 | match_info->regex->match_opts | match_info->match_opts, |
740 | 0 | match_info->offsets, |
741 | 0 | match_info->n_offsets); |
742 | 0 | if (IS_PCRE_ERROR (match_info->matches)) |
743 | 0 | { |
744 | 0 | g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH, |
745 | 0 | _("Error while matching regular expression %s: %s"), |
746 | 0 | match_info->regex->pattern, match_error (match_info->matches)); |
747 | 0 | return FALSE; |
748 | 0 | } |
749 | | |
750 | | /* avoid infinite loops if the pattern is an empty string or something |
751 | | * equivalent */ |
752 | 0 | if (match_info->pos == match_info->offsets[1]) |
753 | 0 | { |
754 | 0 | if (match_info->pos > match_info->string_len) |
755 | 0 | { |
756 | | /* we have reached the end of the string */ |
757 | 0 | match_info->pos = -1; |
758 | 0 | match_info->matches = PCRE_ERROR_NOMATCH; |
759 | 0 | return FALSE; |
760 | 0 | } |
761 | | |
762 | 0 | match_info->pos = NEXT_CHAR (match_info->regex, |
763 | 0 | &match_info->string[match_info->pos]) - |
764 | 0 | match_info->string; |
765 | 0 | } |
766 | 0 | else |
767 | 0 | { |
768 | 0 | match_info->pos = match_info->offsets[1]; |
769 | 0 | } |
770 | | |
771 | | /* it's possible to get two identical matches when we are matching |
772 | | * empty strings, for instance if the pattern is "(?=[A-Z0-9])" and |
773 | | * the string is "RegExTest" we have: |
774 | | * - search at position 0: match from 0 to 0 |
775 | | * - search at position 1: match from 3 to 3 |
776 | | * - search at position 3: match from 3 to 3 (duplicate) |
777 | | * - search at position 4: match from 5 to 5 |
778 | | * - search at position 5: match from 5 to 5 (duplicate) |
779 | | * - search at position 6: no match -> stop |
780 | | * so we have to ignore the duplicates. |
781 | | * see bug #515944: http://bugzilla.gnome.org/show_bug.cgi?id=515944 */ |
782 | 0 | if (match_info->matches >= 0 && |
783 | 0 | prev_match_start == match_info->offsets[0] && |
784 | 0 | prev_match_end == match_info->offsets[1]) |
785 | 0 | { |
786 | | /* ignore this match and search the next one */ |
787 | 0 | return g_match_info_next (match_info, error); |
788 | 0 | } |
789 | | |
790 | 0 | return match_info->matches >= 0; |
791 | 0 | } |
792 | | |
793 | | /** |
794 | | * g_match_info_matches: |
795 | | * @match_info: a #GMatchInfo structure |
796 | | * |
797 | | * Returns whether the previous match operation succeeded. |
798 | | * |
799 | | * Returns: %TRUE if the previous match operation succeeded, |
800 | | * %FALSE otherwise |
801 | | * |
802 | | * Since: 2.14 |
803 | | */ |
804 | | gboolean |
805 | | g_match_info_matches (const GMatchInfo *match_info) |
806 | 0 | { |
807 | 0 | g_return_val_if_fail (match_info != NULL, FALSE); |
808 | | |
809 | 0 | return match_info->matches >= 0; |
810 | 0 | } |
811 | | |
812 | | /** |
813 | | * g_match_info_get_match_count: |
814 | | * @match_info: a #GMatchInfo structure |
815 | | * |
816 | | * Retrieves the number of matched substrings (including substring 0, |
817 | | * that is the whole matched text), so 1 is returned if the pattern |
818 | | * has no substrings in it and 0 is returned if the match failed. |
819 | | * |
820 | | * If the last match was obtained using the DFA algorithm, that is |
821 | | * using g_regex_match_all() or g_regex_match_all_full(), the retrieved |
822 | | * count is not that of the number of capturing parentheses but that of |
823 | | * the number of matched substrings. |
824 | | * |
825 | | * Returns: Number of matched substrings, or -1 if an error occurred |
826 | | * |
827 | | * Since: 2.14 |
828 | | */ |
829 | | gint |
830 | | g_match_info_get_match_count (const GMatchInfo *match_info) |
831 | 0 | { |
832 | 0 | g_return_val_if_fail (match_info, -1); |
833 | | |
834 | 0 | if (match_info->matches == PCRE_ERROR_NOMATCH) |
835 | | /* no match */ |
836 | 0 | return 0; |
837 | 0 | else if (match_info->matches < PCRE_ERROR_NOMATCH) |
838 | | /* error */ |
839 | 0 | return -1; |
840 | 0 | else |
841 | | /* match */ |
842 | 0 | return match_info->matches; |
843 | 0 | } |
844 | | |
845 | | /** |
846 | | * g_match_info_is_partial_match: |
847 | | * @match_info: a #GMatchInfo structure |
848 | | * |
849 | | * Usually if the string passed to g_regex_match*() matches as far as |
850 | | * it goes, but is too short to match the entire pattern, %FALSE is |
851 | | * returned. There are circumstances where it might be helpful to |
852 | | * distinguish this case from other cases in which there is no match. |
853 | | * |
854 | | * Consider, for example, an application where a human is required to |
855 | | * type in data for a field with specific formatting requirements. An |
856 | | * example might be a date in the form ddmmmyy, defined by the pattern |
857 | | * "^\d?\d(jan|feb|mar|apr|may|jun|jul|aug|sep|oct|nov|dec)\d\d$". |
858 | | * If the application sees the user’s keystrokes one by one, and can |
859 | | * check that what has been typed so far is potentially valid, it is |
860 | | * able to raise an error as soon as a mistake is made. |
861 | | * |
862 | | * GRegex supports the concept of partial matching by means of the |
863 | | * #G_REGEX_MATCH_PARTIAL_SOFT and #G_REGEX_MATCH_PARTIAL_HARD flags. |
864 | | * When they are used, the return code for |
865 | | * g_regex_match() or g_regex_match_full() is, as usual, %TRUE |
866 | | * for a complete match, %FALSE otherwise. But, when these functions |
867 | | * return %FALSE, you can check if the match was partial calling |
868 | | * g_match_info_is_partial_match(). |
869 | | * |
870 | | * The difference between #G_REGEX_MATCH_PARTIAL_SOFT and |
871 | | * #G_REGEX_MATCH_PARTIAL_HARD is that when a partial match is encountered |
872 | | * with #G_REGEX_MATCH_PARTIAL_SOFT, matching continues to search for a |
873 | | * possible complete match, while with #G_REGEX_MATCH_PARTIAL_HARD matching |
874 | | * stops at the partial match. |
875 | | * When both #G_REGEX_MATCH_PARTIAL_SOFT and #G_REGEX_MATCH_PARTIAL_HARD |
876 | | * are set, the latter takes precedence. |
877 | | * |
878 | | * There were formerly some restrictions on the pattern for partial matching. |
879 | | * The restrictions no longer apply. |
880 | | * |
881 | | * See pcrepartial(3) for more information on partial matching. |
882 | | * |
883 | | * Returns: %TRUE if the match was partial, %FALSE otherwise |
884 | | * |
885 | | * Since: 2.14 |
886 | | */ |
887 | | gboolean |
888 | | g_match_info_is_partial_match (const GMatchInfo *match_info) |
889 | 0 | { |
890 | 0 | g_return_val_if_fail (match_info != NULL, FALSE); |
891 | | |
892 | 0 | return match_info->matches == PCRE_ERROR_PARTIAL; |
893 | 0 | } |
894 | | |
895 | | /** |
896 | | * g_match_info_expand_references: |
897 | | * @match_info: (nullable): a #GMatchInfo or %NULL |
898 | | * @string_to_expand: the string to expand |
899 | | * @error: location to store the error occurring, or %NULL to ignore errors |
900 | | * |
901 | | * Returns a new string containing the text in @string_to_expand with |
902 | | * references and escape sequences expanded. References refer to the last |
903 | | * match done with @string against @regex and have the same syntax used by |
904 | | * g_regex_replace(). |
905 | | * |
906 | | * The @string_to_expand must be UTF-8 encoded even if #G_REGEX_RAW was |
907 | | * passed to g_regex_new(). |
908 | | * |
909 | | * The backreferences are extracted from the string passed to the match |
910 | | * function, so you cannot call this function after freeing the string. |
911 | | * |
912 | | * @match_info may be %NULL in which case @string_to_expand must not |
913 | | * contain references. For instance "foo\n" does not refer to an actual |
914 | | * pattern and '\n' will merely be replaced with the \n character, |
915 | | * while to expand "\0" (whole match) one needs the result of a match. |
916 | | * Use g_regex_check_replacement() to find out whether @string_to_expand |
917 | | * contains references. |
918 | | * |
919 | | * Returns: (nullable): the expanded string, or %NULL if an error occurred |
920 | | * |
921 | | * Since: 2.14 |
922 | | */ |
923 | | gchar * |
924 | | g_match_info_expand_references (const GMatchInfo *match_info, |
925 | | const gchar *string_to_expand, |
926 | | GError **error) |
927 | 0 | { |
928 | 0 | GString *result; |
929 | 0 | GList *list; |
930 | 0 | GError *tmp_error = NULL; |
931 | |
|
932 | 0 | g_return_val_if_fail (string_to_expand != NULL, NULL); |
933 | 0 | g_return_val_if_fail (error == NULL || *error == NULL, NULL); |
934 | | |
935 | 0 | list = split_replacement (string_to_expand, &tmp_error); |
936 | 0 | if (tmp_error != NULL) |
937 | 0 | { |
938 | 0 | g_propagate_error (error, tmp_error); |
939 | 0 | return NULL; |
940 | 0 | } |
941 | | |
942 | 0 | if (!match_info && interpolation_list_needs_match (list)) |
943 | 0 | { |
944 | 0 | g_critical ("String '%s' contains references to the match, can't " |
945 | 0 | "expand references without GMatchInfo object", |
946 | 0 | string_to_expand); |
947 | 0 | return NULL; |
948 | 0 | } |
949 | | |
950 | 0 | result = g_string_sized_new (strlen (string_to_expand)); |
951 | 0 | interpolate_replacement (match_info, result, list); |
952 | |
|
953 | 0 | g_list_free_full (list, (GDestroyNotify) free_interpolation_data); |
954 | |
|
955 | 0 | return g_string_free (result, FALSE); |
956 | 0 | } |
957 | | |
958 | | /** |
959 | | * g_match_info_fetch: |
960 | | * @match_info: #GMatchInfo structure |
961 | | * @match_num: number of the sub expression |
962 | | * |
963 | | * Retrieves the text matching the @match_num'th capturing |
964 | | * parentheses. 0 is the full text of the match, 1 is the first paren |
965 | | * set, 2 the second, and so on. |
966 | | * |
967 | | * If @match_num is a valid sub pattern but it didn't match anything |
968 | | * (e.g. sub pattern 1, matching "b" against "(a)?b") then an empty |
969 | | * string is returned. |
970 | | * |
971 | | * If the match was obtained using the DFA algorithm, that is using |
972 | | * g_regex_match_all() or g_regex_match_all_full(), the retrieved |
973 | | * string is not that of a set of parentheses but that of a matched |
974 | | * substring. Substrings are matched in reverse order of length, so |
975 | | * 0 is the longest match. |
976 | | * |
977 | | * The string is fetched from the string passed to the match function, |
978 | | * so you cannot call this function after freeing the string. |
979 | | * |
980 | | * Returns: (nullable): The matched substring, or %NULL if an error |
981 | | * occurred. You have to free the string yourself |
982 | | * |
983 | | * Since: 2.14 |
984 | | */ |
985 | | gchar * |
986 | | g_match_info_fetch (const GMatchInfo *match_info, |
987 | | gint match_num) |
988 | 0 | { |
989 | | /* we cannot use pcre_get_substring() because it allocates the |
990 | | * string using pcre_malloc(). */ |
991 | 0 | gchar *match = NULL; |
992 | 0 | gint start, end; |
993 | |
|
994 | 0 | g_return_val_if_fail (match_info != NULL, NULL); |
995 | 0 | g_return_val_if_fail (match_num >= 0, NULL); |
996 | | |
997 | | /* match_num does not exist or it didn't match, e.g. matching "b" |
998 | | * against "(a)?b" then group 1 is empty. */ |
999 | 0 | if (!g_match_info_fetch_pos (match_info, match_num, &start, &end)) |
1000 | 0 | match = NULL; |
1001 | 0 | else if (start == -1) |
1002 | 0 | match = g_strdup (""); |
1003 | 0 | else |
1004 | 0 | match = g_strndup (&match_info->string[start], end - start); |
1005 | |
|
1006 | 0 | return match; |
1007 | 0 | } |
1008 | | |
1009 | | /** |
1010 | | * g_match_info_fetch_pos: |
1011 | | * @match_info: #GMatchInfo structure |
1012 | | * @match_num: number of the sub expression |
1013 | | * @start_pos: (out) (optional): pointer to location where to store |
1014 | | * the start position, or %NULL |
1015 | | * @end_pos: (out) (optional): pointer to location where to store |
1016 | | * the end position, or %NULL |
1017 | | * |
1018 | | * Retrieves the position in bytes of the @match_num'th capturing |
1019 | | * parentheses. 0 is the full text of the match, 1 is the first |
1020 | | * paren set, 2 the second, and so on. |
1021 | | * |
1022 | | * If @match_num is a valid sub pattern but it didn't match anything |
1023 | | * (e.g. sub pattern 1, matching "b" against "(a)?b") then @start_pos |
1024 | | * and @end_pos are set to -1 and %TRUE is returned. |
1025 | | * |
1026 | | * If the match was obtained using the DFA algorithm, that is using |
1027 | | * g_regex_match_all() or g_regex_match_all_full(), the retrieved |
1028 | | * position is not that of a set of parentheses but that of a matched |
1029 | | * substring. Substrings are matched in reverse order of length, so |
1030 | | * 0 is the longest match. |
1031 | | * |
1032 | | * Returns: %TRUE if the position was fetched, %FALSE otherwise. If |
1033 | | * the position cannot be fetched, @start_pos and @end_pos are left |
1034 | | * unchanged |
1035 | | * |
1036 | | * Since: 2.14 |
1037 | | */ |
1038 | | gboolean |
1039 | | g_match_info_fetch_pos (const GMatchInfo *match_info, |
1040 | | gint match_num, |
1041 | | gint *start_pos, |
1042 | | gint *end_pos) |
1043 | 0 | { |
1044 | 0 | g_return_val_if_fail (match_info != NULL, FALSE); |
1045 | 0 | g_return_val_if_fail (match_num >= 0, FALSE); |
1046 | | |
1047 | | /* make sure the sub expression number they're requesting is less than |
1048 | | * the total number of sub expressions that were matched. */ |
1049 | 0 | if (match_num >= match_info->matches) |
1050 | 0 | return FALSE; |
1051 | | |
1052 | 0 | if (start_pos != NULL) |
1053 | 0 | *start_pos = match_info->offsets[2 * match_num]; |
1054 | |
|
1055 | 0 | if (end_pos != NULL) |
1056 | 0 | *end_pos = match_info->offsets[2 * match_num + 1]; |
1057 | |
|
1058 | 0 | return TRUE; |
1059 | 0 | } |
1060 | | |
1061 | | /* |
1062 | | * Returns number of first matched subpattern with name @name. |
1063 | | * There may be more than one in case when DUPNAMES is used, |
1064 | | * and not all subpatterns with that name match; |
1065 | | * pcre_get_stringnumber() does not work in that case. |
1066 | | */ |
1067 | | static gint |
1068 | | get_matched_substring_number (const GMatchInfo *match_info, |
1069 | | const gchar *name) |
1070 | 0 | { |
1071 | 0 | gint entrysize; |
1072 | 0 | gchar *first, *last; |
1073 | 0 | guchar *entry; |
1074 | |
|
1075 | 0 | if (!(match_info->regex->compile_opts & G_REGEX_DUPNAMES)) |
1076 | 0 | return pcre_get_stringnumber (match_info->regex->pcre_re, name); |
1077 | | |
1078 | | /* This code is copied from pcre_get.c: get_first_set() */ |
1079 | 0 | entrysize = pcre_get_stringtable_entries (match_info->regex->pcre_re, |
1080 | 0 | name, |
1081 | 0 | &first, |
1082 | 0 | &last); |
1083 | |
|
1084 | 0 | if (entrysize <= 0) |
1085 | 0 | return entrysize; |
1086 | | |
1087 | 0 | for (entry = (guchar*) first; entry <= (guchar*) last; entry += entrysize) |
1088 | 0 | { |
1089 | 0 | gint n = (entry[0] << 8) + entry[1]; |
1090 | 0 | if (match_info->offsets[n*2] >= 0) |
1091 | 0 | return n; |
1092 | 0 | } |
1093 | | |
1094 | 0 | return (first[0] << 8) + first[1]; |
1095 | 0 | } |
1096 | | |
1097 | | /** |
1098 | | * g_match_info_fetch_named: |
1099 | | * @match_info: #GMatchInfo structure |
1100 | | * @name: name of the subexpression |
1101 | | * |
1102 | | * Retrieves the text matching the capturing parentheses named @name. |
1103 | | * |
1104 | | * If @name is a valid sub pattern name but it didn't match anything |
1105 | | * (e.g. sub pattern "X", matching "b" against "(?P<X>a)?b") |
1106 | | * then an empty string is returned. |
1107 | | * |
1108 | | * The string is fetched from the string passed to the match function, |
1109 | | * so you cannot call this function after freeing the string. |
1110 | | * |
1111 | | * Returns: (nullable): The matched substring, or %NULL if an error |
1112 | | * occurred. You have to free the string yourself |
1113 | | * |
1114 | | * Since: 2.14 |
1115 | | */ |
1116 | | gchar * |
1117 | | g_match_info_fetch_named (const GMatchInfo *match_info, |
1118 | | const gchar *name) |
1119 | 0 | { |
1120 | | /* we cannot use pcre_get_named_substring() because it allocates the |
1121 | | * string using pcre_malloc(). */ |
1122 | 0 | gint num; |
1123 | |
|
1124 | 0 | g_return_val_if_fail (match_info != NULL, NULL); |
1125 | 0 | g_return_val_if_fail (name != NULL, NULL); |
1126 | | |
1127 | 0 | num = get_matched_substring_number (match_info, name); |
1128 | 0 | if (num < 0) |
1129 | 0 | return NULL; |
1130 | 0 | else |
1131 | 0 | return g_match_info_fetch (match_info, num); |
1132 | 0 | } |
1133 | | |
1134 | | /** |
1135 | | * g_match_info_fetch_named_pos: |
1136 | | * @match_info: #GMatchInfo structure |
1137 | | * @name: name of the subexpression |
1138 | | * @start_pos: (out) (optional): pointer to location where to store |
1139 | | * the start position, or %NULL |
1140 | | * @end_pos: (out) (optional): pointer to location where to store |
1141 | | * the end position, or %NULL |
1142 | | * |
1143 | | * Retrieves the position in bytes of the capturing parentheses named @name. |
1144 | | * |
1145 | | * If @name is a valid sub pattern name but it didn't match anything |
1146 | | * (e.g. sub pattern "X", matching "b" against "(?P<X>a)?b") |
1147 | | * then @start_pos and @end_pos are set to -1 and %TRUE is returned. |
1148 | | * |
1149 | | * Returns: %TRUE if the position was fetched, %FALSE otherwise. |
1150 | | * If the position cannot be fetched, @start_pos and @end_pos |
1151 | | * are left unchanged. |
1152 | | * |
1153 | | * Since: 2.14 |
1154 | | */ |
1155 | | gboolean |
1156 | | g_match_info_fetch_named_pos (const GMatchInfo *match_info, |
1157 | | const gchar *name, |
1158 | | gint *start_pos, |
1159 | | gint *end_pos) |
1160 | 0 | { |
1161 | 0 | gint num; |
1162 | |
|
1163 | 0 | g_return_val_if_fail (match_info != NULL, FALSE); |
1164 | 0 | g_return_val_if_fail (name != NULL, FALSE); |
1165 | | |
1166 | 0 | num = get_matched_substring_number (match_info, name); |
1167 | 0 | if (num < 0) |
1168 | 0 | return FALSE; |
1169 | | |
1170 | 0 | return g_match_info_fetch_pos (match_info, num, start_pos, end_pos); |
1171 | 0 | } |
1172 | | |
1173 | | /** |
1174 | | * g_match_info_fetch_all: |
1175 | | * @match_info: a #GMatchInfo structure |
1176 | | * |
1177 | | * Bundles up pointers to each of the matching substrings from a match |
1178 | | * and stores them in an array of gchar pointers. The first element in |
1179 | | * the returned array is the match number 0, i.e. the entire matched |
1180 | | * text. |
1181 | | * |
1182 | | * If a sub pattern didn't match anything (e.g. sub pattern 1, matching |
1183 | | * "b" against "(a)?b") then an empty string is inserted. |
1184 | | * |
1185 | | * If the last match was obtained using the DFA algorithm, that is using |
1186 | | * g_regex_match_all() or g_regex_match_all_full(), the retrieved |
1187 | | * strings are not those matched by sets of parentheses but those of the |
1188 | | * matched substrings. Substrings are matched in reverse order of length, |
1189 | | * so the first one is the longest match. |
1190 | | * |
1191 | | * The strings are fetched from the string passed to the match function, |
1192 | | * so you cannot call this function after freeing the string. |
1193 | | * |
1194 | | * Returns: (transfer full): a %NULL-terminated array of gchar * |
1195 | | * pointers. It must be freed using g_strfreev(). If the previous |
1196 | | * match failed %NULL is returned |
1197 | | * |
1198 | | * Since: 2.14 |
1199 | | */ |
1200 | | gchar ** |
1201 | | g_match_info_fetch_all (const GMatchInfo *match_info) |
1202 | 0 | { |
1203 | | /* we cannot use pcre_get_substring_list() because the returned value |
1204 | | * isn't suitable for g_strfreev(). */ |
1205 | 0 | gchar **result; |
1206 | 0 | gint i; |
1207 | |
|
1208 | 0 | g_return_val_if_fail (match_info != NULL, NULL); |
1209 | | |
1210 | 0 | if (match_info->matches < 0) |
1211 | 0 | return NULL; |
1212 | | |
1213 | 0 | result = g_new (gchar *, match_info->matches + 1); |
1214 | 0 | for (i = 0; i < match_info->matches; i++) |
1215 | 0 | result[i] = g_match_info_fetch (match_info, i); |
1216 | 0 | result[i] = NULL; |
1217 | |
|
1218 | 0 | return result; |
1219 | 0 | } |
1220 | | |
1221 | | |
1222 | | /* GRegex */ |
1223 | | |
1224 | | G_DEFINE_QUARK (g-regex-error-quark, g_regex_error) |
1225 | | |
1226 | | /** |
1227 | | * g_regex_ref: |
1228 | | * @regex: a #GRegex |
1229 | | * |
1230 | | * Increases reference count of @regex by 1. |
1231 | | * |
1232 | | * Returns: @regex |
1233 | | * |
1234 | | * Since: 2.14 |
1235 | | */ |
1236 | | GRegex * |
1237 | | g_regex_ref (GRegex *regex) |
1238 | 0 | { |
1239 | 0 | g_return_val_if_fail (regex != NULL, NULL); |
1240 | 0 | g_atomic_int_inc (&regex->ref_count); |
1241 | 0 | return regex; |
1242 | 0 | } |
1243 | | |
1244 | | /** |
1245 | | * g_regex_unref: |
1246 | | * @regex: a #GRegex |
1247 | | * |
1248 | | * Decreases reference count of @regex by 1. When reference count drops |
1249 | | * to zero, it frees all the memory associated with the regex structure. |
1250 | | * |
1251 | | * Since: 2.14 |
1252 | | */ |
1253 | | void |
1254 | | g_regex_unref (GRegex *regex) |
1255 | 0 | { |
1256 | 0 | g_return_if_fail (regex != NULL); |
1257 | | |
1258 | 0 | if (g_atomic_int_dec_and_test (&regex->ref_count)) |
1259 | 0 | { |
1260 | 0 | g_free (regex->pattern); |
1261 | 0 | if (regex->pcre_re != NULL) |
1262 | 0 | pcre_free (regex->pcre_re); |
1263 | 0 | if (regex->extra != NULL) |
1264 | 0 | pcre_free (regex->extra); |
1265 | 0 | g_free (regex); |
1266 | 0 | } |
1267 | 0 | } |
1268 | | |
1269 | | /* |
1270 | | * @match_options: (inout) (optional): |
1271 | | */ |
1272 | | static pcre *regex_compile (const gchar *pattern, |
1273 | | GRegexCompileFlags compile_options, |
1274 | | GRegexCompileFlags *compile_options_out, |
1275 | | GRegexMatchFlags *match_options, |
1276 | | GError **error); |
1277 | | |
1278 | | /** |
1279 | | * g_regex_new: |
1280 | | * @pattern: the regular expression |
1281 | | * @compile_options: compile options for the regular expression, or 0 |
1282 | | * @match_options: match options for the regular expression, or 0 |
1283 | | * @error: return location for a #GError |
1284 | | * |
1285 | | * Compiles the regular expression to an internal form, and does |
1286 | | * the initial setup of the #GRegex structure. |
1287 | | * |
1288 | | * Returns: (nullable): a #GRegex structure or %NULL if an error occurred. Call |
1289 | | * g_regex_unref() when you are done with it |
1290 | | * |
1291 | | * Since: 2.14 |
1292 | | */ |
1293 | | GRegex * |
1294 | | g_regex_new (const gchar *pattern, |
1295 | | GRegexCompileFlags compile_options, |
1296 | | GRegexMatchFlags match_options, |
1297 | | GError **error) |
1298 | 0 | { |
1299 | 0 | GRegex *regex; |
1300 | 0 | pcre *re; |
1301 | 0 | const gchar *errmsg; |
1302 | 0 | gboolean optimize = FALSE; |
1303 | 0 | static gsize initialised = 0; |
1304 | |
|
1305 | 0 | g_return_val_if_fail (pattern != NULL, NULL); |
1306 | 0 | g_return_val_if_fail (error == NULL || *error == NULL, NULL); |
1307 | 0 | g_return_val_if_fail ((compile_options & ~G_REGEX_COMPILE_MASK) == 0, NULL); |
1308 | 0 | g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL); |
1309 | | |
1310 | 0 | if (g_once_init_enter (&initialised)) |
1311 | 0 | { |
1312 | 0 | int supports_utf8, supports_ucp; |
1313 | |
|
1314 | 0 | pcre_config (PCRE_CONFIG_UTF8, &supports_utf8); |
1315 | 0 | if (!supports_utf8) |
1316 | 0 | g_critical (_("PCRE library is compiled without UTF8 support")); |
1317 | |
|
1318 | 0 | pcre_config (PCRE_CONFIG_UNICODE_PROPERTIES, &supports_ucp); |
1319 | 0 | if (!supports_ucp) |
1320 | 0 | g_critical (_("PCRE library is compiled without UTF8 properties support")); |
1321 | |
|
1322 | 0 | g_once_init_leave (&initialised, supports_utf8 && supports_ucp ? 1 : 2); |
1323 | 0 | } |
1324 | |
|
1325 | 0 | if (G_UNLIKELY (initialised != 1)) |
1326 | 0 | { |
1327 | 0 | g_set_error_literal (error, G_REGEX_ERROR, G_REGEX_ERROR_COMPILE, |
1328 | 0 | _("PCRE library is compiled with incompatible options")); |
1329 | 0 | return NULL; |
1330 | 0 | } |
1331 | | |
1332 | | /* G_REGEX_OPTIMIZE has the same numeric value as PCRE_NO_UTF8_CHECK, |
1333 | | * as we do not need to wrap PCRE_NO_UTF8_CHECK. */ |
1334 | 0 | if (compile_options & G_REGEX_OPTIMIZE) |
1335 | 0 | optimize = TRUE; |
1336 | |
|
1337 | 0 | re = regex_compile (pattern, compile_options, &compile_options, |
1338 | 0 | &match_options, error); |
1339 | |
|
1340 | 0 | if (re == NULL) |
1341 | 0 | return NULL; |
1342 | | |
1343 | 0 | regex = g_new0 (GRegex, 1); |
1344 | 0 | regex->ref_count = 1; |
1345 | 0 | regex->pattern = g_strdup (pattern); |
1346 | 0 | regex->pcre_re = re; |
1347 | 0 | regex->compile_opts = compile_options; |
1348 | 0 | regex->match_opts = match_options; |
1349 | |
|
1350 | 0 | if (optimize) |
1351 | 0 | { |
1352 | 0 | regex->extra = pcre_study (regex->pcre_re, 0, &errmsg); |
1353 | 0 | if (errmsg != NULL) |
1354 | 0 | { |
1355 | 0 | GError *tmp_error = g_error_new (G_REGEX_ERROR, |
1356 | 0 | G_REGEX_ERROR_OPTIMIZE, |
1357 | 0 | _("Error while optimizing " |
1358 | 0 | "regular expression %s: %s"), |
1359 | 0 | regex->pattern, |
1360 | 0 | errmsg); |
1361 | 0 | g_propagate_error (error, tmp_error); |
1362 | |
|
1363 | 0 | g_regex_unref (regex); |
1364 | 0 | return NULL; |
1365 | 0 | } |
1366 | 0 | } |
1367 | | |
1368 | 0 | return regex; |
1369 | 0 | } |
1370 | | |
1371 | | static pcre * |
1372 | | regex_compile (const gchar *pattern, |
1373 | | GRegexCompileFlags compile_options, |
1374 | | GRegexCompileFlags *compile_options_out, |
1375 | | GRegexMatchFlags *match_options, |
1376 | | GError **error) |
1377 | 0 | { |
1378 | 0 | pcre *re; |
1379 | 0 | const gchar *errmsg; |
1380 | 0 | gint erroffset; |
1381 | 0 | gint errcode; |
1382 | 0 | GRegexCompileFlags nonpcre_compile_options; |
1383 | 0 | unsigned long int pcre_compile_options; |
1384 | |
|
1385 | 0 | nonpcre_compile_options = compile_options & G_REGEX_COMPILE_NONPCRE_MASK; |
1386 | | |
1387 | | /* In GRegex the strings are, by default, UTF-8 encoded. PCRE |
1388 | | * instead uses UTF-8 only if required with PCRE_UTF8. */ |
1389 | 0 | if (compile_options & G_REGEX_RAW) |
1390 | 0 | { |
1391 | | /* disable utf-8 */ |
1392 | 0 | compile_options &= ~G_REGEX_RAW; |
1393 | 0 | } |
1394 | 0 | else |
1395 | 0 | { |
1396 | | /* enable utf-8 */ |
1397 | 0 | compile_options |= PCRE_UTF8 | PCRE_NO_UTF8_CHECK; |
1398 | |
|
1399 | 0 | if (match_options != NULL) |
1400 | 0 | *match_options |= PCRE_NO_UTF8_CHECK; |
1401 | 0 | } |
1402 | | |
1403 | | /* PCRE_NEWLINE_ANY is the default for the internal PCRE but |
1404 | | * not for the system one. */ |
1405 | 0 | if (!(compile_options & G_REGEX_NEWLINE_CR) && |
1406 | 0 | !(compile_options & G_REGEX_NEWLINE_LF)) |
1407 | 0 | { |
1408 | 0 | compile_options |= PCRE_NEWLINE_ANY; |
1409 | 0 | } |
1410 | |
|
1411 | 0 | compile_options |= PCRE_UCP; |
1412 | | |
1413 | | /* PCRE_BSR_UNICODE is the default for the internal PCRE but |
1414 | | * possibly not for the system one. |
1415 | | */ |
1416 | 0 | if (~compile_options & G_REGEX_BSR_ANYCRLF) |
1417 | 0 | compile_options |= PCRE_BSR_UNICODE; |
1418 | | |
1419 | | /* compile the pattern */ |
1420 | 0 | re = pcre_compile2 (pattern, compile_options, &errcode, |
1421 | 0 | &errmsg, &erroffset, NULL); |
1422 | | |
1423 | | /* if the compilation failed, set the error member and return |
1424 | | * immediately */ |
1425 | 0 | if (re == NULL) |
1426 | 0 | { |
1427 | 0 | GError *tmp_error; |
1428 | | |
1429 | | /* Translate the PCRE error code to GRegexError and use a translated |
1430 | | * error message if possible */ |
1431 | 0 | translate_compile_error (&errcode, &errmsg); |
1432 | | |
1433 | | /* PCRE uses byte offsets but we want to show character offsets */ |
1434 | 0 | erroffset = g_utf8_pointer_to_offset (pattern, &pattern[erroffset]); |
1435 | |
|
1436 | 0 | tmp_error = g_error_new (G_REGEX_ERROR, errcode, |
1437 | 0 | _("Error while compiling regular " |
1438 | 0 | "expression %s at char %d: %s"), |
1439 | 0 | pattern, erroffset, errmsg); |
1440 | 0 | g_propagate_error (error, tmp_error); |
1441 | |
|
1442 | 0 | return NULL; |
1443 | 0 | } |
1444 | | |
1445 | | /* For options set at the beginning of the pattern, pcre puts them into |
1446 | | * compile options, e.g. "(?i)foo" will make the pcre structure store |
1447 | | * PCRE_CASELESS even though it wasn't explicitly given for compilation. */ |
1448 | 0 | pcre_fullinfo (re, NULL, PCRE_INFO_OPTIONS, &pcre_compile_options); |
1449 | 0 | compile_options = pcre_compile_options & G_REGEX_COMPILE_PCRE_MASK; |
1450 | | |
1451 | | /* Don't leak PCRE_NEWLINE_ANY, which is part of PCRE_NEWLINE_ANYCRLF */ |
1452 | 0 | if ((pcre_compile_options & PCRE_NEWLINE_ANYCRLF) != PCRE_NEWLINE_ANYCRLF) |
1453 | 0 | compile_options &= ~PCRE_NEWLINE_ANY; |
1454 | |
|
1455 | 0 | compile_options |= nonpcre_compile_options; |
1456 | |
|
1457 | 0 | if (!(compile_options & G_REGEX_DUPNAMES)) |
1458 | 0 | { |
1459 | 0 | gboolean jchanged = FALSE; |
1460 | 0 | pcre_fullinfo (re, NULL, PCRE_INFO_JCHANGED, &jchanged); |
1461 | 0 | if (jchanged) |
1462 | 0 | compile_options |= G_REGEX_DUPNAMES; |
1463 | 0 | } |
1464 | |
|
1465 | 0 | if (compile_options_out != 0) |
1466 | 0 | *compile_options_out = compile_options; |
1467 | |
|
1468 | 0 | return re; |
1469 | 0 | } |
1470 | | |
1471 | | /** |
1472 | | * g_regex_get_pattern: |
1473 | | * @regex: a #GRegex structure |
1474 | | * |
1475 | | * Gets the pattern string associated with @regex, i.e. a copy of |
1476 | | * the string passed to g_regex_new(). |
1477 | | * |
1478 | | * Returns: the pattern of @regex |
1479 | | * |
1480 | | * Since: 2.14 |
1481 | | */ |
1482 | | const gchar * |
1483 | | g_regex_get_pattern (const GRegex *regex) |
1484 | 0 | { |
1485 | 0 | g_return_val_if_fail (regex != NULL, NULL); |
1486 | | |
1487 | 0 | return regex->pattern; |
1488 | 0 | } |
1489 | | |
1490 | | /** |
1491 | | * g_regex_get_max_backref: |
1492 | | * @regex: a #GRegex |
1493 | | * |
1494 | | * Returns the number of the highest back reference |
1495 | | * in the pattern, or 0 if the pattern does not contain |
1496 | | * back references. |
1497 | | * |
1498 | | * Returns: the number of the highest back reference |
1499 | | * |
1500 | | * Since: 2.14 |
1501 | | */ |
1502 | | gint |
1503 | | g_regex_get_max_backref (const GRegex *regex) |
1504 | 0 | { |
1505 | 0 | gint value; |
1506 | |
|
1507 | 0 | pcre_fullinfo (regex->pcre_re, regex->extra, |
1508 | 0 | PCRE_INFO_BACKREFMAX, &value); |
1509 | |
|
1510 | 0 | return value; |
1511 | 0 | } |
1512 | | |
1513 | | /** |
1514 | | * g_regex_get_capture_count: |
1515 | | * @regex: a #GRegex |
1516 | | * |
1517 | | * Returns the number of capturing subpatterns in the pattern. |
1518 | | * |
1519 | | * Returns: the number of capturing subpatterns |
1520 | | * |
1521 | | * Since: 2.14 |
1522 | | */ |
1523 | | gint |
1524 | | g_regex_get_capture_count (const GRegex *regex) |
1525 | 0 | { |
1526 | 0 | gint value; |
1527 | |
|
1528 | 0 | pcre_fullinfo (regex->pcre_re, regex->extra, |
1529 | 0 | PCRE_INFO_CAPTURECOUNT, &value); |
1530 | |
|
1531 | 0 | return value; |
1532 | 0 | } |
1533 | | |
1534 | | /** |
1535 | | * g_regex_get_has_cr_or_lf: |
1536 | | * @regex: a #GRegex structure |
1537 | | * |
1538 | | * Checks whether the pattern contains explicit CR or LF references. |
1539 | | * |
1540 | | * Returns: %TRUE if the pattern contains explicit CR or LF references |
1541 | | * |
1542 | | * Since: 2.34 |
1543 | | */ |
1544 | | gboolean |
1545 | | g_regex_get_has_cr_or_lf (const GRegex *regex) |
1546 | 0 | { |
1547 | 0 | gint value; |
1548 | |
|
1549 | 0 | pcre_fullinfo (regex->pcre_re, regex->extra, |
1550 | 0 | PCRE_INFO_HASCRORLF, &value); |
1551 | |
|
1552 | 0 | return !!value; |
1553 | 0 | } |
1554 | | |
1555 | | /** |
1556 | | * g_regex_get_max_lookbehind: |
1557 | | * @regex: a #GRegex structure |
1558 | | * |
1559 | | * Gets the number of characters in the longest lookbehind assertion in the |
1560 | | * pattern. This information is useful when doing multi-segment matching using |
1561 | | * the partial matching facilities. |
1562 | | * |
1563 | | * Returns: the number of characters in the longest lookbehind assertion. |
1564 | | * |
1565 | | * Since: 2.38 |
1566 | | */ |
1567 | | gint |
1568 | | g_regex_get_max_lookbehind (const GRegex *regex) |
1569 | 0 | { |
1570 | 0 | gint max_lookbehind; |
1571 | |
|
1572 | 0 | pcre_fullinfo (regex->pcre_re, regex->extra, |
1573 | 0 | PCRE_INFO_MAXLOOKBEHIND, &max_lookbehind); |
1574 | |
|
1575 | 0 | return max_lookbehind; |
1576 | 0 | } |
1577 | | |
1578 | | /** |
1579 | | * g_regex_get_compile_flags: |
1580 | | * @regex: a #GRegex |
1581 | | * |
1582 | | * Returns the compile options that @regex was created with. |
1583 | | * |
1584 | | * Depending on the version of PCRE that is used, this may or may not |
1585 | | * include flags set by option expressions such as `(?i)` found at the |
1586 | | * top-level within the compiled pattern. |
1587 | | * |
1588 | | * Returns: flags from #GRegexCompileFlags |
1589 | | * |
1590 | | * Since: 2.26 |
1591 | | */ |
1592 | | GRegexCompileFlags |
1593 | | g_regex_get_compile_flags (const GRegex *regex) |
1594 | 0 | { |
1595 | 0 | g_return_val_if_fail (regex != NULL, 0); |
1596 | | |
1597 | 0 | return regex->compile_opts; |
1598 | 0 | } |
1599 | | |
1600 | | /** |
1601 | | * g_regex_get_match_flags: |
1602 | | * @regex: a #GRegex |
1603 | | * |
1604 | | * Returns the match options that @regex was created with. |
1605 | | * |
1606 | | * Returns: flags from #GRegexMatchFlags |
1607 | | * |
1608 | | * Since: 2.26 |
1609 | | */ |
1610 | | GRegexMatchFlags |
1611 | | g_regex_get_match_flags (const GRegex *regex) |
1612 | 0 | { |
1613 | 0 | g_return_val_if_fail (regex != NULL, 0); |
1614 | | |
1615 | 0 | return regex->match_opts & G_REGEX_MATCH_MASK; |
1616 | 0 | } |
1617 | | |
1618 | | /** |
1619 | | * g_regex_match_simple: |
1620 | | * @pattern: the regular expression |
1621 | | * @string: the string to scan for matches |
1622 | | * @compile_options: compile options for the regular expression, or 0 |
1623 | | * @match_options: match options, or 0 |
1624 | | * |
1625 | | * Scans for a match in @string for @pattern. |
1626 | | * |
1627 | | * This function is equivalent to g_regex_match() but it does not |
1628 | | * require to compile the pattern with g_regex_new(), avoiding some |
1629 | | * lines of code when you need just to do a match without extracting |
1630 | | * substrings, capture counts, and so on. |
1631 | | * |
1632 | | * If this function is to be called on the same @pattern more than |
1633 | | * once, it's more efficient to compile the pattern once with |
1634 | | * g_regex_new() and then use g_regex_match(). |
1635 | | * |
1636 | | * Returns: %TRUE if the string matched, %FALSE otherwise |
1637 | | * |
1638 | | * Since: 2.14 |
1639 | | */ |
1640 | | gboolean |
1641 | | g_regex_match_simple (const gchar *pattern, |
1642 | | const gchar *string, |
1643 | | GRegexCompileFlags compile_options, |
1644 | | GRegexMatchFlags match_options) |
1645 | 0 | { |
1646 | 0 | GRegex *regex; |
1647 | 0 | gboolean result; |
1648 | |
|
1649 | 0 | regex = g_regex_new (pattern, compile_options, 0, NULL); |
1650 | 0 | if (!regex) |
1651 | 0 | return FALSE; |
1652 | 0 | result = g_regex_match_full (regex, string, -1, 0, match_options, NULL, NULL); |
1653 | 0 | g_regex_unref (regex); |
1654 | 0 | return result; |
1655 | 0 | } |
1656 | | |
1657 | | /** |
1658 | | * g_regex_match: |
1659 | | * @regex: a #GRegex structure from g_regex_new() |
1660 | | * @string: the string to scan for matches |
1661 | | * @match_options: match options |
1662 | | * @match_info: (out) (optional): pointer to location where to store |
1663 | | * the #GMatchInfo, or %NULL if you do not need it |
1664 | | * |
1665 | | * Scans for a match in @string for the pattern in @regex. |
1666 | | * The @match_options are combined with the match options specified |
1667 | | * when the @regex structure was created, letting you have more |
1668 | | * flexibility in reusing #GRegex structures. |
1669 | | * |
1670 | | * Unless %G_REGEX_RAW is specified in the options, @string must be valid UTF-8. |
1671 | | * |
1672 | | * A #GMatchInfo structure, used to get information on the match, |
1673 | | * is stored in @match_info if not %NULL. Note that if @match_info |
1674 | | * is not %NULL then it is created even if the function returns %FALSE, |
1675 | | * i.e. you must free it regardless if regular expression actually matched. |
1676 | | * |
1677 | | * To retrieve all the non-overlapping matches of the pattern in |
1678 | | * string you can use g_match_info_next(). |
1679 | | * |
1680 | | * |[<!-- language="C" --> |
1681 | | * static void |
1682 | | * print_uppercase_words (const gchar *string) |
1683 | | * { |
1684 | | * // Print all uppercase-only words. |
1685 | | * GRegex *regex; |
1686 | | * GMatchInfo *match_info; |
1687 | | * |
1688 | | * regex = g_regex_new ("[A-Z]+", 0, 0, NULL); |
1689 | | * g_regex_match (regex, string, 0, &match_info); |
1690 | | * while (g_match_info_matches (match_info)) |
1691 | | * { |
1692 | | * gchar *word = g_match_info_fetch (match_info, 0); |
1693 | | * g_print ("Found: %s\n", word); |
1694 | | * g_free (word); |
1695 | | * g_match_info_next (match_info, NULL); |
1696 | | * } |
1697 | | * g_match_info_free (match_info); |
1698 | | * g_regex_unref (regex); |
1699 | | * } |
1700 | | * ]| |
1701 | | * |
1702 | | * @string is not copied and is used in #GMatchInfo internally. If |
1703 | | * you use any #GMatchInfo method (except g_match_info_free()) after |
1704 | | * freeing or modifying @string then the behaviour is undefined. |
1705 | | * |
1706 | | * Returns: %TRUE is the string matched, %FALSE otherwise |
1707 | | * |
1708 | | * Since: 2.14 |
1709 | | */ |
1710 | | gboolean |
1711 | | g_regex_match (const GRegex *regex, |
1712 | | const gchar *string, |
1713 | | GRegexMatchFlags match_options, |
1714 | | GMatchInfo **match_info) |
1715 | 0 | { |
1716 | 0 | return g_regex_match_full (regex, string, -1, 0, match_options, |
1717 | 0 | match_info, NULL); |
1718 | 0 | } |
1719 | | |
1720 | | /** |
1721 | | * g_regex_match_full: |
1722 | | * @regex: a #GRegex structure from g_regex_new() |
1723 | | * @string: (array length=string_len): the string to scan for matches |
1724 | | * @string_len: the length of @string, in bytes, or -1 if @string is nul-terminated |
1725 | | * @start_position: starting index of the string to match, in bytes |
1726 | | * @match_options: match options |
1727 | | * @match_info: (out) (optional): pointer to location where to store |
1728 | | * the #GMatchInfo, or %NULL if you do not need it |
1729 | | * @error: location to store the error occurring, or %NULL to ignore errors |
1730 | | * |
1731 | | * Scans for a match in @string for the pattern in @regex. |
1732 | | * The @match_options are combined with the match options specified |
1733 | | * when the @regex structure was created, letting you have more |
1734 | | * flexibility in reusing #GRegex structures. |
1735 | | * |
1736 | | * Setting @start_position differs from just passing over a shortened |
1737 | | * string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern |
1738 | | * that begins with any kind of lookbehind assertion, such as "\b". |
1739 | | * |
1740 | | * Unless %G_REGEX_RAW is specified in the options, @string must be valid UTF-8. |
1741 | | * |
1742 | | * A #GMatchInfo structure, used to get information on the match, is |
1743 | | * stored in @match_info if not %NULL. Note that if @match_info is |
1744 | | * not %NULL then it is created even if the function returns %FALSE, |
1745 | | * i.e. you must free it regardless if regular expression actually |
1746 | | * matched. |
1747 | | * |
1748 | | * @string is not copied and is used in #GMatchInfo internally. If |
1749 | | * you use any #GMatchInfo method (except g_match_info_free()) after |
1750 | | * freeing or modifying @string then the behaviour is undefined. |
1751 | | * |
1752 | | * To retrieve all the non-overlapping matches of the pattern in |
1753 | | * string you can use g_match_info_next(). |
1754 | | * |
1755 | | * |[<!-- language="C" --> |
1756 | | * static void |
1757 | | * print_uppercase_words (const gchar *string) |
1758 | | * { |
1759 | | * // Print all uppercase-only words. |
1760 | | * GRegex *regex; |
1761 | | * GMatchInfo *match_info; |
1762 | | * GError *error = NULL; |
1763 | | * |
1764 | | * regex = g_regex_new ("[A-Z]+", 0, 0, NULL); |
1765 | | * g_regex_match_full (regex, string, -1, 0, 0, &match_info, &error); |
1766 | | * while (g_match_info_matches (match_info)) |
1767 | | * { |
1768 | | * gchar *word = g_match_info_fetch (match_info, 0); |
1769 | | * g_print ("Found: %s\n", word); |
1770 | | * g_free (word); |
1771 | | * g_match_info_next (match_info, &error); |
1772 | | * } |
1773 | | * g_match_info_free (match_info); |
1774 | | * g_regex_unref (regex); |
1775 | | * if (error != NULL) |
1776 | | * { |
1777 | | * g_printerr ("Error while matching: %s\n", error->message); |
1778 | | * g_error_free (error); |
1779 | | * } |
1780 | | * } |
1781 | | * ]| |
1782 | | * |
1783 | | * Returns: %TRUE is the string matched, %FALSE otherwise |
1784 | | * |
1785 | | * Since: 2.14 |
1786 | | */ |
1787 | | gboolean |
1788 | | g_regex_match_full (const GRegex *regex, |
1789 | | const gchar *string, |
1790 | | gssize string_len, |
1791 | | gint start_position, |
1792 | | GRegexMatchFlags match_options, |
1793 | | GMatchInfo **match_info, |
1794 | | GError **error) |
1795 | 0 | { |
1796 | 0 | GMatchInfo *info; |
1797 | 0 | gboolean match_ok; |
1798 | |
|
1799 | 0 | g_return_val_if_fail (regex != NULL, FALSE); |
1800 | 0 | g_return_val_if_fail (string != NULL, FALSE); |
1801 | 0 | g_return_val_if_fail (start_position >= 0, FALSE); |
1802 | 0 | g_return_val_if_fail (error == NULL || *error == NULL, FALSE); |
1803 | 0 | g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, FALSE); |
1804 | | |
1805 | 0 | info = match_info_new (regex, string, string_len, start_position, |
1806 | 0 | match_options, FALSE); |
1807 | 0 | match_ok = g_match_info_next (info, error); |
1808 | 0 | if (match_info != NULL) |
1809 | 0 | *match_info = info; |
1810 | 0 | else |
1811 | 0 | g_match_info_free (info); |
1812 | |
|
1813 | 0 | return match_ok; |
1814 | 0 | } |
1815 | | |
1816 | | /** |
1817 | | * g_regex_match_all: |
1818 | | * @regex: a #GRegex structure from g_regex_new() |
1819 | | * @string: the string to scan for matches |
1820 | | * @match_options: match options |
1821 | | * @match_info: (out) (optional): pointer to location where to store |
1822 | | * the #GMatchInfo, or %NULL if you do not need it |
1823 | | * |
1824 | | * Using the standard algorithm for regular expression matching only |
1825 | | * the longest match in the string is retrieved. This function uses |
1826 | | * a different algorithm so it can retrieve all the possible matches. |
1827 | | * For more documentation see g_regex_match_all_full(). |
1828 | | * |
1829 | | * A #GMatchInfo structure, used to get information on the match, is |
1830 | | * stored in @match_info if not %NULL. Note that if @match_info is |
1831 | | * not %NULL then it is created even if the function returns %FALSE, |
1832 | | * i.e. you must free it regardless if regular expression actually |
1833 | | * matched. |
1834 | | * |
1835 | | * @string is not copied and is used in #GMatchInfo internally. If |
1836 | | * you use any #GMatchInfo method (except g_match_info_free()) after |
1837 | | * freeing or modifying @string then the behaviour is undefined. |
1838 | | * |
1839 | | * Returns: %TRUE is the string matched, %FALSE otherwise |
1840 | | * |
1841 | | * Since: 2.14 |
1842 | | */ |
1843 | | gboolean |
1844 | | g_regex_match_all (const GRegex *regex, |
1845 | | const gchar *string, |
1846 | | GRegexMatchFlags match_options, |
1847 | | GMatchInfo **match_info) |
1848 | 0 | { |
1849 | 0 | return g_regex_match_all_full (regex, string, -1, 0, match_options, |
1850 | 0 | match_info, NULL); |
1851 | 0 | } |
1852 | | |
1853 | | /** |
1854 | | * g_regex_match_all_full: |
1855 | | * @regex: a #GRegex structure from g_regex_new() |
1856 | | * @string: (array length=string_len): the string to scan for matches |
1857 | | * @string_len: the length of @string, in bytes, or -1 if @string is nul-terminated |
1858 | | * @start_position: starting index of the string to match, in bytes |
1859 | | * @match_options: match options |
1860 | | * @match_info: (out) (optional): pointer to location where to store |
1861 | | * the #GMatchInfo, or %NULL if you do not need it |
1862 | | * @error: location to store the error occurring, or %NULL to ignore errors |
1863 | | * |
1864 | | * Using the standard algorithm for regular expression matching only |
1865 | | * the longest match in the @string is retrieved, it is not possible |
1866 | | * to obtain all the available matches. For instance matching |
1867 | | * "<a> <b> <c>" against the pattern "<.*>" |
1868 | | * you get "<a> <b> <c>". |
1869 | | * |
1870 | | * This function uses a different algorithm (called DFA, i.e. deterministic |
1871 | | * finite automaton), so it can retrieve all the possible matches, all |
1872 | | * starting at the same point in the string. For instance matching |
1873 | | * "<a> <b> <c>" against the pattern "<.*>;" |
1874 | | * you would obtain three matches: "<a> <b> <c>", |
1875 | | * "<a> <b>" and "<a>". |
1876 | | * |
1877 | | * The number of matched strings is retrieved using |
1878 | | * g_match_info_get_match_count(). To obtain the matched strings and |
1879 | | * their position you can use, respectively, g_match_info_fetch() and |
1880 | | * g_match_info_fetch_pos(). Note that the strings are returned in |
1881 | | * reverse order of length; that is, the longest matching string is |
1882 | | * given first. |
1883 | | * |
1884 | | * Note that the DFA algorithm is slower than the standard one and it |
1885 | | * is not able to capture substrings, so backreferences do not work. |
1886 | | * |
1887 | | * Setting @start_position differs from just passing over a shortened |
1888 | | * string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern |
1889 | | * that begins with any kind of lookbehind assertion, such as "\b". |
1890 | | * |
1891 | | * Unless %G_REGEX_RAW is specified in the options, @string must be valid UTF-8. |
1892 | | * |
1893 | | * A #GMatchInfo structure, used to get information on the match, is |
1894 | | * stored in @match_info if not %NULL. Note that if @match_info is |
1895 | | * not %NULL then it is created even if the function returns %FALSE, |
1896 | | * i.e. you must free it regardless if regular expression actually |
1897 | | * matched. |
1898 | | * |
1899 | | * @string is not copied and is used in #GMatchInfo internally. If |
1900 | | * you use any #GMatchInfo method (except g_match_info_free()) after |
1901 | | * freeing or modifying @string then the behaviour is undefined. |
1902 | | * |
1903 | | * Returns: %TRUE is the string matched, %FALSE otherwise |
1904 | | * |
1905 | | * Since: 2.14 |
1906 | | */ |
1907 | | gboolean |
1908 | | g_regex_match_all_full (const GRegex *regex, |
1909 | | const gchar *string, |
1910 | | gssize string_len, |
1911 | | gint start_position, |
1912 | | GRegexMatchFlags match_options, |
1913 | | GMatchInfo **match_info, |
1914 | | GError **error) |
1915 | 0 | { |
1916 | 0 | GMatchInfo *info; |
1917 | 0 | gboolean done; |
1918 | 0 | pcre *pcre_re; |
1919 | 0 | pcre_extra *extra; |
1920 | 0 | gboolean retval; |
1921 | |
|
1922 | 0 | g_return_val_if_fail (regex != NULL, FALSE); |
1923 | 0 | g_return_val_if_fail (string != NULL, FALSE); |
1924 | 0 | g_return_val_if_fail (start_position >= 0, FALSE); |
1925 | 0 | g_return_val_if_fail (error == NULL || *error == NULL, FALSE); |
1926 | 0 | g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, FALSE); |
1927 | | |
1928 | | #ifdef PCRE_NO_AUTO_POSSESS |
1929 | | /* For PCRE >= 8.34 we need to turn off PCRE_NO_AUTO_POSSESS, which |
1930 | | * is an optimization for normal regex matching, but results in omitting |
1931 | | * some shorter matches here, and an observable behaviour change. |
1932 | | * |
1933 | | * DFA matching is rather niche, and very rarely used according to |
1934 | | * codesearch.debian.net, so don't bother caching the recompiled RE. */ |
1935 | | pcre_re = regex_compile (regex->pattern, |
1936 | | regex->compile_opts | PCRE_NO_AUTO_POSSESS, |
1937 | | NULL, NULL, error); |
1938 | | |
1939 | | if (pcre_re == NULL) |
1940 | | return FALSE; |
1941 | | |
1942 | | /* Not bothering to cache the optimization data either, with similar |
1943 | | * reasoning */ |
1944 | | extra = NULL; |
1945 | | #else |
1946 | | /* For PCRE < 8.33 the precompiled regex is fine. */ |
1947 | 0 | pcre_re = regex->pcre_re; |
1948 | 0 | extra = regex->extra; |
1949 | 0 | #endif |
1950 | |
|
1951 | 0 | info = match_info_new (regex, string, string_len, start_position, |
1952 | 0 | match_options, TRUE); |
1953 | |
|
1954 | 0 | done = FALSE; |
1955 | 0 | while (!done) |
1956 | 0 | { |
1957 | 0 | done = TRUE; |
1958 | 0 | info->matches = pcre_dfa_exec (pcre_re, extra, |
1959 | 0 | info->string, info->string_len, |
1960 | 0 | info->pos, |
1961 | 0 | regex->match_opts | match_options, |
1962 | 0 | info->offsets, info->n_offsets, |
1963 | 0 | info->workspace, info->n_workspace); |
1964 | 0 | if (info->matches == PCRE_ERROR_DFA_WSSIZE) |
1965 | 0 | { |
1966 | | /* info->workspace is too small. */ |
1967 | 0 | info->n_workspace *= 2; |
1968 | 0 | info->workspace = g_realloc (info->workspace, |
1969 | 0 | info->n_workspace * sizeof (gint)); |
1970 | 0 | done = FALSE; |
1971 | 0 | } |
1972 | 0 | else if (info->matches == 0) |
1973 | 0 | { |
1974 | | /* info->offsets is too small. */ |
1975 | 0 | info->n_offsets *= 2; |
1976 | 0 | info->offsets = g_realloc (info->offsets, |
1977 | 0 | info->n_offsets * sizeof (gint)); |
1978 | 0 | done = FALSE; |
1979 | 0 | } |
1980 | 0 | else if (IS_PCRE_ERROR (info->matches)) |
1981 | 0 | { |
1982 | 0 | g_set_error (error, G_REGEX_ERROR, G_REGEX_ERROR_MATCH, |
1983 | 0 | _("Error while matching regular expression %s: %s"), |
1984 | 0 | regex->pattern, match_error (info->matches)); |
1985 | 0 | } |
1986 | 0 | } |
1987 | |
|
1988 | | #ifdef PCRE_NO_AUTO_POSSESS |
1989 | | pcre_free (pcre_re); |
1990 | | #endif |
1991 | | |
1992 | | /* set info->pos to -1 so that a call to g_match_info_next() fails. */ |
1993 | 0 | info->pos = -1; |
1994 | 0 | retval = info->matches >= 0; |
1995 | |
|
1996 | 0 | if (match_info != NULL) |
1997 | 0 | *match_info = info; |
1998 | 0 | else |
1999 | 0 | g_match_info_free (info); |
2000 | |
|
2001 | 0 | return retval; |
2002 | 0 | } |
2003 | | |
2004 | | /** |
2005 | | * g_regex_get_string_number: |
2006 | | * @regex: #GRegex structure |
2007 | | * @name: name of the subexpression |
2008 | | * |
2009 | | * Retrieves the number of the subexpression named @name. |
2010 | | * |
2011 | | * Returns: The number of the subexpression or -1 if @name |
2012 | | * does not exists |
2013 | | * |
2014 | | * Since: 2.14 |
2015 | | */ |
2016 | | gint |
2017 | | g_regex_get_string_number (const GRegex *regex, |
2018 | | const gchar *name) |
2019 | 0 | { |
2020 | 0 | gint num; |
2021 | |
|
2022 | 0 | g_return_val_if_fail (regex != NULL, -1); |
2023 | 0 | g_return_val_if_fail (name != NULL, -1); |
2024 | | |
2025 | 0 | num = pcre_get_stringnumber (regex->pcre_re, name); |
2026 | 0 | if (num == PCRE_ERROR_NOSUBSTRING) |
2027 | 0 | num = -1; |
2028 | |
|
2029 | 0 | return num; |
2030 | 0 | } |
2031 | | |
2032 | | /** |
2033 | | * g_regex_split_simple: |
2034 | | * @pattern: the regular expression |
2035 | | * @string: the string to scan for matches |
2036 | | * @compile_options: compile options for the regular expression, or 0 |
2037 | | * @match_options: match options, or 0 |
2038 | | * |
2039 | | * Breaks the string on the pattern, and returns an array of |
2040 | | * the tokens. If the pattern contains capturing parentheses, |
2041 | | * then the text for each of the substrings will also be returned. |
2042 | | * If the pattern does not match anywhere in the string, then the |
2043 | | * whole string is returned as the first token. |
2044 | | * |
2045 | | * This function is equivalent to g_regex_split() but it does |
2046 | | * not require to compile the pattern with g_regex_new(), avoiding |
2047 | | * some lines of code when you need just to do a split without |
2048 | | * extracting substrings, capture counts, and so on. |
2049 | | * |
2050 | | * If this function is to be called on the same @pattern more than |
2051 | | * once, it's more efficient to compile the pattern once with |
2052 | | * g_regex_new() and then use g_regex_split(). |
2053 | | * |
2054 | | * As a special case, the result of splitting the empty string "" |
2055 | | * is an empty vector, not a vector containing a single string. |
2056 | | * The reason for this special case is that being able to represent |
2057 | | * an empty vector is typically more useful than consistent handling |
2058 | | * of empty elements. If you do need to represent empty elements, |
2059 | | * you'll need to check for the empty string before calling this |
2060 | | * function. |
2061 | | * |
2062 | | * A pattern that can match empty strings splits @string into |
2063 | | * separate characters wherever it matches the empty string between |
2064 | | * characters. For example splitting "ab c" using as a separator |
2065 | | * "\s*", you will get "a", "b" and "c". |
2066 | | * |
2067 | | * Returns: (transfer full): a %NULL-terminated array of strings. Free |
2068 | | * it using g_strfreev() |
2069 | | * |
2070 | | * Since: 2.14 |
2071 | | **/ |
2072 | | gchar ** |
2073 | | g_regex_split_simple (const gchar *pattern, |
2074 | | const gchar *string, |
2075 | | GRegexCompileFlags compile_options, |
2076 | | GRegexMatchFlags match_options) |
2077 | 0 | { |
2078 | 0 | GRegex *regex; |
2079 | 0 | gchar **result; |
2080 | |
|
2081 | 0 | regex = g_regex_new (pattern, compile_options, 0, NULL); |
2082 | 0 | if (!regex) |
2083 | 0 | return NULL; |
2084 | | |
2085 | 0 | result = g_regex_split_full (regex, string, -1, 0, match_options, 0, NULL); |
2086 | 0 | g_regex_unref (regex); |
2087 | 0 | return result; |
2088 | 0 | } |
2089 | | |
2090 | | /** |
2091 | | * g_regex_split: |
2092 | | * @regex: a #GRegex structure |
2093 | | * @string: the string to split with the pattern |
2094 | | * @match_options: match time option flags |
2095 | | * |
2096 | | * Breaks the string on the pattern, and returns an array of the tokens. |
2097 | | * If the pattern contains capturing parentheses, then the text for each |
2098 | | * of the substrings will also be returned. If the pattern does not match |
2099 | | * anywhere in the string, then the whole string is returned as the first |
2100 | | * token. |
2101 | | * |
2102 | | * As a special case, the result of splitting the empty string "" is an |
2103 | | * empty vector, not a vector containing a single string. The reason for |
2104 | | * this special case is that being able to represent an empty vector is |
2105 | | * typically more useful than consistent handling of empty elements. If |
2106 | | * you do need to represent empty elements, you'll need to check for the |
2107 | | * empty string before calling this function. |
2108 | | * |
2109 | | * A pattern that can match empty strings splits @string into separate |
2110 | | * characters wherever it matches the empty string between characters. |
2111 | | * For example splitting "ab c" using as a separator "\s*", you will get |
2112 | | * "a", "b" and "c". |
2113 | | * |
2114 | | * Returns: (transfer full): a %NULL-terminated gchar ** array. Free |
2115 | | * it using g_strfreev() |
2116 | | * |
2117 | | * Since: 2.14 |
2118 | | **/ |
2119 | | gchar ** |
2120 | | g_regex_split (const GRegex *regex, |
2121 | | const gchar *string, |
2122 | | GRegexMatchFlags match_options) |
2123 | 0 | { |
2124 | 0 | return g_regex_split_full (regex, string, -1, 0, |
2125 | 0 | match_options, 0, NULL); |
2126 | 0 | } |
2127 | | |
2128 | | /** |
2129 | | * g_regex_split_full: |
2130 | | * @regex: a #GRegex structure |
2131 | | * @string: (array length=string_len): the string to split with the pattern |
2132 | | * @string_len: the length of @string, in bytes, or -1 if @string is nul-terminated |
2133 | | * @start_position: starting index of the string to match, in bytes |
2134 | | * @match_options: match time option flags |
2135 | | * @max_tokens: the maximum number of tokens to split @string into. |
2136 | | * If this is less than 1, the string is split completely |
2137 | | * @error: return location for a #GError |
2138 | | * |
2139 | | * Breaks the string on the pattern, and returns an array of the tokens. |
2140 | | * If the pattern contains capturing parentheses, then the text for each |
2141 | | * of the substrings will also be returned. If the pattern does not match |
2142 | | * anywhere in the string, then the whole string is returned as the first |
2143 | | * token. |
2144 | | * |
2145 | | * As a special case, the result of splitting the empty string "" is an |
2146 | | * empty vector, not a vector containing a single string. The reason for |
2147 | | * this special case is that being able to represent an empty vector is |
2148 | | * typically more useful than consistent handling of empty elements. If |
2149 | | * you do need to represent empty elements, you'll need to check for the |
2150 | | * empty string before calling this function. |
2151 | | * |
2152 | | * A pattern that can match empty strings splits @string into separate |
2153 | | * characters wherever it matches the empty string between characters. |
2154 | | * For example splitting "ab c" using as a separator "\s*", you will get |
2155 | | * "a", "b" and "c". |
2156 | | * |
2157 | | * Setting @start_position differs from just passing over a shortened |
2158 | | * string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern |
2159 | | * that begins with any kind of lookbehind assertion, such as "\b". |
2160 | | * |
2161 | | * Returns: (transfer full): a %NULL-terminated gchar ** array. Free |
2162 | | * it using g_strfreev() |
2163 | | * |
2164 | | * Since: 2.14 |
2165 | | **/ |
2166 | | gchar ** |
2167 | | g_regex_split_full (const GRegex *regex, |
2168 | | const gchar *string, |
2169 | | gssize string_len, |
2170 | | gint start_position, |
2171 | | GRegexMatchFlags match_options, |
2172 | | gint max_tokens, |
2173 | | GError **error) |
2174 | 0 | { |
2175 | 0 | GError *tmp_error = NULL; |
2176 | 0 | GMatchInfo *match_info; |
2177 | 0 | GList *list, *last; |
2178 | 0 | gint i; |
2179 | 0 | gint token_count; |
2180 | 0 | gboolean match_ok; |
2181 | | /* position of the last separator. */ |
2182 | 0 | gint last_separator_end; |
2183 | | /* was the last match 0 bytes long? */ |
2184 | 0 | gboolean last_match_is_empty; |
2185 | | /* the returned array of char **s */ |
2186 | 0 | gchar **string_list; |
2187 | |
|
2188 | 0 | g_return_val_if_fail (regex != NULL, NULL); |
2189 | 0 | g_return_val_if_fail (string != NULL, NULL); |
2190 | 0 | g_return_val_if_fail (start_position >= 0, NULL); |
2191 | 0 | g_return_val_if_fail (error == NULL || *error == NULL, NULL); |
2192 | 0 | g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL); |
2193 | | |
2194 | 0 | if (max_tokens <= 0) |
2195 | 0 | max_tokens = G_MAXINT; |
2196 | |
|
2197 | 0 | if (string_len < 0) |
2198 | 0 | string_len = strlen (string); |
2199 | | |
2200 | | /* zero-length string */ |
2201 | 0 | if (string_len - start_position == 0) |
2202 | 0 | return g_new0 (gchar *, 1); |
2203 | | |
2204 | 0 | if (max_tokens == 1) |
2205 | 0 | { |
2206 | 0 | string_list = g_new0 (gchar *, 2); |
2207 | 0 | string_list[0] = g_strndup (&string[start_position], |
2208 | 0 | string_len - start_position); |
2209 | 0 | return string_list; |
2210 | 0 | } |
2211 | | |
2212 | 0 | list = NULL; |
2213 | 0 | token_count = 0; |
2214 | 0 | last_separator_end = start_position; |
2215 | 0 | last_match_is_empty = FALSE; |
2216 | |
|
2217 | 0 | match_ok = g_regex_match_full (regex, string, string_len, start_position, |
2218 | 0 | match_options, &match_info, &tmp_error); |
2219 | |
|
2220 | 0 | while (tmp_error == NULL) |
2221 | 0 | { |
2222 | 0 | if (match_ok) |
2223 | 0 | { |
2224 | 0 | last_match_is_empty = |
2225 | 0 | (match_info->offsets[0] == match_info->offsets[1]); |
2226 | | |
2227 | | /* we need to skip empty separators at the same position of the end |
2228 | | * of another separator. e.g. the string is "a b" and the separator |
2229 | | * is " *", so from 1 to 2 we have a match and at position 2 we have |
2230 | | * an empty match. */ |
2231 | 0 | if (last_separator_end != match_info->offsets[1]) |
2232 | 0 | { |
2233 | 0 | gchar *token; |
2234 | 0 | gint match_count; |
2235 | |
|
2236 | 0 | token = g_strndup (string + last_separator_end, |
2237 | 0 | match_info->offsets[0] - last_separator_end); |
2238 | 0 | list = g_list_prepend (list, token); |
2239 | 0 | token_count++; |
2240 | | |
2241 | | /* if there were substrings, these need to be added to |
2242 | | * the list. */ |
2243 | 0 | match_count = g_match_info_get_match_count (match_info); |
2244 | 0 | if (match_count > 1) |
2245 | 0 | { |
2246 | 0 | for (i = 1; i < match_count; i++) |
2247 | 0 | list = g_list_prepend (list, g_match_info_fetch (match_info, i)); |
2248 | 0 | } |
2249 | 0 | } |
2250 | 0 | } |
2251 | 0 | else |
2252 | 0 | { |
2253 | | /* if there was no match, copy to end of string. */ |
2254 | 0 | if (!last_match_is_empty) |
2255 | 0 | { |
2256 | 0 | gchar *token = g_strndup (string + last_separator_end, |
2257 | 0 | match_info->string_len - last_separator_end); |
2258 | 0 | list = g_list_prepend (list, token); |
2259 | 0 | } |
2260 | | /* no more tokens, end the loop. */ |
2261 | 0 | break; |
2262 | 0 | } |
2263 | | |
2264 | | /* -1 to leave room for the last part. */ |
2265 | 0 | if (token_count >= max_tokens - 1) |
2266 | 0 | { |
2267 | | /* we have reached the maximum number of tokens, so we copy |
2268 | | * the remaining part of the string. */ |
2269 | 0 | if (last_match_is_empty) |
2270 | 0 | { |
2271 | | /* the last match was empty, so we have moved one char |
2272 | | * after the real position to avoid empty matches at the |
2273 | | * same position. */ |
2274 | 0 | match_info->pos = PREV_CHAR (regex, &string[match_info->pos]) - string; |
2275 | 0 | } |
2276 | | /* the if is needed in the case we have terminated the available |
2277 | | * tokens, but we are at the end of the string, so there are no |
2278 | | * characters left to copy. */ |
2279 | 0 | if (string_len > match_info->pos) |
2280 | 0 | { |
2281 | 0 | gchar *token = g_strndup (string + match_info->pos, |
2282 | 0 | string_len - match_info->pos); |
2283 | 0 | list = g_list_prepend (list, token); |
2284 | 0 | } |
2285 | | /* end the loop. */ |
2286 | 0 | break; |
2287 | 0 | } |
2288 | | |
2289 | 0 | last_separator_end = match_info->pos; |
2290 | 0 | if (last_match_is_empty) |
2291 | | /* if the last match was empty, g_match_info_next() has moved |
2292 | | * forward to avoid infinite loops, but we still need to copy that |
2293 | | * character. */ |
2294 | 0 | last_separator_end = PREV_CHAR (regex, &string[last_separator_end]) - string; |
2295 | |
|
2296 | 0 | match_ok = g_match_info_next (match_info, &tmp_error); |
2297 | 0 | } |
2298 | 0 | g_match_info_free (match_info); |
2299 | 0 | if (tmp_error != NULL) |
2300 | 0 | { |
2301 | 0 | g_propagate_error (error, tmp_error); |
2302 | 0 | g_list_free_full (list, g_free); |
2303 | 0 | return NULL; |
2304 | 0 | } |
2305 | | |
2306 | 0 | string_list = g_new (gchar *, g_list_length (list) + 1); |
2307 | 0 | i = 0; |
2308 | 0 | for (last = g_list_last (list); last; last = g_list_previous (last)) |
2309 | 0 | string_list[i++] = last->data; |
2310 | 0 | string_list[i] = NULL; |
2311 | 0 | g_list_free (list); |
2312 | |
|
2313 | 0 | return string_list; |
2314 | 0 | } |
2315 | | |
/* Kinds of pieces a parsed replacement text is made of; stored in
 * InterpolationData.type. */
enum
{
  REPL_TYPE_STRING             = 0, /* literal text held in ->text */
  REPL_TYPE_CHARACTER          = 1, /* single character held in ->c */
  REPL_TYPE_SYMBOLIC_REFERENCE = 2, /* named group, name held in ->text */
  REPL_TYPE_NUMERIC_REFERENCE  = 3, /* numbered group, index held in ->num */
  REPL_TYPE_CHANGE_CASE        = 4  /* case switch held in ->change_case */
};
2324 | | |
/* Case conversion modes selected by the \l, \u, \L, \U and \E escapes.
 * Each mode is a distinct bit so that the *_MASK values can test for a
 * whole family of modes at once. */
typedef enum
{
  CHANGE_CASE_NONE         = 0x01,
  CHANGE_CASE_UPPER        = 0x02,
  CHANGE_CASE_LOWER        = 0x04,
  CHANGE_CASE_UPPER_SINGLE = 0x08,
  CHANGE_CASE_LOWER_SINGLE = 0x10,

  /* modes that affect only the next character */
  CHANGE_CASE_SINGLE_MASK  = CHANGE_CASE_UPPER_SINGLE | CHANGE_CASE_LOWER_SINGLE,
  /* modes that convert to lower case */
  CHANGE_CASE_LOWER_MASK   = CHANGE_CASE_LOWER | CHANGE_CASE_LOWER_SINGLE,
  /* modes that convert to upper case */
  CHANGE_CASE_UPPER_MASK   = CHANGE_CASE_UPPER | CHANGE_CASE_UPPER_SINGLE
} ChangeCase;
2336 | | |
/* One piece of a parsed replacement text. Which members are meaningful
 * depends on @type (one of the REPL_TYPE_* values). */
struct _InterpolationData
{
  gchar     *text;        /* literal text or group name; released with g_free() */
  gint       type;        /* REPL_TYPE_* discriminator */
  gint       num;         /* group number for REPL_TYPE_NUMERIC_REFERENCE */
  gchar      c;           /* character for REPL_TYPE_CHARACTER */
  ChangeCase change_case; /* mode for REPL_TYPE_CHANGE_CASE */
};
2345 | | |
2346 | | static void |
2347 | | free_interpolation_data (InterpolationData *data) |
2348 | 0 | { |
2349 | 0 | g_free (data->text); |
2350 | 0 | g_free (data); |
2351 | 0 | } |
2352 | | |
2353 | | static const gchar * |
2354 | | expand_escape (const gchar *replacement, |
2355 | | const gchar *p, |
2356 | | InterpolationData *data, |
2357 | | GError **error) |
2358 | 0 | { |
2359 | 0 | const gchar *q, *r; |
2360 | 0 | gint x, d, h, i; |
2361 | 0 | const gchar *error_detail; |
2362 | 0 | gint base = 0; |
2363 | 0 | GError *tmp_error = NULL; |
2364 | |
|
2365 | 0 | p++; |
2366 | 0 | switch (*p) |
2367 | 0 | { |
2368 | 0 | case 't': |
2369 | 0 | p++; |
2370 | 0 | data->c = '\t'; |
2371 | 0 | data->type = REPL_TYPE_CHARACTER; |
2372 | 0 | break; |
2373 | 0 | case 'n': |
2374 | 0 | p++; |
2375 | 0 | data->c = '\n'; |
2376 | 0 | data->type = REPL_TYPE_CHARACTER; |
2377 | 0 | break; |
2378 | 0 | case 'v': |
2379 | 0 | p++; |
2380 | 0 | data->c = '\v'; |
2381 | 0 | data->type = REPL_TYPE_CHARACTER; |
2382 | 0 | break; |
2383 | 0 | case 'r': |
2384 | 0 | p++; |
2385 | 0 | data->c = '\r'; |
2386 | 0 | data->type = REPL_TYPE_CHARACTER; |
2387 | 0 | break; |
2388 | 0 | case 'f': |
2389 | 0 | p++; |
2390 | 0 | data->c = '\f'; |
2391 | 0 | data->type = REPL_TYPE_CHARACTER; |
2392 | 0 | break; |
2393 | 0 | case 'a': |
2394 | 0 | p++; |
2395 | 0 | data->c = '\a'; |
2396 | 0 | data->type = REPL_TYPE_CHARACTER; |
2397 | 0 | break; |
2398 | 0 | case 'b': |
2399 | 0 | p++; |
2400 | 0 | data->c = '\b'; |
2401 | 0 | data->type = REPL_TYPE_CHARACTER; |
2402 | 0 | break; |
2403 | 0 | case '\\': |
2404 | 0 | p++; |
2405 | 0 | data->c = '\\'; |
2406 | 0 | data->type = REPL_TYPE_CHARACTER; |
2407 | 0 | break; |
2408 | 0 | case 'x': |
2409 | 0 | p++; |
2410 | 0 | x = 0; |
2411 | 0 | if (*p == '{') |
2412 | 0 | { |
2413 | 0 | p++; |
2414 | 0 | do |
2415 | 0 | { |
2416 | 0 | h = g_ascii_xdigit_value (*p); |
2417 | 0 | if (h < 0) |
2418 | 0 | { |
2419 | 0 | error_detail = _("hexadecimal digit or “}” expected"); |
2420 | 0 | goto error; |
2421 | 0 | } |
2422 | 0 | x = x * 16 + h; |
2423 | 0 | p++; |
2424 | 0 | } |
2425 | 0 | while (*p != '}'); |
2426 | 0 | p++; |
2427 | 0 | } |
2428 | 0 | else |
2429 | 0 | { |
2430 | 0 | for (i = 0; i < 2; i++) |
2431 | 0 | { |
2432 | 0 | h = g_ascii_xdigit_value (*p); |
2433 | 0 | if (h < 0) |
2434 | 0 | { |
2435 | 0 | error_detail = _("hexadecimal digit expected"); |
2436 | 0 | goto error; |
2437 | 0 | } |
2438 | 0 | x = x * 16 + h; |
2439 | 0 | p++; |
2440 | 0 | } |
2441 | 0 | } |
2442 | 0 | data->type = REPL_TYPE_STRING; |
2443 | 0 | data->text = g_new0 (gchar, 8); |
2444 | 0 | g_unichar_to_utf8 (x, data->text); |
2445 | 0 | break; |
2446 | 0 | case 'l': |
2447 | 0 | p++; |
2448 | 0 | data->type = REPL_TYPE_CHANGE_CASE; |
2449 | 0 | data->change_case = CHANGE_CASE_LOWER_SINGLE; |
2450 | 0 | break; |
2451 | 0 | case 'u': |
2452 | 0 | p++; |
2453 | 0 | data->type = REPL_TYPE_CHANGE_CASE; |
2454 | 0 | data->change_case = CHANGE_CASE_UPPER_SINGLE; |
2455 | 0 | break; |
2456 | 0 | case 'L': |
2457 | 0 | p++; |
2458 | 0 | data->type = REPL_TYPE_CHANGE_CASE; |
2459 | 0 | data->change_case = CHANGE_CASE_LOWER; |
2460 | 0 | break; |
2461 | 0 | case 'U': |
2462 | 0 | p++; |
2463 | 0 | data->type = REPL_TYPE_CHANGE_CASE; |
2464 | 0 | data->change_case = CHANGE_CASE_UPPER; |
2465 | 0 | break; |
2466 | 0 | case 'E': |
2467 | 0 | p++; |
2468 | 0 | data->type = REPL_TYPE_CHANGE_CASE; |
2469 | 0 | data->change_case = CHANGE_CASE_NONE; |
2470 | 0 | break; |
2471 | 0 | case 'g': |
2472 | 0 | p++; |
2473 | 0 | if (*p != '<') |
2474 | 0 | { |
2475 | 0 | error_detail = _("missing “<” in symbolic reference"); |
2476 | 0 | goto error; |
2477 | 0 | } |
2478 | 0 | q = p + 1; |
2479 | 0 | do |
2480 | 0 | { |
2481 | 0 | p++; |
2482 | 0 | if (!*p) |
2483 | 0 | { |
2484 | 0 | error_detail = _("unfinished symbolic reference"); |
2485 | 0 | goto error; |
2486 | 0 | } |
2487 | 0 | } |
2488 | 0 | while (*p != '>'); |
2489 | 0 | if (p - q == 0) |
2490 | 0 | { |
2491 | 0 | error_detail = _("zero-length symbolic reference"); |
2492 | 0 | goto error; |
2493 | 0 | } |
2494 | 0 | if (g_ascii_isdigit (*q)) |
2495 | 0 | { |
2496 | 0 | x = 0; |
2497 | 0 | do |
2498 | 0 | { |
2499 | 0 | h = g_ascii_digit_value (*q); |
2500 | 0 | if (h < 0) |
2501 | 0 | { |
2502 | 0 | error_detail = _("digit expected"); |
2503 | 0 | p = q; |
2504 | 0 | goto error; |
2505 | 0 | } |
2506 | 0 | x = x * 10 + h; |
2507 | 0 | q++; |
2508 | 0 | } |
2509 | 0 | while (q != p); |
2510 | 0 | data->num = x; |
2511 | 0 | data->type = REPL_TYPE_NUMERIC_REFERENCE; |
2512 | 0 | } |
2513 | 0 | else |
2514 | 0 | { |
2515 | 0 | r = q; |
2516 | 0 | do |
2517 | 0 | { |
2518 | 0 | if (!g_ascii_isalnum (*r)) |
2519 | 0 | { |
2520 | 0 | error_detail = _("illegal symbolic reference"); |
2521 | 0 | p = r; |
2522 | 0 | goto error; |
2523 | 0 | } |
2524 | 0 | r++; |
2525 | 0 | } |
2526 | 0 | while (r != p); |
2527 | 0 | data->text = g_strndup (q, p - q); |
2528 | 0 | data->type = REPL_TYPE_SYMBOLIC_REFERENCE; |
2529 | 0 | } |
2530 | 0 | p++; |
2531 | 0 | break; |
2532 | 0 | case '0': |
2533 | | /* if \0 is followed by a number is an octal number representing a |
2534 | | * character, else it is a numeric reference. */ |
2535 | 0 | if (g_ascii_digit_value (*g_utf8_next_char (p)) >= 0) |
2536 | 0 | { |
2537 | 0 | base = 8; |
2538 | 0 | p = g_utf8_next_char (p); |
2539 | 0 | } |
2540 | 0 | G_GNUC_FALLTHROUGH; |
2541 | 0 | case '1': |
2542 | 0 | case '2': |
2543 | 0 | case '3': |
2544 | 0 | case '4': |
2545 | 0 | case '5': |
2546 | 0 | case '6': |
2547 | 0 | case '7': |
2548 | 0 | case '8': |
2549 | 0 | case '9': |
2550 | 0 | x = 0; |
2551 | 0 | d = 0; |
2552 | 0 | for (i = 0; i < 3; i++) |
2553 | 0 | { |
2554 | 0 | h = g_ascii_digit_value (*p); |
2555 | 0 | if (h < 0) |
2556 | 0 | break; |
2557 | 0 | if (h > 7) |
2558 | 0 | { |
2559 | 0 | if (base == 8) |
2560 | 0 | break; |
2561 | 0 | else |
2562 | 0 | base = 10; |
2563 | 0 | } |
2564 | 0 | if (i == 2 && base == 10) |
2565 | 0 | break; |
2566 | 0 | x = x * 8 + h; |
2567 | 0 | d = d * 10 + h; |
2568 | 0 | p++; |
2569 | 0 | } |
2570 | 0 | if (base == 8 || i == 3) |
2571 | 0 | { |
2572 | 0 | data->type = REPL_TYPE_STRING; |
2573 | 0 | data->text = g_new0 (gchar, 8); |
2574 | 0 | g_unichar_to_utf8 (x, data->text); |
2575 | 0 | } |
2576 | 0 | else |
2577 | 0 | { |
2578 | 0 | data->type = REPL_TYPE_NUMERIC_REFERENCE; |
2579 | 0 | data->num = d; |
2580 | 0 | } |
2581 | 0 | break; |
2582 | 0 | case 0: |
2583 | 0 | error_detail = _("stray final “\\”"); |
2584 | 0 | goto error; |
2585 | 0 | break; |
2586 | 0 | default: |
2587 | 0 | error_detail = _("unknown escape sequence"); |
2588 | 0 | goto error; |
2589 | 0 | } |
2590 | | |
2591 | 0 | return p; |
2592 | | |
2593 | 0 | error: |
2594 | | /* G_GSSIZE_FORMAT doesn't work with gettext, so we use %lu */ |
2595 | 0 | tmp_error = g_error_new (G_REGEX_ERROR, |
2596 | 0 | G_REGEX_ERROR_REPLACE, |
2597 | 0 | _("Error while parsing replacement " |
2598 | 0 | "text “%s” at char %lu: %s"), |
2599 | 0 | replacement, |
2600 | 0 | (gulong)(p - replacement), |
2601 | 0 | error_detail); |
2602 | 0 | g_propagate_error (error, tmp_error); |
2603 | |
|
2604 | 0 | return NULL; |
2605 | 0 | } |
2606 | | |
2607 | | static GList * |
2608 | | split_replacement (const gchar *replacement, |
2609 | | GError **error) |
2610 | 0 | { |
2611 | 0 | GList *list = NULL; |
2612 | 0 | InterpolationData *data; |
2613 | 0 | const gchar *p, *start; |
2614 | |
|
2615 | 0 | start = p = replacement; |
2616 | 0 | while (*p) |
2617 | 0 | { |
2618 | 0 | if (*p == '\\') |
2619 | 0 | { |
2620 | 0 | data = g_new0 (InterpolationData, 1); |
2621 | 0 | start = p = expand_escape (replacement, p, data, error); |
2622 | 0 | if (p == NULL) |
2623 | 0 | { |
2624 | 0 | g_list_free_full (list, (GDestroyNotify) free_interpolation_data); |
2625 | 0 | free_interpolation_data (data); |
2626 | |
|
2627 | 0 | return NULL; |
2628 | 0 | } |
2629 | 0 | list = g_list_prepend (list, data); |
2630 | 0 | } |
2631 | 0 | else |
2632 | 0 | { |
2633 | 0 | p++; |
2634 | 0 | if (*p == '\\' || *p == '\0') |
2635 | 0 | { |
2636 | 0 | if (p - start > 0) |
2637 | 0 | { |
2638 | 0 | data = g_new0 (InterpolationData, 1); |
2639 | 0 | data->text = g_strndup (start, p - start); |
2640 | 0 | data->type = REPL_TYPE_STRING; |
2641 | 0 | list = g_list_prepend (list, data); |
2642 | 0 | } |
2643 | 0 | } |
2644 | 0 | } |
2645 | 0 | } |
2646 | | |
2647 | 0 | return g_list_reverse (list); |
2648 | 0 | } |
2649 | | |
/* Converts c to lower case when change_case has one of the
 * CHANGE_CASE_LOWER_MASK bits set, and to upper case otherwise. */
#define CHANGE_CASE(c, change_case) \
        ((((change_case) & CHANGE_CASE_LOWER_MASK) == 0) ? \
                g_unichar_toupper (c) : \
                g_unichar_tolower (c))
2655 | | |
2656 | | static void |
2657 | | string_append (GString *string, |
2658 | | const gchar *text, |
2659 | | ChangeCase *change_case) |
2660 | 0 | { |
2661 | 0 | gunichar c; |
2662 | |
|
2663 | 0 | if (text[0] == '\0') |
2664 | 0 | return; |
2665 | | |
2666 | 0 | if (*change_case == CHANGE_CASE_NONE) |
2667 | 0 | { |
2668 | 0 | g_string_append (string, text); |
2669 | 0 | } |
2670 | 0 | else if (*change_case & CHANGE_CASE_SINGLE_MASK) |
2671 | 0 | { |
2672 | 0 | c = g_utf8_get_char (text); |
2673 | 0 | g_string_append_unichar (string, CHANGE_CASE (c, *change_case)); |
2674 | 0 | g_string_append (string, g_utf8_next_char (text)); |
2675 | 0 | *change_case = CHANGE_CASE_NONE; |
2676 | 0 | } |
2677 | 0 | else |
2678 | 0 | { |
2679 | 0 | while (*text != '\0') |
2680 | 0 | { |
2681 | 0 | c = g_utf8_get_char (text); |
2682 | 0 | g_string_append_unichar (string, CHANGE_CASE (c, *change_case)); |
2683 | 0 | text = g_utf8_next_char (text); |
2684 | 0 | } |
2685 | 0 | } |
2686 | 0 | } |
2687 | | |
2688 | | static gboolean |
2689 | | interpolate_replacement (const GMatchInfo *match_info, |
2690 | | GString *result, |
2691 | | gpointer data) |
2692 | 0 | { |
2693 | 0 | GList *list; |
2694 | 0 | InterpolationData *idata; |
2695 | 0 | gchar *match; |
2696 | 0 | ChangeCase change_case = CHANGE_CASE_NONE; |
2697 | |
|
2698 | 0 | for (list = data; list; list = list->next) |
2699 | 0 | { |
2700 | 0 | idata = list->data; |
2701 | 0 | switch (idata->type) |
2702 | 0 | { |
2703 | 0 | case REPL_TYPE_STRING: |
2704 | 0 | string_append (result, idata->text, &change_case); |
2705 | 0 | break; |
2706 | 0 | case REPL_TYPE_CHARACTER: |
2707 | 0 | g_string_append_c (result, CHANGE_CASE (idata->c, change_case)); |
2708 | 0 | if (change_case & CHANGE_CASE_SINGLE_MASK) |
2709 | 0 | change_case = CHANGE_CASE_NONE; |
2710 | 0 | break; |
2711 | 0 | case REPL_TYPE_NUMERIC_REFERENCE: |
2712 | 0 | match = g_match_info_fetch (match_info, idata->num); |
2713 | 0 | if (match) |
2714 | 0 | { |
2715 | 0 | string_append (result, match, &change_case); |
2716 | 0 | g_free (match); |
2717 | 0 | } |
2718 | 0 | break; |
2719 | 0 | case REPL_TYPE_SYMBOLIC_REFERENCE: |
2720 | 0 | match = g_match_info_fetch_named (match_info, idata->text); |
2721 | 0 | if (match) |
2722 | 0 | { |
2723 | 0 | string_append (result, match, &change_case); |
2724 | 0 | g_free (match); |
2725 | 0 | } |
2726 | 0 | break; |
2727 | 0 | case REPL_TYPE_CHANGE_CASE: |
2728 | 0 | change_case = idata->change_case; |
2729 | 0 | break; |
2730 | 0 | } |
2731 | 0 | } |
2732 | | |
2733 | 0 | return FALSE; |
2734 | 0 | } |
2735 | | |
2736 | | /* whether actual match_info is needed for replacement, i.e. |
2737 | | * whether there are references |
2738 | | */ |
2739 | | static gboolean |
2740 | | interpolation_list_needs_match (GList *list) |
2741 | 0 | { |
2742 | 0 | while (list != NULL) |
2743 | 0 | { |
2744 | 0 | InterpolationData *data = list->data; |
2745 | |
|
2746 | 0 | if (data->type == REPL_TYPE_SYMBOLIC_REFERENCE || |
2747 | 0 | data->type == REPL_TYPE_NUMERIC_REFERENCE) |
2748 | 0 | { |
2749 | 0 | return TRUE; |
2750 | 0 | } |
2751 | | |
2752 | 0 | list = list->next; |
2753 | 0 | } |
2754 | | |
2755 | 0 | return FALSE; |
2756 | 0 | } |
2757 | | |
2758 | | /** |
2759 | | * g_regex_replace: |
2760 | | * @regex: a #GRegex structure |
2761 | | * @string: (array length=string_len): the string to perform matches against |
2762 | | * @string_len: the length of @string, in bytes, or -1 if @string is nul-terminated |
2763 | | * @start_position: starting index of the string to match, in bytes |
2764 | | * @replacement: text to replace each match with |
2765 | | * @match_options: options for the match |
2766 | | * @error: location to store the error occurring, or %NULL to ignore errors |
2767 | | * |
2768 | | * Replaces all occurrences of the pattern in @regex with the |
2769 | | * replacement text. Backreferences of the form '\number' or |
2770 | | * '\g<number>' in the replacement text are interpolated by the |
2771 | | * number-th captured subexpression of the match, '\g<name>' refers |
2772 | | * to the captured subexpression with the given name. '\0' refers |
2773 | | * to the complete match, but '\0' followed by a number is the octal |
2774 | | * representation of a character. To include a literal '\' in the |
2775 | | * replacement, write '\\\\'. |
2776 | | * |
2777 | | * There are also escapes that changes the case of the following text: |
2778 | | * |
2779 | | * - \l: Convert to lower case the next character |
2780 | | * - \u: Convert to upper case the next character |
2781 | | * - \L: Convert to lower case till \E |
2782 | | * - \U: Convert to upper case till \E |
2783 | | * - \E: End case modification |
2784 | | * |
2785 | | * If you do not need to use backreferences use g_regex_replace_literal(). |
2786 | | * |
2787 | | * The @replacement string must be UTF-8 encoded even if #G_REGEX_RAW was |
2788 | | * passed to g_regex_new(). If you want to use not UTF-8 encoded strings |
2789 | | * you can use g_regex_replace_literal(). |
2790 | | * |
2791 | | * Setting @start_position differs from just passing over a shortened |
2792 | | * string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern that |
2793 | | * begins with any kind of lookbehind assertion, such as "\b". |
2794 | | * |
2795 | | * Returns: a newly allocated string containing the replacements |
2796 | | * |
2797 | | * Since: 2.14 |
2798 | | */ |
2799 | | gchar * |
2800 | | g_regex_replace (const GRegex *regex, |
2801 | | const gchar *string, |
2802 | | gssize string_len, |
2803 | | gint start_position, |
2804 | | const gchar *replacement, |
2805 | | GRegexMatchFlags match_options, |
2806 | | GError **error) |
2807 | 0 | { |
2808 | 0 | gchar *result; |
2809 | 0 | GList *list; |
2810 | 0 | GError *tmp_error = NULL; |
2811 | |
|
2812 | 0 | g_return_val_if_fail (regex != NULL, NULL); |
2813 | 0 | g_return_val_if_fail (string != NULL, NULL); |
2814 | 0 | g_return_val_if_fail (start_position >= 0, NULL); |
2815 | 0 | g_return_val_if_fail (replacement != NULL, NULL); |
2816 | 0 | g_return_val_if_fail (error == NULL || *error == NULL, NULL); |
2817 | 0 | g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL); |
2818 | | |
2819 | 0 | list = split_replacement (replacement, &tmp_error); |
2820 | 0 | if (tmp_error != NULL) |
2821 | 0 | { |
2822 | 0 | g_propagate_error (error, tmp_error); |
2823 | 0 | return NULL; |
2824 | 0 | } |
2825 | | |
2826 | 0 | result = g_regex_replace_eval (regex, |
2827 | 0 | string, string_len, start_position, |
2828 | 0 | match_options, |
2829 | 0 | interpolate_replacement, |
2830 | 0 | (gpointer)list, |
2831 | 0 | &tmp_error); |
2832 | 0 | if (tmp_error != NULL) |
2833 | 0 | g_propagate_error (error, tmp_error); |
2834 | |
|
2835 | 0 | g_list_free_full (list, (GDestroyNotify) free_interpolation_data); |
2836 | |
|
2837 | 0 | return result; |
2838 | 0 | } |
2839 | | |
2840 | | static gboolean |
2841 | | literal_replacement (const GMatchInfo *match_info, |
2842 | | GString *result, |
2843 | | gpointer data) |
2844 | 0 | { |
2845 | 0 | g_string_append (result, data); |
2846 | 0 | return FALSE; |
2847 | 0 | } |
2848 | | |
2849 | | /** |
2850 | | * g_regex_replace_literal: |
2851 | | * @regex: a #GRegex structure |
2852 | | * @string: (array length=string_len): the string to perform matches against |
2853 | | * @string_len: the length of @string, in bytes, or -1 if @string is nul-terminated |
2854 | | * @start_position: starting index of the string to match, in bytes |
2855 | | * @replacement: text to replace each match with |
2856 | | * @match_options: options for the match |
2857 | | * @error: location to store the error occurring, or %NULL to ignore errors |
2858 | | * |
2859 | | * Replaces all occurrences of the pattern in @regex with the |
2860 | | * replacement text. @replacement is replaced literally, to |
2861 | | * include backreferences use g_regex_replace(). |
2862 | | * |
2863 | | * Setting @start_position differs from just passing over a |
2864 | | * shortened string and setting #G_REGEX_MATCH_NOTBOL in the |
2865 | | * case of a pattern that begins with any kind of lookbehind |
2866 | | * assertion, such as "\b". |
2867 | | * |
2868 | | * Returns: a newly allocated string containing the replacements |
2869 | | * |
2870 | | * Since: 2.14 |
2871 | | */ |
2872 | | gchar * |
2873 | | g_regex_replace_literal (const GRegex *regex, |
2874 | | const gchar *string, |
2875 | | gssize string_len, |
2876 | | gint start_position, |
2877 | | const gchar *replacement, |
2878 | | GRegexMatchFlags match_options, |
2879 | | GError **error) |
2880 | 0 | { |
2881 | 0 | g_return_val_if_fail (replacement != NULL, NULL); |
2882 | 0 | g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL); |
2883 | | |
2884 | 0 | return g_regex_replace_eval (regex, |
2885 | 0 | string, string_len, start_position, |
2886 | 0 | match_options, |
2887 | 0 | literal_replacement, |
2888 | 0 | (gpointer)replacement, |
2889 | 0 | error); |
2890 | 0 | } |
2891 | | |
2892 | | /** |
2893 | | * g_regex_replace_eval: |
2894 | | * @regex: a #GRegex structure from g_regex_new() |
2895 | | * @string: (array length=string_len): string to perform matches against |
2896 | | * @string_len: the length of @string, in bytes, or -1 if @string is nul-terminated |
2897 | | * @start_position: starting index of the string to match, in bytes |
2898 | | * @match_options: options for the match |
2899 | | * @eval: a function to call for each match |
2900 | | * @user_data: user data to pass to the function |
2901 | | * @error: location to store the error occurring, or %NULL to ignore errors |
2902 | | * |
2903 | | * Replaces occurrences of the pattern in regex with the output of |
2904 | | * @eval for that occurrence. |
2905 | | * |
2906 | | * Setting @start_position differs from just passing over a shortened |
2907 | | * string and setting #G_REGEX_MATCH_NOTBOL in the case of a pattern |
2908 | | * that begins with any kind of lookbehind assertion, such as "\b". |
2909 | | * |
2910 | | * The following example uses g_regex_replace_eval() to replace multiple |
2911 | | * strings at once: |
2912 | | * |[<!-- language="C" --> |
2913 | | * static gboolean |
2914 | | * eval_cb (const GMatchInfo *info, |
2915 | | * GString *res, |
2916 | | * gpointer data) |
2917 | | * { |
2918 | | * gchar *match; |
2919 | | * gchar *r; |
2920 | | * |
2921 | | * match = g_match_info_fetch (info, 0); |
2922 | | * r = g_hash_table_lookup ((GHashTable *)data, match); |
2923 | | * g_string_append (res, r); |
2924 | | * g_free (match); |
2925 | | * |
2926 | | * return FALSE; |
2927 | | * } |
2928 | | * |
2929 | | * ... |
2930 | | * |
2931 | | * GRegex *reg; |
2932 | | * GHashTable *h; |
2933 | | * gchar *res; |
2934 | | * |
2935 | | * h = g_hash_table_new (g_str_hash, g_str_equal); |
2936 | | * |
2937 | | * g_hash_table_insert (h, "1", "ONE"); |
2938 | | * g_hash_table_insert (h, "2", "TWO"); |
2939 | | * g_hash_table_insert (h, "3", "THREE"); |
2940 | | * g_hash_table_insert (h, "4", "FOUR"); |
2941 | | * |
2942 | | * reg = g_regex_new ("1|2|3|4", 0, 0, NULL); |
2943 | | * res = g_regex_replace_eval (reg, text, -1, 0, 0, eval_cb, h, NULL); |
2944 | | * g_hash_table_destroy (h); |
2945 | | * |
2946 | | * ... |
2947 | | * ]| |
2948 | | * |
2949 | | * Returns: a newly allocated string containing the replacements |
2950 | | * |
2951 | | * Since: 2.14 |
2952 | | */ |
2953 | | gchar * |
2954 | | g_regex_replace_eval (const GRegex *regex, |
2955 | | const gchar *string, |
2956 | | gssize string_len, |
2957 | | gint start_position, |
2958 | | GRegexMatchFlags match_options, |
2959 | | GRegexEvalCallback eval, |
2960 | | gpointer user_data, |
2961 | | GError **error) |
2962 | 0 | { |
2963 | 0 | GMatchInfo *match_info; |
2964 | 0 | GString *result; |
2965 | 0 | gint str_pos = 0; |
2966 | 0 | gboolean done = FALSE; |
2967 | 0 | GError *tmp_error = NULL; |
2968 | |
|
2969 | 0 | g_return_val_if_fail (regex != NULL, NULL); |
2970 | 0 | g_return_val_if_fail (string != NULL, NULL); |
2971 | 0 | g_return_val_if_fail (start_position >= 0, NULL); |
2972 | 0 | g_return_val_if_fail (eval != NULL, NULL); |
2973 | 0 | g_return_val_if_fail ((match_options & ~G_REGEX_MATCH_MASK) == 0, NULL); |
2974 | | |
2975 | 0 | if (string_len < 0) |
2976 | 0 | string_len = strlen (string); |
2977 | |
|
2978 | 0 | result = g_string_sized_new (string_len); |
2979 | | |
2980 | | /* run down the string making matches. */ |
2981 | 0 | g_regex_match_full (regex, string, string_len, start_position, |
2982 | 0 | match_options, &match_info, &tmp_error); |
2983 | 0 | while (!done && g_match_info_matches (match_info)) |
2984 | 0 | { |
2985 | 0 | g_string_append_len (result, |
2986 | 0 | string + str_pos, |
2987 | 0 | match_info->offsets[0] - str_pos); |
2988 | 0 | done = (*eval) (match_info, result, user_data); |
2989 | 0 | str_pos = match_info->offsets[1]; |
2990 | 0 | g_match_info_next (match_info, &tmp_error); |
2991 | 0 | } |
2992 | 0 | g_match_info_free (match_info); |
2993 | 0 | if (tmp_error != NULL) |
2994 | 0 | { |
2995 | 0 | g_propagate_error (error, tmp_error); |
2996 | 0 | g_string_free (result, TRUE); |
2997 | 0 | return NULL; |
2998 | 0 | } |
2999 | | |
3000 | 0 | g_string_append_len (result, string + str_pos, string_len - str_pos); |
3001 | 0 | return g_string_free (result, FALSE); |
3002 | 0 | } |
3003 | | |
3004 | | /** |
3005 | | * g_regex_check_replacement: |
3006 | | * @replacement: the replacement string |
3007 | | * @has_references: (out) (optional): location to store information about |
3008 | | * references in @replacement or %NULL |
3009 | | * @error: location to store error |
3010 | | * |
3011 | | * Checks whether @replacement is a valid replacement string |
3012 | | * (see g_regex_replace()), i.e. that all escape sequences in |
3013 | | * it are valid. |
3014 | | * |
3015 | | * If @has_references is not %NULL then @replacement is checked |
3016 | | * for pattern references. For instance, replacement text 'foo\n' |
3017 | | * does not contain references and may be evaluated without information |
3018 | | * about actual match, but '\0\1' (whole match followed by first |
3019 | | * subpattern) requires valid #GMatchInfo object. |
3020 | | * |
3021 | | * Returns: whether @replacement is a valid replacement string |
3022 | | * |
3023 | | * Since: 2.14 |
3024 | | */ |
3025 | | gboolean |
3026 | | g_regex_check_replacement (const gchar *replacement, |
3027 | | gboolean *has_references, |
3028 | | GError **error) |
3029 | 0 | { |
3030 | 0 | GList *list; |
3031 | 0 | GError *tmp = NULL; |
3032 | |
|
3033 | 0 | list = split_replacement (replacement, &tmp); |
3034 | |
|
3035 | 0 | if (tmp) |
3036 | 0 | { |
3037 | 0 | g_propagate_error (error, tmp); |
3038 | 0 | return FALSE; |
3039 | 0 | } |
3040 | | |
3041 | 0 | if (has_references) |
3042 | 0 | *has_references = interpolation_list_needs_match (list); |
3043 | |
|
3044 | 0 | g_list_free_full (list, (GDestroyNotify) free_interpolation_data); |
3045 | |
|
3046 | 0 | return TRUE; |
3047 | 0 | } |
3048 | | |
3049 | | /** |
3050 | | * g_regex_escape_nul: |
3051 | | * @string: the string to escape |
3052 | | * @length: the length of @string |
3053 | | * |
3054 | | * Escapes the nul characters in @string to "\x00". It can be used |
3055 | | * to compile a regex with embedded nul characters. |
3056 | | * |
3057 | | * For completeness, @length can be -1 for a nul-terminated string. |
3058 | | * In this case the output string will be of course equal to @string. |
3059 | | * |
3060 | | * Returns: a newly-allocated escaped string |
3061 | | * |
3062 | | * Since: 2.30 |
3063 | | */ |
3064 | | gchar * |
3065 | | g_regex_escape_nul (const gchar *string, |
3066 | | gint length) |
3067 | 0 | { |
3068 | 0 | GString *escaped; |
3069 | 0 | const gchar *p, *piece_start, *end; |
3070 | 0 | gint backslashes; |
3071 | |
|
3072 | 0 | g_return_val_if_fail (string != NULL, NULL); |
3073 | | |
3074 | 0 | if (length < 0) |
3075 | 0 | return g_strdup (string); |
3076 | | |
3077 | 0 | end = string + length; |
3078 | 0 | p = piece_start = string; |
3079 | 0 | escaped = g_string_sized_new (length + 1); |
3080 | |
|
3081 | 0 | backslashes = 0; |
3082 | 0 | while (p < end) |
3083 | 0 | { |
3084 | 0 | switch (*p) |
3085 | 0 | { |
3086 | 0 | case '\0': |
3087 | 0 | if (p != piece_start) |
3088 | 0 | { |
3089 | | /* copy the previous piece. */ |
3090 | 0 | g_string_append_len (escaped, piece_start, p - piece_start); |
3091 | 0 | } |
3092 | 0 | if ((backslashes & 1) == 0) |
3093 | 0 | g_string_append_c (escaped, '\\'); |
3094 | 0 | g_string_append_c (escaped, 'x'); |
3095 | 0 | g_string_append_c (escaped, '0'); |
3096 | 0 | g_string_append_c (escaped, '0'); |
3097 | 0 | piece_start = ++p; |
3098 | 0 | backslashes = 0; |
3099 | 0 | break; |
3100 | 0 | case '\\': |
3101 | 0 | backslashes++; |
3102 | 0 | ++p; |
3103 | 0 | break; |
3104 | 0 | default: |
3105 | 0 | backslashes = 0; |
3106 | 0 | p = g_utf8_next_char (p); |
3107 | 0 | break; |
3108 | 0 | } |
3109 | 0 | } |
3110 | | |
3111 | 0 | if (piece_start < end) |
3112 | 0 | g_string_append_len (escaped, piece_start, end - piece_start); |
3113 | |
|
3114 | 0 | return g_string_free (escaped, FALSE); |
3115 | 0 | } |
3116 | | |
3117 | | /** |
3118 | | * g_regex_escape_string: |
3119 | | * @string: (array length=length): the string to escape |
3120 | | * @length: the length of @string, in bytes, or -1 if @string is nul-terminated |
3121 | | * |
3122 | | * Escapes the special characters used for regular expressions |
3123 | | * in @string, for instance "a.b*c" becomes "a\.b\*c". This |
3124 | | * function is useful to dynamically generate regular expressions. |
3125 | | * |
3126 | | * @string can contain nul characters that are replaced with "\0", |
3127 | | * in this case remember to specify the correct length of @string |
3128 | | * in @length. |
3129 | | * |
3130 | | * Returns: a newly-allocated escaped string |
3131 | | * |
3132 | | * Since: 2.14 |
3133 | | */ |
3134 | | gchar * |
3135 | | g_regex_escape_string (const gchar *string, |
3136 | | gint length) |
3137 | 0 | { |
3138 | 0 | GString *escaped; |
3139 | 0 | const char *p, *piece_start, *end; |
3140 | |
|
3141 | 0 | g_return_val_if_fail (string != NULL, NULL); |
3142 | | |
3143 | 0 | if (length < 0) |
3144 | 0 | length = strlen (string); |
3145 | |
|
3146 | 0 | end = string + length; |
3147 | 0 | p = piece_start = string; |
3148 | 0 | escaped = g_string_sized_new (length + 1); |
3149 | |
|
3150 | 0 | while (p < end) |
3151 | 0 | { |
3152 | 0 | switch (*p) |
3153 | 0 | { |
3154 | 0 | case '\0': |
3155 | 0 | case '\\': |
3156 | 0 | case '|': |
3157 | 0 | case '(': |
3158 | 0 | case ')': |
3159 | 0 | case '[': |
3160 | 0 | case ']': |
3161 | 0 | case '{': |
3162 | 0 | case '}': |
3163 | 0 | case '^': |
3164 | 0 | case '$': |
3165 | 0 | case '*': |
3166 | 0 | case '+': |
3167 | 0 | case '?': |
3168 | 0 | case '.': |
3169 | 0 | if (p != piece_start) |
3170 | | /* copy the previous piece. */ |
3171 | 0 | g_string_append_len (escaped, piece_start, p - piece_start); |
3172 | 0 | g_string_append_c (escaped, '\\'); |
3173 | 0 | if (*p == '\0') |
3174 | 0 | g_string_append_c (escaped, '0'); |
3175 | 0 | else |
3176 | 0 | g_string_append_c (escaped, *p); |
3177 | 0 | piece_start = ++p; |
3178 | 0 | break; |
3179 | 0 | default: |
3180 | 0 | p = g_utf8_next_char (p); |
3181 | 0 | break; |
3182 | 0 | } |
3183 | 0 | } |
3184 | | |
3185 | 0 | if (piece_start < end) |
3186 | 0 | g_string_append_len (escaped, piece_start, end - piece_start); |
3187 | |
|
3188 | 0 | return g_string_free (escaped, FALSE); |
3189 | 0 | } |