/src/gstreamer/subprojects/glib-2.82.5/glib/gpattern.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* GLIB - Library of useful routines for C programming |
2 | | * Copyright (C) 1995-1997, 1999 Peter Mattis, Red Hat, Inc. |
3 | | * |
4 | | * SPDX-License-Identifier: LGPL-2.1-or-later |
5 | | * |
6 | | * This library is free software; you can redistribute it and/or |
7 | | * modify it under the terms of the GNU Lesser General Public |
8 | | * License as published by the Free Software Foundation; either |
9 | | * version 2.1 of the License, or (at your option) any later version. |
10 | | * |
11 | | * This library is distributed in the hope that it will be useful, |
12 | | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
13 | | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
14 | | * Lesser General Public License for more details. |
15 | | * |
16 | | * You should have received a copy of the GNU Lesser General Public |
17 | | * License along with this library; if not, see <http://www.gnu.org/licenses/>. |
18 | | */ |
19 | | |
20 | | #include "config.h" |
21 | | |
22 | | #include <string.h> |
23 | | |
24 | | #include "gpattern.h" |
25 | | |
26 | | #include "gmacros.h" |
27 | | #include "gmem.h" |
28 | | #include "gmessages.h" |
29 | | #include "gstrfuncs.h" |
30 | | #include "gunicode.h" |
31 | | #include "gutils.h" |
32 | | |
33 | | /** |
34 | | * GPatternSpec: |
35 | | * |
36 | | * A `GPatternSpec` struct is the ‘compiled’ form of a glob-style pattern. |
37 | | * |
38 | | * The [func@GLib.pattern_match_simple] and [method@GLib.PatternSpec.match] functions |
39 | | * match a string against a pattern containing `*` and `?` wildcards with similar |
40 | | * semantics as the standard `glob()` function: `*` matches an arbitrary, |
41 | | * possibly empty, string, `?` matches an arbitrary character. |
42 | | * |
43 | | * Note that in contrast to [`glob()`](man:glob(3)), the `/` character can be |
44 | | * matched by the wildcards, there are no `[…]` character ranges and `*` and `?` |
45 | | * can not be escaped to include them literally in a pattern. |
46 | | * |
47 | | * When multiple strings must be matched against the same pattern, it is better |
48 | | * to compile the pattern to a [struct@GLib.PatternSpec] using |
49 | | * [ctor@GLib.PatternSpec.new] and use [method@GLib.PatternSpec.match_string] |
50 | | * instead of [func@GLib.pattern_match_simple]. This avoids the overhead of repeated |
51 | | * pattern compilation. |
52 | | */ |
53 | | |
54 | | /* keep enum and structure of gpattern.c and patterntest.c in sync */ |
/* Matching strategy selected for a compiled pattern.  The quoted text next to
 * each value is an example pattern shape, with `A` standing for any literal
 * character.
 */
typedef enum
{
  G_MATCH_ALL = 0,   /* "*A?A*": generic matcher run over the string as given */
  G_MATCH_ALL_TAIL,  /* "*A?AA": generic matcher run over the reversed string */
  G_MATCH_HEAD,      /* "AAAA*": literal prefix comparison */
  G_MATCH_TAIL,      /* "*AAAA": literal suffix comparison */
  G_MATCH_EXACT,     /* "AAAAA": whole-string comparison */
  G_MATCH_LAST       /* sentinel: number of valid strategies, not a strategy */
} GMatchType;
64 | | |
65 | | struct _GPatternSpec |
66 | | { |
67 | | GMatchType match_type; |
68 | | size_t pattern_length; |
69 | | size_t min_length; |
70 | | size_t max_length; |
71 | | gchar *pattern; |
72 | | }; |
73 | | |
74 | | |
75 | | /* --- functions --- */ |
76 | | static inline gboolean |
77 | | g_pattern_ph_match (const gchar *match_pattern, |
78 | | const gchar *match_string, |
79 | | gboolean *wildcard_reached_p) |
80 | 0 | { |
81 | 0 | const gchar *pattern, *string; |
82 | 0 | gchar ch; |
83 | |
|
84 | 0 | pattern = match_pattern; |
85 | 0 | string = match_string; |
86 | |
|
87 | 0 | ch = *pattern; |
88 | 0 | pattern++; |
89 | 0 | while (ch) |
90 | 0 | { |
91 | 0 | switch (ch) |
92 | 0 | { |
93 | 0 | case '?': |
94 | 0 | if (!*string) |
95 | 0 | return FALSE; |
96 | 0 | string = g_utf8_next_char (string); |
97 | 0 | break; |
98 | | |
99 | 0 | case '*': |
100 | 0 | *wildcard_reached_p = TRUE; |
101 | 0 | do |
102 | 0 | { |
103 | 0 | ch = *pattern; |
104 | 0 | pattern++; |
105 | 0 | if (ch == '?') |
106 | 0 | { |
107 | 0 | if (!*string) |
108 | 0 | return FALSE; |
109 | 0 | string = g_utf8_next_char (string); |
110 | 0 | } |
111 | 0 | } |
112 | 0 | while (ch == '*' || ch == '?'); |
113 | 0 | if (!ch) |
114 | 0 | return TRUE; |
115 | 0 | do |
116 | 0 | { |
117 | 0 | gboolean next_wildcard_reached = FALSE; |
118 | 0 | while (ch != *string) |
119 | 0 | { |
120 | 0 | if (!*string) |
121 | 0 | return FALSE; |
122 | 0 | string = g_utf8_next_char (string); |
123 | 0 | } |
124 | 0 | string++; |
125 | 0 | if (g_pattern_ph_match (pattern, string, &next_wildcard_reached)) |
126 | 0 | return TRUE; |
127 | 0 | if (next_wildcard_reached) |
128 | | /* the forthcoming pattern substring up to the next wildcard has |
129 | | * been matched, but a mismatch occurred for the rest of the |
130 | | * pattern, following the next wildcard. |
131 | | * there's no need to advance the current match position any |
132 | | * further if the rest pattern will not match. |
133 | | */ |
134 | 0 | return FALSE; |
135 | 0 | } |
136 | 0 | while (*string); |
137 | 0 | break; |
138 | | |
139 | 0 | default: |
140 | 0 | if (ch == *string) |
141 | 0 | string++; |
142 | 0 | else |
143 | 0 | return FALSE; |
144 | 0 | break; |
145 | 0 | } |
146 | | |
147 | 0 | ch = *pattern; |
148 | 0 | pattern++; |
149 | 0 | } |
150 | | |
151 | 0 | return *string == 0; |
152 | 0 | } |
153 | | |
154 | | /** |
155 | | * g_pattern_spec_match: |
156 | | * @pspec: a #GPatternSpec |
157 | | * @string_length: the length of @string (in bytes, i.e. `strlen()`, |
158 | | * not [func@GLib.utf8_strlen]) |
159 | | * @string: the UTF-8 encoded string to match |
160 | | * @string_reversed: (nullable): the reverse of @string |
161 | | * |
162 | | * Matches a string against a compiled pattern. |
163 | | * |
164 | | * Passing the correct |
165 | | * length of the string given is mandatory. The reversed string can be |
166 | | * omitted by passing `NULL`, this is more efficient if the reversed |
167 | | * version of the string to be matched is not at hand, as |
168 | | * [method@GLib.PatternSpec.match] will only construct it if the compiled pattern |
169 | | * requires reverse matches. |
170 | | * |
171 | | * Note that, if the user code will (possibly) match a string against a |
172 | | * multitude of patterns containing wildcards, chances are high that |
173 | | * some patterns will require a reversed string. In this case, it’s |
174 | | * more efficient to provide the reversed string to avoid multiple |
175 | | * constructions thereof in the various calls to [method@GLib.PatternSpec.match]. |
176 | | * |
177 | | * Note also that the reverse of a UTF-8 encoded string can in general |
178 | | * not be obtained by [func@GLib.strreverse]. This works only if the string |
179 | | * does not contain any multibyte characters. GLib offers the |
180 | | * [func@GLib.utf8_strreverse] function to reverse UTF-8 encoded strings. |
181 | | * |
182 | | * Returns: %TRUE if @string matches @pspec |
183 | | * |
184 | | * Since: 2.70 |
185 | | **/ |
186 | | gboolean |
187 | | g_pattern_spec_match (GPatternSpec *pspec, |
188 | | gsize string_length, |
189 | | const gchar *string, |
190 | | const gchar *string_reversed) |
191 | 0 | { |
192 | 0 | g_return_val_if_fail (pspec != NULL, FALSE); |
193 | 0 | g_return_val_if_fail (string != NULL, FALSE); |
194 | | |
195 | 0 | if (string_length < pspec->min_length || |
196 | 0 | string_length > pspec->max_length) |
197 | 0 | return FALSE; |
198 | | |
199 | 0 | switch (pspec->match_type) |
200 | 0 | { |
201 | 0 | gboolean dummy; |
202 | 0 | case G_MATCH_ALL: |
203 | 0 | return g_pattern_ph_match (pspec->pattern, string, &dummy); |
204 | 0 | case G_MATCH_ALL_TAIL: |
205 | 0 | if (string_reversed) |
206 | 0 | return g_pattern_ph_match (pspec->pattern, string_reversed, &dummy); |
207 | 0 | else |
208 | 0 | { |
209 | 0 | gboolean result; |
210 | 0 | gchar *tmp; |
211 | 0 | tmp = g_utf8_strreverse (string, string_length); |
212 | 0 | result = g_pattern_ph_match (pspec->pattern, tmp, &dummy); |
213 | 0 | g_free (tmp); |
214 | 0 | return result; |
215 | 0 | } |
216 | 0 | case G_MATCH_HEAD: |
217 | 0 | if (pspec->pattern_length == string_length) |
218 | 0 | return strcmp (pspec->pattern, string) == 0; |
219 | 0 | else if (pspec->pattern_length) |
220 | 0 | return strncmp (pspec->pattern, string, pspec->pattern_length) == 0; |
221 | 0 | else |
222 | 0 | return TRUE; |
223 | 0 | case G_MATCH_TAIL: |
224 | 0 | if (pspec->pattern_length) |
225 | 0 | return strcmp (pspec->pattern, string + (string_length - pspec->pattern_length)) == 0; |
226 | 0 | else |
227 | 0 | return TRUE; |
228 | 0 | case G_MATCH_EXACT: |
229 | 0 | if (pspec->pattern_length != string_length) |
230 | 0 | return FALSE; |
231 | 0 | else |
232 | 0 | return strcmp (pspec->pattern, string) == 0; |
233 | 0 | default: |
234 | 0 | g_return_val_if_fail (pspec->match_type < G_MATCH_LAST, FALSE); |
235 | 0 | return FALSE; |
236 | 0 | } |
237 | 0 | } |
238 | | |
239 | | /** |
240 | | * g_pattern_match: (skip) |
241 | | * @pspec: a #GPatternSpec |
242 | | * @string_length: the length of @string (in bytes, i.e. `strlen()`, |
243 | | * not [func@GLib.utf8_strlen]) |
244 | | * @string: the UTF-8 encoded string to match |
245 | | * @string_reversed: (nullable): the reverse of @string |
246 | | * |
247 | | * Matches a string against a compiled pattern. |
248 | | * |
249 | | * Passing the correct |
250 | | * length of the string given is mandatory. The reversed string can be |
251 | | * omitted by passing `NULL`, this is more efficient if the reversed |
252 | | * version of the string to be matched is not at hand, as |
253 | | * `g_pattern_match()` will only construct it if the compiled pattern |
254 | | * requires reverse matches. |
255 | | * |
256 | | * Note that, if the user code will (possibly) match a string against a |
257 | | * multitude of patterns containing wildcards, chances are high that |
258 | | * some patterns will require a reversed string. In this case, it’s |
259 | | * more efficient to provide the reversed string to avoid multiple |
260 | | * constructions thereof in the various calls to `g_pattern_match()`. |
261 | | * |
262 | | * Note also that the reverse of a UTF-8 encoded string can in general |
263 | | * not be obtained by [func@GLib.strreverse]. This works only if the string |
264 | | * does not contain any multibyte characters. GLib offers the |
265 | | * [func@GLib.utf8_strreverse] function to reverse UTF-8 encoded strings. |
266 | | * |
267 | | * Returns: %TRUE if @string matches @pspec |
268 | | * Deprecated: 2.70: Use [method@GLib.PatternSpec.match] instead |
269 | | **/ |
270 | | gboolean |
271 | | g_pattern_match (GPatternSpec *pspec, |
272 | | guint string_length, |
273 | | const gchar *string, |
274 | | const gchar *string_reversed) |
275 | 0 | { |
276 | 0 | return g_pattern_spec_match (pspec, string_length, string, string_reversed); |
277 | 0 | } |
278 | | |
279 | | /** |
280 | | * g_pattern_spec_new: |
281 | | * @pattern: a zero-terminated UTF-8 encoded string |
282 | | * |
283 | | * Compiles a pattern to a [type@GLib.PatternSpec]. |
284 | | * |
285 | | * Returns: (transfer full): a newly-allocated [type@GLib.PatternSpec] |
286 | | **/ |
287 | | GPatternSpec* |
288 | | g_pattern_spec_new (const gchar *pattern) |
289 | 0 | { |
290 | 0 | GPatternSpec *pspec; |
291 | 0 | gboolean seen_joker = FALSE, seen_wildcard = FALSE, more_wildcards = FALSE; |
292 | 0 | gint hw_pos = -1, tw_pos = -1, hj_pos = -1, tj_pos = -1; |
293 | 0 | gboolean follows_wildcard = FALSE; |
294 | 0 | guint pending_jokers = 0; |
295 | 0 | const gchar *s; |
296 | 0 | gchar *d; |
297 | 0 | guint i; |
298 | | |
299 | 0 | g_return_val_if_fail (pattern != NULL, NULL); |
300 | | |
301 | | /* canonicalize pattern and collect necessary stats */ |
302 | 0 | pspec = g_new (GPatternSpec, 1); |
303 | 0 | pspec->pattern_length = strlen (pattern); |
304 | 0 | pspec->min_length = 0; |
305 | 0 | pspec->max_length = 0; |
306 | 0 | pspec->pattern = g_new (gchar, pspec->pattern_length + 1); |
307 | 0 | d = pspec->pattern; |
308 | 0 | for (i = 0, s = pattern; *s != 0; s++) |
309 | 0 | { |
310 | 0 | switch (*s) |
311 | 0 | { |
312 | 0 | case '*': |
313 | 0 | if (follows_wildcard) /* compress multiple wildcards */ |
314 | 0 | { |
315 | 0 | pspec->pattern_length--; |
316 | 0 | continue; |
317 | 0 | } |
318 | 0 | follows_wildcard = TRUE; |
319 | 0 | if (hw_pos < 0) |
320 | 0 | hw_pos = i; |
321 | 0 | tw_pos = i; |
322 | 0 | break; |
323 | 0 | case '?': |
324 | 0 | pending_jokers++; |
325 | 0 | pspec->min_length++; |
326 | 0 | pspec->max_length += 4; /* maximum UTF-8 character length */ |
327 | 0 | continue; |
328 | 0 | default: |
329 | 0 | for (; pending_jokers; pending_jokers--, i++) { |
330 | 0 | *d++ = '?'; |
331 | 0 | if (hj_pos < 0) |
332 | 0 | hj_pos = i; |
333 | 0 | tj_pos = i; |
334 | 0 | } |
335 | 0 | follows_wildcard = FALSE; |
336 | 0 | pspec->min_length++; |
337 | 0 | pspec->max_length++; |
338 | 0 | break; |
339 | 0 | } |
340 | 0 | *d++ = *s; |
341 | 0 | i++; |
342 | 0 | } |
343 | 0 | for (; pending_jokers; pending_jokers--) { |
344 | 0 | *d++ = '?'; |
345 | 0 | if (hj_pos < 0) |
346 | 0 | hj_pos = i; |
347 | 0 | tj_pos = i; |
348 | 0 | } |
349 | 0 | *d++ = 0; |
350 | 0 | seen_joker = hj_pos >= 0; |
351 | 0 | seen_wildcard = hw_pos >= 0; |
352 | 0 | more_wildcards = seen_wildcard && hw_pos != tw_pos; |
353 | 0 | if (seen_wildcard) |
354 | 0 | pspec->max_length = G_MAXUINT; |
355 | | |
356 | | /* special case sole head/tail wildcard or exact matches */ |
357 | 0 | if (!seen_joker && !more_wildcards) |
358 | 0 | { |
359 | 0 | if (pspec->pattern[0] == '*') |
360 | 0 | { |
361 | 0 | pspec->match_type = G_MATCH_TAIL; |
362 | 0 | memmove (pspec->pattern, pspec->pattern + 1, --pspec->pattern_length); |
363 | 0 | pspec->pattern[pspec->pattern_length] = 0; |
364 | 0 | return pspec; |
365 | 0 | } |
366 | 0 | if (pspec->pattern_length > 0 && |
367 | 0 | pspec->pattern[pspec->pattern_length - 1] == '*') |
368 | 0 | { |
369 | 0 | pspec->match_type = G_MATCH_HEAD; |
370 | 0 | pspec->pattern[--pspec->pattern_length] = 0; |
371 | 0 | return pspec; |
372 | 0 | } |
373 | 0 | if (!seen_wildcard) |
374 | 0 | { |
375 | 0 | pspec->match_type = G_MATCH_EXACT; |
376 | 0 | return pspec; |
377 | 0 | } |
378 | 0 | } |
379 | | |
380 | | /* now just need to distinguish between head or tail match start */ |
381 | 0 | tw_pos = pspec->pattern_length - 1 - tw_pos; /* last pos to tail distance */ |
382 | 0 | tj_pos = pspec->pattern_length - 1 - tj_pos; /* last pos to tail distance */ |
383 | 0 | if (seen_wildcard) |
384 | 0 | pspec->match_type = tw_pos > hw_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL; |
385 | 0 | else /* seen_joker */ |
386 | 0 | pspec->match_type = tj_pos > hj_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL; |
387 | 0 | if (pspec->match_type == G_MATCH_ALL_TAIL) { |
388 | 0 | gchar *tmp = pspec->pattern; |
389 | 0 | pspec->pattern = g_utf8_strreverse (pspec->pattern, pspec->pattern_length); |
390 | 0 | g_free (tmp); |
391 | 0 | } |
392 | 0 | return pspec; |
393 | 0 | } |
394 | | |
395 | | /** |
396 | | * g_pattern_spec_copy: |
397 | | * @pspec: a #GPatternSpec |
398 | | * |
399 | | * Copies @pspec in a new [type@GLib.PatternSpec]. |
400 | | * |
401 | | * Returns: (transfer full): a copy of @pspec. |
402 | | * |
403 | | * Since: 2.70 |
404 | | **/ |
405 | | GPatternSpec * |
406 | | g_pattern_spec_copy (GPatternSpec *pspec) |
407 | 0 | { |
408 | 0 | GPatternSpec *pspec_copy; |
409 | |
|
410 | 0 | g_return_val_if_fail (pspec != NULL, NULL); |
411 | | |
412 | 0 | pspec_copy = g_new (GPatternSpec, 1); |
413 | 0 | *pspec_copy = *pspec; |
414 | 0 | pspec_copy->pattern = g_strndup (pspec->pattern, pspec->pattern_length); |
415 | |
|
416 | 0 | return pspec_copy; |
417 | 0 | } |
418 | | |
419 | | /** |
420 | | * g_pattern_spec_free: |
421 | | * @pspec: a #GPatternSpec |
422 | | * |
423 | | * Frees the memory allocated for the [type@GLib.PatternSpec]. |
424 | | **/ |
425 | | void |
426 | | g_pattern_spec_free (GPatternSpec *pspec) |
427 | 0 | { |
428 | 0 | g_return_if_fail (pspec != NULL); |
429 | | |
430 | 0 | g_free (pspec->pattern); |
431 | 0 | g_free (pspec); |
432 | 0 | } |
433 | | |
434 | | /** |
435 | | * g_pattern_spec_equal: |
436 | | * @pspec1: a #GPatternSpec |
437 | | * @pspec2: another #GPatternSpec |
438 | | * |
439 | | * Compares two compiled pattern specs and returns whether they will |
440 | | * match the same set of strings. |
441 | | * |
442 | | * Returns: Whether the compiled patterns are equal |
443 | | **/ |
444 | | gboolean |
445 | | g_pattern_spec_equal (GPatternSpec *pspec1, |
446 | | GPatternSpec *pspec2) |
447 | 0 | { |
448 | 0 | g_return_val_if_fail (pspec1 != NULL, FALSE); |
449 | 0 | g_return_val_if_fail (pspec2 != NULL, FALSE); |
450 | | |
451 | 0 | return (pspec1->pattern_length == pspec2->pattern_length && |
452 | 0 | pspec1->match_type == pspec2->match_type && |
453 | 0 | strcmp (pspec1->pattern, pspec2->pattern) == 0); |
454 | 0 | } |
455 | | |
456 | | /** |
457 | | * g_pattern_spec_match_string: |
458 | | * @pspec: a #GPatternSpec |
459 | | * @string: the UTF-8 encoded string to match |
460 | | * |
461 | | * Matches a string against a compiled pattern. |
462 | | * |
463 | | * If the string is to be |
464 | | * matched against more than one pattern, consider using |
465 | | * [method@GLib.PatternSpec.match] instead while supplying the reversed string. |
466 | | * |
467 | | * Returns: %TRUE if @string matches @pspec |
468 | | * |
469 | | * Since: 2.70 |
470 | | **/ |
471 | | gboolean |
472 | | g_pattern_spec_match_string (GPatternSpec *pspec, |
473 | | const gchar *string) |
474 | 0 | { |
475 | 0 | g_return_val_if_fail (pspec != NULL, FALSE); |
476 | 0 | g_return_val_if_fail (string != NULL, FALSE); |
477 | | |
478 | 0 | return g_pattern_spec_match (pspec, strlen (string), string, NULL); |
479 | 0 | } |
480 | | |
481 | | /** |
482 | | * g_pattern_match_string: (skip) |
483 | | * @pspec: a #GPatternSpec |
484 | | * @string: the UTF-8 encoded string to match |
485 | | * |
486 | | * Matches a string against a compiled pattern. |
487 | | * |
488 | | * If the string is to be |
489 | | * matched against more than one pattern, consider using |
490 | | * [method@GLib.PatternSpec.match] instead while supplying the reversed string. |
491 | | * |
492 | | * Returns: %TRUE if @string matches @pspec |
493 | | * Deprecated: 2.70: Use [method@GLib.PatternSpec.match_string] instead |
494 | | **/ |
495 | | gboolean |
496 | | g_pattern_match_string (GPatternSpec *pspec, |
497 | | const gchar *string) |
498 | 0 | { |
499 | 0 | return g_pattern_spec_match_string (pspec, string); |
500 | 0 | } |
501 | | |
502 | | /** |
503 | | * g_pattern_match_simple: |
504 | | * @pattern: the UTF-8 encoded pattern |
505 | | * @string: the UTF-8 encoded string to match |
506 | | * |
507 | | * Matches a string against a pattern given as a string. |
508 | | * |
509 | | * If this |
510 | | * function is to be called in a loop, it’s more efficient to compile |
511 | | * the pattern once with [ctor@GLib.PatternSpec.new] and call |
512 | | * [method@GLib.PatternSpec.match_string] repeatedly. |
513 | | * |
514 | | * Returns: %TRUE if @string matches @pattern |
515 | | **/ |
516 | | gboolean |
517 | | g_pattern_match_simple (const gchar *pattern, |
518 | | const gchar *string) |
519 | 0 | { |
520 | 0 | GPatternSpec *pspec; |
521 | 0 | gboolean ergo; |
522 | |
|
523 | 0 | g_return_val_if_fail (pattern != NULL, FALSE); |
524 | 0 | g_return_val_if_fail (string != NULL, FALSE); |
525 | | |
526 | 0 | pspec = g_pattern_spec_new (pattern); |
527 | 0 | ergo = g_pattern_spec_match (pspec, strlen (string), string, NULL); |
528 | 0 | g_pattern_spec_free (pspec); |
529 | |
|
530 | 0 | return ergo; |
531 | 0 | } |