Coverage Report

Created: 2025-08-28 06:10

/src/gstreamer/subprojects/glib-2.82.5/glib/gpattern.c
Line
Count
Source (jump to first uncovered line)
1
/* GLIB - Library of useful routines for C programming
2
 * Copyright (C) 1995-1997, 1999  Peter Mattis, Red Hat, Inc.
3
 *
4
 * SPDX-License-Identifier: LGPL-2.1-or-later
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18
 */
19
20
#include "config.h"
21
22
#include <string.h>
23
24
#include "gpattern.h"
25
26
#include "gmacros.h"
27
#include "gmem.h"
28
#include "gmessages.h"
29
#include "gstrfuncs.h"
30
#include "gunicode.h"
31
#include "gutils.h"
32
33
/**
34
 * GPatternSpec:
35
 *
36
 * A `GPatternSpec` struct is the ‘compiled’ form of a glob-style pattern.
37
 *
38
 * The [func@GLib.pattern_match_simple] and [method@GLib.PatternSpec.match] functions
39
 * match a string against a pattern containing `*` and `?` wildcards with similar
40
 * semantics as the standard `glob()` function: `*` matches an arbitrary,
41
 * possibly empty, string, `?` matches an arbitrary character.
42
 *
43
 * Note that in contrast to [`glob()`](man:glob(3)), the `/` character can be
44
 * matched by the wildcards, there are no `[…]` character ranges and `*` and `?`
45
 * can not be escaped to include them literally in a pattern.
46
 *
47
 * When multiple strings must be matched against the same pattern, it is better
48
 * to compile the pattern to a [struct@GLib.PatternSpec] using
49
 * [ctor@GLib.PatternSpec.new] and use [method@GLib.PatternSpec.match_string]
50
 * instead of [func@GLib.pattern_match_simple]. This avoids the overhead of repeated
51
 * pattern compilation.
52
 */
53
54
/* keep enum and structure of gpattern.c and patterntest.c in sync */
55
typedef enum
{
  /* generic matcher, pattern and string scanned front to back */
  G_MATCH_ALL = 0,   /* "*A?A*" */
  /* generic matcher run on the reversed pattern and the reversed string */
  G_MATCH_ALL_TAIL,  /* "*A?AA" */
  /* literal prefix followed by a single trailing wildcard */
  G_MATCH_HEAD,      /* "AAAA*" */
  /* single leading wildcard followed by a literal suffix */
  G_MATCH_TAIL,      /* "*AAAA" */
  /* no wildcard and no joker: plain string comparison */
  G_MATCH_EXACT,     /* "AAAAA" */
  /* number of valid match types; used only as an upper bound check */
  G_MATCH_LAST
} GMatchType;
64
65
struct _GPatternSpec
66
{
67
  GMatchType match_type;
68
  size_t     pattern_length;
69
  size_t     min_length;
70
  size_t     max_length;
71
  gchar     *pattern;
72
};
73
74
75
/* --- functions --- */
76
static inline gboolean
77
g_pattern_ph_match (const gchar *match_pattern,
78
        const gchar *match_string,
79
        gboolean    *wildcard_reached_p)
80
0
{
81
0
  const gchar *pattern, *string;
82
0
  gchar ch;
83
84
0
  pattern = match_pattern;
85
0
  string = match_string;
86
87
0
  ch = *pattern;
88
0
  pattern++;
89
0
  while (ch)
90
0
    {
91
0
      switch (ch)
92
0
  {
93
0
  case '?':
94
0
    if (!*string)
95
0
      return FALSE;
96
0
    string = g_utf8_next_char (string);
97
0
    break;
98
99
0
  case '*':
100
0
    *wildcard_reached_p = TRUE;
101
0
    do
102
0
      {
103
0
        ch = *pattern;
104
0
        pattern++;
105
0
        if (ch == '?')
106
0
    {
107
0
      if (!*string)
108
0
        return FALSE;
109
0
      string = g_utf8_next_char (string);
110
0
    }
111
0
      }
112
0
    while (ch == '*' || ch == '?');
113
0
    if (!ch)
114
0
      return TRUE;
115
0
    do
116
0
      {
117
0
              gboolean next_wildcard_reached = FALSE;
118
0
        while (ch != *string)
119
0
    {
120
0
      if (!*string)
121
0
        return FALSE;
122
0
      string = g_utf8_next_char (string);
123
0
    }
124
0
        string++;
125
0
        if (g_pattern_ph_match (pattern, string, &next_wildcard_reached))
126
0
    return TRUE;
127
0
              if (next_wildcard_reached)
128
                /* the forthcoming pattern substring up to the next wildcard has
129
                 * been matched, but a mismatch occurred for the rest of the
130
                 * pattern, following the next wildcard.
131
                 * there's no need to advance the current match position any
132
                 * further if the rest pattern will not match.
133
                 */
134
0
    return FALSE;
135
0
      }
136
0
    while (*string);
137
0
    break;
138
139
0
  default:
140
0
    if (ch == *string)
141
0
      string++;
142
0
    else
143
0
      return FALSE;
144
0
    break;
145
0
  }
146
147
0
      ch = *pattern;
148
0
      pattern++;
149
0
    }
150
151
0
  return *string == 0;
152
0
}
153
154
/**
155
 * g_pattern_spec_match:
156
 * @pspec: a #GPatternSpec
157
 * @string_length: the length of @string (in bytes, i.e. `strlen()`,
158
 *    not [func@GLib.utf8_strlen])
159
 * @string: the UTF-8 encoded string to match
160
 * @string_reversed: (nullable): the reverse of @string
161
 *
162
 * Matches a string against a compiled pattern.
163
 *
164
 * Passing the correct
165
 * length of the string given is mandatory. The reversed string can be
166
 * omitted by passing `NULL`, this is more efficient if the reversed
167
 * version of the string to be matched is not at hand, as
168
 * [method@GLib.PatternSpec.match] will only construct it if the compiled pattern
169
 * requires reverse matches.
170
 *
171
 * Note that, if the user code will (possibly) match a string against a
172
 * multitude of patterns containing wildcards, chances are high that
173
 * some patterns will require a reversed string. In this case, it’s
174
 * more efficient to provide the reversed string to avoid multiple
175
 * constructions thereof in the various calls to [method@GLib.PatternSpec.match].
176
 *
177
 * Note also that the reverse of a UTF-8 encoded string can in general
178
 * not be obtained by [func@GLib.strreverse]. This works only if the string
179
 * does not contain any multibyte characters. GLib offers the
180
 * [func@GLib.utf8_strreverse] function to reverse UTF-8 encoded strings.
181
 *
182
 * Returns: %TRUE if @string matches @pspec
183
 *
184
 * Since: 2.70
185
 **/
186
gboolean
187
g_pattern_spec_match (GPatternSpec *pspec,
188
                      gsize string_length,
189
                      const gchar *string,
190
                      const gchar *string_reversed)
191
0
{
192
0
  g_return_val_if_fail (pspec != NULL, FALSE);
193
0
  g_return_val_if_fail (string != NULL, FALSE);
194
195
0
  if (string_length < pspec->min_length ||
196
0
      string_length > pspec->max_length)
197
0
    return FALSE;
198
199
0
  switch (pspec->match_type)
200
0
    {
201
0
      gboolean dummy;
202
0
    case G_MATCH_ALL:
203
0
      return g_pattern_ph_match (pspec->pattern, string, &dummy);
204
0
    case G_MATCH_ALL_TAIL:
205
0
      if (string_reversed)
206
0
  return g_pattern_ph_match (pspec->pattern, string_reversed, &dummy);
207
0
      else
208
0
  {
209
0
          gboolean result;
210
0
          gchar *tmp;
211
0
    tmp = g_utf8_strreverse (string, string_length);
212
0
    result = g_pattern_ph_match (pspec->pattern, tmp, &dummy);
213
0
    g_free (tmp);
214
0
    return result;
215
0
  }
216
0
    case G_MATCH_HEAD:
217
0
      if (pspec->pattern_length == string_length)
218
0
  return strcmp (pspec->pattern, string) == 0;
219
0
      else if (pspec->pattern_length)
220
0
  return strncmp (pspec->pattern, string, pspec->pattern_length) == 0;
221
0
      else
222
0
  return TRUE;
223
0
    case G_MATCH_TAIL:
224
0
      if (pspec->pattern_length)
225
0
        return strcmp (pspec->pattern, string + (string_length - pspec->pattern_length)) == 0;
226
0
      else
227
0
  return TRUE;
228
0
    case G_MATCH_EXACT:
229
0
      if (pspec->pattern_length != string_length)
230
0
        return FALSE;
231
0
      else
232
0
        return strcmp (pspec->pattern, string) == 0;
233
0
    default:
234
0
      g_return_val_if_fail (pspec->match_type < G_MATCH_LAST, FALSE);
235
0
      return FALSE;
236
0
    }
237
0
}
238
239
/**
240
 * g_pattern_match: (skip)
241
 * @pspec: a #GPatternSpec
242
 * @string_length: the length of @string (in bytes, i.e. `strlen()`,
243
 *    not [func@GLib.utf8_strlen])
244
 * @string: the UTF-8 encoded string to match
245
 * @string_reversed: (nullable): the reverse of @string
246
 *
247
 * Matches a string against a compiled pattern.
248
 *
249
 * Passing the correct
250
 * length of the string given is mandatory. The reversed string can be
251
 * omitted by passing `NULL`, this is more efficient if the reversed
252
 * version of the string to be matched is not at hand, as
253
 * `g_pattern_match()` will only construct it if the compiled pattern
254
 * requires reverse matches.
255
 *
256
 * Note that, if the user code will (possibly) match a string against a
257
 * multitude of patterns containing wildcards, chances are high that
258
 * some patterns will require a reversed string. In this case, it’s
259
 * more efficient to provide the reversed string to avoid multiple
260
 * constructions thereof in the various calls to `g_pattern_match()`.
261
 *
262
 * Note also that the reverse of a UTF-8 encoded string can in general
263
 * not be obtained by [func@GLib.strreverse]. This works only if the string
264
 * does not contain any multibyte characters. GLib offers the
265
 * [func@GLib.utf8_strreverse] function to reverse UTF-8 encoded strings.
266
 *
267
 * Returns: %TRUE if @string matches @pspec
268
 * Deprecated: 2.70: Use [method@GLib.PatternSpec.match] instead
269
 **/
270
gboolean
271
g_pattern_match (GPatternSpec *pspec,
272
                 guint string_length,
273
                 const gchar *string,
274
                 const gchar *string_reversed)
275
0
{
276
0
  return g_pattern_spec_match (pspec, string_length, string, string_reversed);
277
0
}
278
279
/**
280
 * g_pattern_spec_new:
281
 * @pattern: a zero-terminated UTF-8 encoded string
282
 *
283
 * Compiles a pattern to a [type@GLib.PatternSpec].
284
 *
285
 * Returns: (transfer full): a newly-allocated [type@GLib.PatternSpec]
286
 **/
287
GPatternSpec*
288
g_pattern_spec_new (const gchar *pattern)
289
0
{
290
0
  GPatternSpec *pspec;
291
0
  gboolean seen_joker = FALSE, seen_wildcard = FALSE, more_wildcards = FALSE;
292
0
  gint hw_pos = -1, tw_pos = -1, hj_pos = -1, tj_pos = -1;
293
0
  gboolean follows_wildcard = FALSE;
294
0
  guint pending_jokers = 0;
295
0
  const gchar *s;
296
0
  gchar *d;
297
0
  guint i;
298
  
299
0
  g_return_val_if_fail (pattern != NULL, NULL);
300
301
  /* canonicalize pattern and collect necessary stats */
302
0
  pspec = g_new (GPatternSpec, 1);
303
0
  pspec->pattern_length = strlen (pattern);
304
0
  pspec->min_length = 0;
305
0
  pspec->max_length = 0;
306
0
  pspec->pattern = g_new (gchar, pspec->pattern_length + 1);
307
0
  d = pspec->pattern;
308
0
  for (i = 0, s = pattern; *s != 0; s++)
309
0
    {
310
0
      switch (*s)
311
0
  {
312
0
  case '*':
313
0
    if (follows_wildcard) /* compress multiple wildcards */
314
0
      {
315
0
        pspec->pattern_length--;
316
0
        continue;
317
0
      }
318
0
    follows_wildcard = TRUE;
319
0
    if (hw_pos < 0)
320
0
      hw_pos = i;
321
0
    tw_pos = i;
322
0
    break;
323
0
  case '?':
324
0
    pending_jokers++;
325
0
    pspec->min_length++;
326
0
    pspec->max_length += 4; /* maximum UTF-8 character length */
327
0
    continue;
328
0
  default:
329
0
    for (; pending_jokers; pending_jokers--, i++) {
330
0
      *d++ = '?';
331
0
        if (hj_pos < 0)
332
0
       hj_pos = i;
333
0
      tj_pos = i;
334
0
    }
335
0
    follows_wildcard = FALSE;
336
0
    pspec->min_length++;
337
0
    pspec->max_length++;
338
0
    break;
339
0
  }
340
0
      *d++ = *s;
341
0
      i++;
342
0
    }
343
0
  for (; pending_jokers; pending_jokers--) {
344
0
    *d++ = '?';
345
0
    if (hj_pos < 0)
346
0
      hj_pos = i;
347
0
    tj_pos = i;
348
0
  }
349
0
  *d++ = 0;
350
0
  seen_joker = hj_pos >= 0;
351
0
  seen_wildcard = hw_pos >= 0;
352
0
  more_wildcards = seen_wildcard && hw_pos != tw_pos;
353
0
  if (seen_wildcard)
354
0
    pspec->max_length = G_MAXUINT;
355
356
  /* special case sole head/tail wildcard or exact matches */
357
0
  if (!seen_joker && !more_wildcards)
358
0
    {
359
0
      if (pspec->pattern[0] == '*')
360
0
  {
361
0
    pspec->match_type = G_MATCH_TAIL;
362
0
          memmove (pspec->pattern, pspec->pattern + 1, --pspec->pattern_length);
363
0
    pspec->pattern[pspec->pattern_length] = 0;
364
0
    return pspec;
365
0
  }
366
0
      if (pspec->pattern_length > 0 &&
367
0
    pspec->pattern[pspec->pattern_length - 1] == '*')
368
0
  {
369
0
    pspec->match_type = G_MATCH_HEAD;
370
0
    pspec->pattern[--pspec->pattern_length] = 0;
371
0
    return pspec;
372
0
  }
373
0
      if (!seen_wildcard)
374
0
  {
375
0
    pspec->match_type = G_MATCH_EXACT;
376
0
    return pspec;
377
0
  }
378
0
    }
379
380
  /* now just need to distinguish between head or tail match start */
381
0
  tw_pos = pspec->pattern_length - 1 - tw_pos;  /* last pos to tail distance */
382
0
  tj_pos = pspec->pattern_length - 1 - tj_pos;  /* last pos to tail distance */
383
0
  if (seen_wildcard)
384
0
    pspec->match_type = tw_pos > hw_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL;
385
0
  else /* seen_joker */
386
0
    pspec->match_type = tj_pos > hj_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL;
387
0
  if (pspec->match_type == G_MATCH_ALL_TAIL) {
388
0
    gchar *tmp = pspec->pattern;
389
0
    pspec->pattern = g_utf8_strreverse (pspec->pattern, pspec->pattern_length);
390
0
    g_free (tmp);
391
0
  }
392
0
  return pspec;
393
0
}
394
395
/**
396
 * g_pattern_spec_copy:
397
 * @pspec: a #GPatternSpec
398
 *
399
 * Copies @pspec in a new [type@GLib.PatternSpec].
400
 *
401
 * Returns: (transfer full): a copy of @pspec.
402
 *
403
 * Since: 2.70
404
 **/
405
GPatternSpec *
g_pattern_spec_copy (GPatternSpec *pspec)
{
  GPatternSpec *duplicate;

  g_return_val_if_fail (pspec != NULL, NULL);

  duplicate = g_new (GPatternSpec, 1);

  /* copy all scalar members in one go, then give the duplicate its own
   * pattern buffer so both specs can be freed independently */
  *duplicate = *pspec;
  duplicate->pattern = g_strndup (pspec->pattern, pspec->pattern_length);

  return duplicate;
}
418
419
/**
420
 * g_pattern_spec_free:
421
 * @pspec: a #GPatternSpec
422
 *
423
 * Frees the memory allocated for the [type@GLib.PatternSpec].
424
 **/
425
void
426
g_pattern_spec_free (GPatternSpec *pspec)
427
0
{
428
0
  g_return_if_fail (pspec != NULL);
429
430
0
  g_free (pspec->pattern);
431
0
  g_free (pspec);
432
0
}
433
434
/**
435
 * g_pattern_spec_equal:
436
 * @pspec1: a #GPatternSpec
437
 * @pspec2: another #GPatternSpec
438
 *
439
 * Compares two compiled pattern specs and returns whether they will
440
 * match the same set of strings.
441
 *
442
 * Returns: Whether the compiled patterns are equal
443
 **/
444
gboolean
445
g_pattern_spec_equal (GPatternSpec *pspec1,
446
          GPatternSpec *pspec2)
447
0
{
448
0
  g_return_val_if_fail (pspec1 != NULL, FALSE);
449
0
  g_return_val_if_fail (pspec2 != NULL, FALSE);
450
451
0
  return (pspec1->pattern_length == pspec2->pattern_length &&
452
0
    pspec1->match_type == pspec2->match_type &&
453
0
    strcmp (pspec1->pattern, pspec2->pattern) == 0);
454
0
}
455
456
/**
457
 * g_pattern_spec_match_string:
458
 * @pspec: a #GPatternSpec
459
 * @string: the UTF-8 encoded string to match
460
 *
461
 * Matches a string against a compiled pattern.
462
 *
463
 * If the string is to be
464
 * matched against more than one pattern, consider using
465
 * [method@GLib.PatternSpec.match] instead while supplying the reversed string.
466
 *
467
 * Returns: %TRUE if @string matches @pspec
468
 *
469
 * Since: 2.70
470
 **/
471
gboolean
472
g_pattern_spec_match_string (GPatternSpec *pspec,
473
                             const gchar *string)
474
0
{
475
0
  g_return_val_if_fail (pspec != NULL, FALSE);
476
0
  g_return_val_if_fail (string != NULL, FALSE);
477
478
0
  return g_pattern_spec_match (pspec, strlen (string), string, NULL);
479
0
}
480
481
/**
482
 * g_pattern_match_string: (skip)
483
 * @pspec: a #GPatternSpec
484
 * @string: the UTF-8 encoded string to match
485
 *
486
 * Matches a string against a compiled pattern.
487
 *
488
 * If the string is to be
489
 * matched against more than one pattern, consider using
490
 * [method@GLib.PatternSpec.match] instead while supplying the reversed string.
491
 *
492
 * Returns: %TRUE if @string matches @pspec
493
 * Deprecated: 2.70: Use [method@GLib.PatternSpec.match_string] instead
494
 **/
495
gboolean
496
g_pattern_match_string (GPatternSpec *pspec,
497
                        const gchar *string)
498
0
{
499
0
  return g_pattern_spec_match_string (pspec, string);
500
0
}
501
502
/**
503
 * g_pattern_match_simple:
504
 * @pattern: the UTF-8 encoded pattern
505
 * @string: the UTF-8 encoded string to match
506
 *
507
 * Matches a string against a pattern given as a string.
508
 *
509
 * If this
510
 * function is to be called in a loop, it’s more efficient to compile
511
 * the pattern once with [ctor@GLib.PatternSpec.new] and call
512
 * [method@GLib.PatternSpec.match_string] repeatedly.
513
 *
514
 * Returns: %TRUE if @string matches @pattern
515
 **/
516
gboolean
517
g_pattern_match_simple (const gchar *pattern,
518
      const gchar *string)
519
0
{
520
0
  GPatternSpec *pspec;
521
0
  gboolean ergo;
522
523
0
  g_return_val_if_fail (pattern != NULL, FALSE);
524
0
  g_return_val_if_fail (string != NULL, FALSE);
525
526
0
  pspec = g_pattern_spec_new (pattern);
527
0
  ergo = g_pattern_spec_match (pspec, strlen (string), string, NULL);
528
0
  g_pattern_spec_free (pspec);
529
530
0
  return ergo;
531
0
}