Coverage Report

Created: 2025-07-11 07:03

/src/rauc/subprojects/glib-2.76.5/glib/gpattern.c
Line
Count
Source (jump to first uncovered line)
1
/* GLIB - Library of useful routines for C programming
2
 * Copyright (C) 1995-1997, 1999  Peter Mattis, Red Hat, Inc.
3
 *
4
 * SPDX-License-Identifier: LGPL-2.1-or-later
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18
 */
19
20
#include "config.h"
21
22
#include <string.h>
23
24
#include "gpattern.h"
25
26
#include "gmacros.h"
27
#include "gmem.h"
28
#include "gmessages.h"
29
#include "gstrfuncs.h"
30
#include "gunicode.h"
31
#include "gutils.h"
32
33
/**
34
 * SECTION:patterns
35
 * @title: Glob-style pattern matching
36
 * @short_description: matches strings against patterns containing '*'
37
 *                     (wildcard) and '?' (joker)
38
 *
39
 * The g_pattern_match* functions match a string
40
 * against a pattern containing '*' and '?' wildcards with similar
41
 * semantics as the standard glob() function: '*' matches an arbitrary,
42
 * possibly empty, string, '?' matches an arbitrary character.
43
 *
44
 * Note that in contrast to glob(), the '/' character can be matched by
45
 * the wildcards, there are no '[...]' character ranges and '*' and '?'
46
 * can not be escaped to include them literally in a pattern.
47
 *
48
 * When multiple strings must be matched against the same pattern, it
49
 * is better to compile the pattern to a #GPatternSpec using
50
 * g_pattern_spec_new() and use g_pattern_spec_match_string() instead of
51
 * g_pattern_match_simple(). This avoids the overhead of repeated
52
 * pattern compilation.
53
 **/
54
55
/**
56
 * GPatternSpec:
57
 *
58
 * A GPatternSpec struct is the 'compiled' form of a pattern. This
59
 * structure is opaque and its fields cannot be accessed directly.
60
 */
61
62
/* keep enum and structure of gpattern.c and patterntest.c in sync */
63
/* Matching strategy selected for a compiled pattern by g_pattern_spec_new()
 * and dispatched on in g_pattern_spec_match(). The quoted strings are
 * example patterns for each strategy.
 */
typedef enum
64
{
65
  G_MATCH_ALL,       /* "*A?A*": general matcher run on the string as given */
66
  G_MATCH_ALL_TAIL,  /* "*A?AA": general matcher run on the reversed string with the reversed pattern */
67
  G_MATCH_HEAD,      /* "AAAA*": literal prefix comparison (trailing '*' stripped) */
68
  G_MATCH_TAIL,      /* "*AAAA": literal suffix comparison (leading '*' stripped) */
69
  G_MATCH_EXACT,     /* "AAAAA": literal comparison of the whole string */
70
  G_MATCH_LAST       /* sentinel, not a strategy; upper bound when validating match_type */
71
} GMatchType;
72
73
/* Compiled form of a pattern, filled in by g_pattern_spec_new(). */
struct _GPatternSpec
74
{
75
  GMatchType match_type;      /* strategy g_pattern_spec_match() dispatches on */
76
  guint      pattern_length;  /* length of pattern in bytes, excluding the terminating NUL */
77
  guint      min_length;      /* strings shorter than this (in bytes) are rejected without matching */
78
  guint      max_length;      /* strings longer than this (in bytes) are rejected; G_MAXUINT if the pattern has a '*' */
79
  gchar     *pattern;         /* canonicalized pattern; stored reversed for G_MATCH_ALL_TAIL and without its '*' for G_MATCH_HEAD / G_MATCH_TAIL; released by g_pattern_spec_free() */
80
};
81
82
83
/* --- functions --- */
84
/* Generic matcher used for the G_MATCH_ALL and G_MATCH_ALL_TAIL strategies.
 *
 * Walks match_pattern and match_string in parallel:
 *  - '?' advances the string by one UTF-8 character via g_utf8_next_char()
 *    and fails if the string is already exhausted;
 *  - '*' accepts any (possibly empty) run of the string, resolved by
 *    searching for the next literal pattern byte and recursing on the rest;
 *  - every other pattern byte has to equal the current string byte.
 *
 * match_pattern:       NUL-terminated pattern to interpret
 * match_string:        NUL-terminated string to test
 * wildcard_reached_p:  set to TRUE once a '*' is processed; it is never
 *                      written otherwise, so a caller that reads it has to
 *                      initialise it to FALSE beforehand
 *
 * Returns TRUE if the pattern matches the complete string.
 */
static inline gboolean
85
g_pattern_ph_match (const gchar *match_pattern,
86
        const gchar *match_string,
87
        gboolean    *wildcard_reached_p)
88
0
{
89
0
  const gchar *pattern, *string;
90
0
  gchar ch;
91
92
0
  pattern = match_pattern;
93
0
  string = match_string;
94
95
0
  ch = *pattern; /* ch is always the pattern byte being processed ... */
96
0
  pattern++; /* ... and pattern already points one past it */
97
0
  while (ch)
98
0
    {
99
0
      switch (ch)
100
0
  {
101
0
  case '?':
102
0
    if (!*string)
103
0
      return FALSE;
104
0
    string = g_utf8_next_char (string); /* a joker consumes a whole character, not a byte */
105
0
    break;
106
107
0
  case '*':
108
0
    *wildcard_reached_p = TRUE;
109
0
    do /* skip the run of '*' and '?' following this wildcard */
110
0
      {
111
0
        ch = *pattern;
112
0
        pattern++;
113
0
        if (ch == '?')
114
0
    {
115
0
      if (!*string)
116
0
        return FALSE;
117
0
      string = g_utf8_next_char (string);
118
0
    }
119
0
      }
120
0
    while (ch == '*' || ch == '?');
121
0
    if (!ch) /* pattern ended inside the wildcard run: the rest of the string is accepted */
122
0
      return TRUE;
123
0
    do /* ch is now a literal: try each occurrence of it in the string */
124
0
      {
125
0
              gboolean next_wildcard_reached = FALSE;
126
0
        while (ch != *string) /* byte comparison, stepping one character at a time */
127
0
    {
128
0
      if (!*string)
129
0
        return FALSE;
130
0
      string = g_utf8_next_char (string);
131
0
    }
132
0
        string++; /* step over the matched literal byte */
133
0
        if (g_pattern_ph_match (pattern, string, &next_wildcard_reached))
134
0
    return TRUE;
135
0
              if (next_wildcard_reached)
136
                /* the forthcoming pattern substring up to the next wildcard has
137
                 * been matched, but a mismatch occurred for the rest of the
138
                 * pattern, following the next wildcard.
139
                 * there's no need to advance the current match position any
140
                 * further if the rest pattern will not match.
141
                 */
142
0
    return FALSE;
143
0
      }
144
0
    while (*string);
145
0
    break;
146
147
0
  default:
148
0
    if (ch == *string) /* literal pattern byte: must equal the current string byte */
149
0
      string++;
150
0
    else
151
0
      return FALSE;
152
0
    break;
153
0
  }
154
155
0
      ch = *pattern;
156
0
      pattern++;
157
0
    }
158
159
0
  return *string == 0; /* pattern exhausted: match only if the string is exhausted too */
160
0
}
161
162
/**
163
 * g_pattern_spec_match:
164
 * @pspec: a #GPatternSpec
165
 * @string_length: the length of @string (in bytes, i.e. strlen(),
166
 *     not g_utf8_strlen())
167
 * @string: the UTF-8 encoded string to match
168
 * @string_reversed: (nullable): the reverse of @string or %NULL
169
 *
170
 * Matches a string against a compiled pattern. Passing the correct
171
 * length of the string given is mandatory. The reversed string can be
172
 * omitted by passing %NULL, this is more efficient if the reversed
173
 * version of the string to be matched is not at hand, as
174
 * g_pattern_spec_match() will only construct it if the compiled pattern
175
 * requires reverse matches.
176
 *
177
 * Note that, if the user code will (possibly) match a string against a
178
 * multitude of patterns containing wildcards, chances are high that
179
 * some patterns will require a reversed string. In this case, it's
180
 * more efficient to provide the reversed string to avoid multiple
181
 * constructions thereof in the various calls to g_pattern_spec_match().
182
 *
183
 * Note also that the reverse of a UTF-8 encoded string can in general
184
 * not be obtained by g_strreverse(). This works only if the string
185
 * does not contain any multibyte characters. GLib offers the
186
 * g_utf8_strreverse() function to reverse UTF-8 encoded strings.
187
 *
188
 * Returns: %TRUE if @string matches @pspec
189
 *
190
 * Since: 2.70
191
 **/
192
gboolean
193
g_pattern_spec_match (GPatternSpec *pspec,
194
                      gsize string_length,
195
                      const gchar *string,
196
                      const gchar *string_reversed)
197
0
{
198
0
  g_return_val_if_fail (pspec != NULL, FALSE);
199
0
  g_return_val_if_fail (string != NULL, FALSE);
200
201
0
  if (string_length < pspec->min_length ||
202
0
      string_length > pspec->max_length)
203
0
    return FALSE;
204
205
0
  switch (pspec->match_type)
206
0
    {
207
0
      gboolean dummy;
208
0
    case G_MATCH_ALL:
209
0
      return g_pattern_ph_match (pspec->pattern, string, &dummy);
210
0
    case G_MATCH_ALL_TAIL:
211
0
      if (string_reversed)
212
0
  return g_pattern_ph_match (pspec->pattern, string_reversed, &dummy);
213
0
      else
214
0
  {
215
0
          gboolean result;
216
0
          gchar *tmp;
217
0
    tmp = g_utf8_strreverse (string, string_length);
218
0
    result = g_pattern_ph_match (pspec->pattern, tmp, &dummy);
219
0
    g_free (tmp);
220
0
    return result;
221
0
  }
222
0
    case G_MATCH_HEAD:
223
0
      if (pspec->pattern_length == string_length)
224
0
  return strcmp (pspec->pattern, string) == 0;
225
0
      else if (pspec->pattern_length)
226
0
  return strncmp (pspec->pattern, string, pspec->pattern_length) == 0;
227
0
      else
228
0
  return TRUE;
229
0
    case G_MATCH_TAIL:
230
0
      if (pspec->pattern_length)
231
0
        return strcmp (pspec->pattern, string + (string_length - pspec->pattern_length)) == 0;
232
0
      else
233
0
  return TRUE;
234
0
    case G_MATCH_EXACT:
235
0
      if (pspec->pattern_length != string_length)
236
0
        return FALSE;
237
0
      else
238
0
        return strcmp (pspec->pattern, string) == 0;
239
0
    default:
240
0
      g_return_val_if_fail (pspec->match_type < G_MATCH_LAST, FALSE);
241
0
      return FALSE;
242
0
    }
243
0
}
244
245
/**
246
 * g_pattern_match: (skip)
247
 * @pspec: a #GPatternSpec
248
 * @string_length: the length of @string (in bytes, i.e. strlen(),
249
 *     not g_utf8_strlen())
250
 * @string: the UTF-8 encoded string to match
251
 * @string_reversed: (nullable): the reverse of @string or %NULL
252
 *
253
 * Matches a string against a compiled pattern. Passing the correct
254
 * length of the string given is mandatory. The reversed string can be
255
 * omitted by passing %NULL, this is more efficient if the reversed
256
 * version of the string to be matched is not at hand, as
257
 * g_pattern_match() will only construct it if the compiled pattern
258
 * requires reverse matches.
259
 *
260
 * Note that, if the user code will (possibly) match a string against a
261
 * multitude of patterns containing wildcards, chances are high that
262
 * some patterns will require a reversed string. In this case, it's
263
 * more efficient to provide the reversed string to avoid multiple
264
 * constructions thereof in the various calls to g_pattern_match().
265
 *
266
 * Note also that the reverse of a UTF-8 encoded string can in general
267
 * not be obtained by g_strreverse(). This works only if the string
268
 * does not contain any multibyte characters. GLib offers the
269
 * g_utf8_strreverse() function to reverse UTF-8 encoded strings.
270
 *
271
 * Returns: %TRUE if @string matches @pspec
272
 * Deprecated: 2.70: Use g_pattern_spec_match() instead
273
 **/
274
gboolean
275
g_pattern_match (GPatternSpec *pspec,
276
                 guint string_length,
277
                 const gchar *string,
278
                 const gchar *string_reversed)
279
0
{
280
0
  return g_pattern_spec_match (pspec, string_length, string, string_reversed);
281
0
}
282
283
/**
284
 * g_pattern_spec_new:
285
 * @pattern: a zero-terminated UTF-8 encoded string
286
 *
287
 * Compiles a pattern to a #GPatternSpec.
288
 *
289
 * Returns: a newly-allocated #GPatternSpec
290
 **/
291
GPatternSpec*
292
g_pattern_spec_new (const gchar *pattern)
293
0
{
294
0
  GPatternSpec *pspec;
295
0
  gboolean seen_joker = FALSE, seen_wildcard = FALSE, more_wildcards = FALSE;
296
0
  gint hw_pos = -1, tw_pos = -1, hj_pos = -1, tj_pos = -1;
297
0
  gboolean follows_wildcard = FALSE;
298
0
  guint pending_jokers = 0;
299
0
  const gchar *s;
300
0
  gchar *d;
301
0
  guint i;
302
  
303
0
  g_return_val_if_fail (pattern != NULL, NULL);
304
305
  /* canonicalize pattern and collect necessary stats */
306
0
  pspec = g_new (GPatternSpec, 1);
307
0
  pspec->pattern_length = strlen (pattern);
308
0
  pspec->min_length = 0;
309
0
  pspec->max_length = 0;
310
0
  pspec->pattern = g_new (gchar, pspec->pattern_length + 1);
311
0
  d = pspec->pattern;
312
0
  for (i = 0, s = pattern; *s != 0; s++)
313
0
    {
314
0
      switch (*s)
315
0
  {
316
0
  case '*':
317
0
    if (follows_wildcard) /* compress multiple wildcards */
318
0
      {
319
0
        pspec->pattern_length--;
320
0
        continue;
321
0
      }
322
0
    follows_wildcard = TRUE;
323
0
    if (hw_pos < 0)
324
0
      hw_pos = i;
325
0
    tw_pos = i;
326
0
    break;
327
0
  case '?':
328
0
    pending_jokers++;
329
0
    pspec->min_length++;
330
0
    pspec->max_length += 4; /* maximum UTF-8 character length */
331
0
    continue;
332
0
  default:
333
0
    for (; pending_jokers; pending_jokers--, i++) {
334
0
      *d++ = '?';
335
0
        if (hj_pos < 0)
336
0
       hj_pos = i;
337
0
      tj_pos = i;
338
0
    }
339
0
    follows_wildcard = FALSE;
340
0
    pspec->min_length++;
341
0
    pspec->max_length++;
342
0
    break;
343
0
  }
344
0
      *d++ = *s;
345
0
      i++;
346
0
    }
347
0
  for (; pending_jokers; pending_jokers--) {
348
0
    *d++ = '?';
349
0
    if (hj_pos < 0)
350
0
      hj_pos = i;
351
0
    tj_pos = i;
352
0
  }
353
0
  *d++ = 0;
354
0
  seen_joker = hj_pos >= 0;
355
0
  seen_wildcard = hw_pos >= 0;
356
0
  more_wildcards = seen_wildcard && hw_pos != tw_pos;
357
0
  if (seen_wildcard)
358
0
    pspec->max_length = G_MAXUINT;
359
360
  /* special case sole head/tail wildcard or exact matches */
361
0
  if (!seen_joker && !more_wildcards)
362
0
    {
363
0
      if (pspec->pattern[0] == '*')
364
0
  {
365
0
    pspec->match_type = G_MATCH_TAIL;
366
0
          memmove (pspec->pattern, pspec->pattern + 1, --pspec->pattern_length);
367
0
    pspec->pattern[pspec->pattern_length] = 0;
368
0
    return pspec;
369
0
  }
370
0
      if (pspec->pattern_length > 0 &&
371
0
    pspec->pattern[pspec->pattern_length - 1] == '*')
372
0
  {
373
0
    pspec->match_type = G_MATCH_HEAD;
374
0
    pspec->pattern[--pspec->pattern_length] = 0;
375
0
    return pspec;
376
0
  }
377
0
      if (!seen_wildcard)
378
0
  {
379
0
    pspec->match_type = G_MATCH_EXACT;
380
0
    return pspec;
381
0
  }
382
0
    }
383
384
  /* now just need to distinguish between head or tail match start */
385
0
  tw_pos = pspec->pattern_length - 1 - tw_pos;  /* last pos to tail distance */
386
0
  tj_pos = pspec->pattern_length - 1 - tj_pos;  /* last pos to tail distance */
387
0
  if (seen_wildcard)
388
0
    pspec->match_type = tw_pos > hw_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL;
389
0
  else /* seen_joker */
390
0
    pspec->match_type = tj_pos > hj_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL;
391
0
  if (pspec->match_type == G_MATCH_ALL_TAIL) {
392
0
    gchar *tmp = pspec->pattern;
393
0
    pspec->pattern = g_utf8_strreverse (pspec->pattern, pspec->pattern_length);
394
0
    g_free (tmp);
395
0
  }
396
0
  return pspec;
397
0
}
398
399
/**
400
 * g_pattern_spec_copy:
401
 * @pspec: a #GPatternSpec
402
 *
403
 * Copies @pspec in a new #GPatternSpec.
404
 *
405
 * Returns: (transfer full): a copy of @pspec.
406
 *
407
 * Since: 2.70
408
 **/
409
GPatternSpec *
g_pattern_spec_copy (GPatternSpec *pspec)
{
  GPatternSpec *duplicate;

  g_return_val_if_fail (pspec != NULL, NULL);

  duplicate = g_new (GPatternSpec, 1);

  /* Scalar members are taken over as they are ... */
  duplicate->match_type = pspec->match_type;
  duplicate->pattern_length = pspec->pattern_length;
  duplicate->min_length = pspec->min_length;
  duplicate->max_length = pspec->max_length;

  /* ... while the pattern text gets its own allocation, so that the copy
   * can be freed independently of the original.
   */
  duplicate->pattern = g_strndup (pspec->pattern, pspec->pattern_length);

  return duplicate;
}
422
423
/**
424
 * g_pattern_spec_free:
425
 * @pspec: a #GPatternSpec
426
 *
427
 * Frees the memory allocated for the #GPatternSpec.
428
 **/
429
void
430
g_pattern_spec_free (GPatternSpec *pspec)
431
0
{
432
0
  g_return_if_fail (pspec != NULL);
433
434
0
  g_free (pspec->pattern);
435
0
  g_free (pspec);
436
0
}
437
438
/**
439
 * g_pattern_spec_equal:
440
 * @pspec1: a #GPatternSpec
441
 * @pspec2: another #GPatternSpec
442
 *
443
 * Compares two compiled pattern specs and returns whether they will
444
 * match the same set of strings.
445
 *
446
 * Returns: Whether the compiled patterns are equal
447
 **/
448
gboolean
449
g_pattern_spec_equal (GPatternSpec *pspec1,
450
          GPatternSpec *pspec2)
451
0
{
452
0
  g_return_val_if_fail (pspec1 != NULL, FALSE);
453
0
  g_return_val_if_fail (pspec2 != NULL, FALSE);
454
455
0
  return (pspec1->pattern_length == pspec2->pattern_length &&
456
0
    pspec1->match_type == pspec2->match_type &&
457
0
    strcmp (pspec1->pattern, pspec2->pattern) == 0);
458
0
}
459
460
/**
461
 * g_pattern_spec_match_string:
462
 * @pspec: a #GPatternSpec
463
 * @string: the UTF-8 encoded string to match
464
 *
465
 * Matches a string against a compiled pattern. If the string is to be
466
 * matched against more than one pattern, consider using
467
 * g_pattern_spec_match() instead while supplying the reversed string.
468
 *
469
 * Returns: %TRUE if @string matches @pspec
470
 *
471
 * Since: 2.70
472
 **/
473
gboolean
474
g_pattern_spec_match_string (GPatternSpec *pspec,
475
                             const gchar *string)
476
0
{
477
0
  g_return_val_if_fail (pspec != NULL, FALSE);
478
0
  g_return_val_if_fail (string != NULL, FALSE);
479
480
0
  return g_pattern_spec_match (pspec, strlen (string), string, NULL);
481
0
}
482
483
/**
484
 * g_pattern_match_string: (skip)
485
 * @pspec: a #GPatternSpec
486
 * @string: the UTF-8 encoded string to match
487
 *
488
 * Matches a string against a compiled pattern. If the string is to be
489
 * matched against more than one pattern, consider using
490
 * g_pattern_match() instead while supplying the reversed string.
491
 *
492
 * Returns: %TRUE if @string matches @pspec
493
 * Deprecated: 2.70: Use g_pattern_spec_match_string() instead
494
 **/
495
gboolean
496
g_pattern_match_string (GPatternSpec *pspec,
497
                        const gchar *string)
498
0
{
499
0
  return g_pattern_spec_match_string (pspec, string);
500
0
}
501
502
/**
503
 * g_pattern_match_simple:
504
 * @pattern: the UTF-8 encoded pattern
505
 * @string: the UTF-8 encoded string to match
506
 *
507
 * Matches a string against a pattern given as a string. If this
508
 * function is to be called in a loop, it's more efficient to compile
509
 * the pattern once with g_pattern_spec_new() and call
510
 * g_pattern_spec_match_string() repeatedly.
511
 *
512
 * Returns: %TRUE if @string matches @pattern
513
 **/
514
gboolean
515
g_pattern_match_simple (const gchar *pattern,
516
      const gchar *string)
517
0
{
518
0
  GPatternSpec *pspec;
519
0
  gboolean ergo;
520
521
0
  g_return_val_if_fail (pattern != NULL, FALSE);
522
0
  g_return_val_if_fail (string != NULL, FALSE);
523
524
0
  pspec = g_pattern_spec_new (pattern);
525
0
  ergo = g_pattern_spec_match (pspec, strlen (string), string, NULL);
526
0
  g_pattern_spec_free (pspec);
527
528
0
  return ergo;
529
0
}