Coverage Report

Created: 2025-07-01 07:09

/src/glib/glib/gpattern.c
Line
Count
Source (jump to first uncovered line)
1
/* GLIB - Library of useful routines for C programming
2
 * Copyright (C) 1995-1997, 1999  Peter Mattis, Red Hat, Inc.
3
 *
4
 * This library is free software; you can redistribute it and/or
5
 * modify it under the terms of the GNU Lesser General Public
6
 * License as published by the Free Software Foundation; either
7
 * version 2.1 of the License, or (at your option) any later version.
8
 *
9
 * This library is distributed in the hope that it will be useful,
10
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12
 * Lesser General Public License for more details.
13
 *
14
 * You should have received a copy of the GNU Lesser General Public
15
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
16
 */
17
18
#include "config.h"
19
20
#include <string.h>
21
22
#include "gpattern.h"
23
24
#include "gmacros.h"
25
#include "gmessages.h"
26
#include "gmem.h"
27
#include "gunicode.h"
28
#include "gutils.h" 
29
30
/**
31
 * SECTION:patterns
32
 * @title: Glob-style pattern matching
33
 * @short_description: matches strings against patterns containing '*'
34
 *                     (wildcard) and '?' (joker)
35
 *
36
 * The g_pattern_match* functions match a string
37
 * against a pattern containing '*' and '?' wildcards with similar
38
 * semantics as the standard glob() function: '*' matches an arbitrary,
39
 * possibly empty, string, '?' matches an arbitrary character.
40
 *
41
 * Note that in contrast to glob(), the '/' character can be matched by
42
 * the wildcards, there are no '[...]' character ranges and '*' and '?'
43
 * can not be escaped to include them literally in a pattern.
44
 *
45
 * When multiple strings must be matched against the same pattern, it
46
 * is better to compile the pattern to a #GPatternSpec using
47
 * g_pattern_spec_new() and use g_pattern_match_string() instead of
48
 * g_pattern_match_simple(). This avoids the overhead of repeated
49
 * pattern compilation.
50
 **/
51
52
/**
53
 * GPatternSpec:
54
 *
55
 * A GPatternSpec struct is the 'compiled' form of a pattern. This
56
 * structure is opaque and its fields cannot be accessed directly.
57
 */
58
59
/* keep enum and structure of gpattern.c and patterntest.c in sync */
60
typedef enum
61
{
62
  G_MATCH_ALL,       /* "*A?A*" */
63
  G_MATCH_ALL_TAIL,  /* "*A?AA" */
64
  G_MATCH_HEAD,      /* "AAAA*" */
65
  G_MATCH_TAIL,      /* "*AAAA" */
66
  G_MATCH_EXACT,     /* "AAAAA" */
67
  G_MATCH_LAST
68
} GMatchType;
69
70
struct _GPatternSpec
71
{
72
  GMatchType match_type;
73
  guint      pattern_length;
74
  guint      min_length;
75
  guint      max_length;
76
  gchar     *pattern;
77
};
78
79
80
/* --- functions --- */
81
static inline gboolean
82
g_pattern_ph_match (const gchar *match_pattern,
83
        const gchar *match_string,
84
        gboolean    *wildcard_reached_p)
85
0
{
86
0
  const gchar *pattern, *string;
87
0
  gchar ch;
88
89
0
  pattern = match_pattern;
90
0
  string = match_string;
91
92
0
  ch = *pattern;
93
0
  pattern++;
94
0
  while (ch)
95
0
    {
96
0
      switch (ch)
97
0
  {
98
0
  case '?':
99
0
    if (!*string)
100
0
      return FALSE;
101
0
    string = g_utf8_next_char (string);
102
0
    break;
103
104
0
  case '*':
105
0
    *wildcard_reached_p = TRUE;
106
0
    do
107
0
      {
108
0
        ch = *pattern;
109
0
        pattern++;
110
0
        if (ch == '?')
111
0
    {
112
0
      if (!*string)
113
0
        return FALSE;
114
0
      string = g_utf8_next_char (string);
115
0
    }
116
0
      }
117
0
    while (ch == '*' || ch == '?');
118
0
    if (!ch)
119
0
      return TRUE;
120
0
    do
121
0
      {
122
0
              gboolean next_wildcard_reached = FALSE;
123
0
        while (ch != *string)
124
0
    {
125
0
      if (!*string)
126
0
        return FALSE;
127
0
      string = g_utf8_next_char (string);
128
0
    }
129
0
        string++;
130
0
        if (g_pattern_ph_match (pattern, string, &next_wildcard_reached))
131
0
    return TRUE;
132
0
              if (next_wildcard_reached)
133
                /* the forthcoming pattern substring up to the next wildcard has
134
                 * been matched, but a mismatch occurred for the rest of the
135
                 * pattern, following the next wildcard.
136
                 * there's no need to advance the current match position any
137
                 * further if the rest pattern will not match.
138
                 */
139
0
    return FALSE;
140
0
      }
141
0
    while (*string);
142
0
    break;
143
144
0
  default:
145
0
    if (ch == *string)
146
0
      string++;
147
0
    else
148
0
      return FALSE;
149
0
    break;
150
0
  }
151
152
0
      ch = *pattern;
153
0
      pattern++;
154
0
    }
155
156
0
  return *string == 0;
157
0
}
158
159
/**
160
 * g_pattern_match:
161
 * @pspec: a #GPatternSpec
162
 * @string_length: the length of @string (in bytes, i.e. strlen(),
163
 *     not g_utf8_strlen())
164
 * @string: the UTF-8 encoded string to match
165
 * @string_reversed: (nullable): the reverse of @string or %NULL
166
 *
167
 * Matches a string against a compiled pattern. Passing the correct
168
 * length of the string given is mandatory. The reversed string can be
169
 * omitted by passing %NULL, this is more efficient if the reversed
170
 * version of the string to be matched is not at hand, as
171
 * g_pattern_match() will only construct it if the compiled pattern
172
 * requires reverse matches.
173
 *
174
 * Note that, if the user code will (possibly) match a string against a
175
 * multitude of patterns containing wildcards, chances are high that
176
 * some patterns will require a reversed string. In this case, it's
177
 * more efficient to provide the reversed string to avoid multiple
178
 * constructions thereof in the various calls to g_pattern_match().
179
 *
180
 * Note also that the reverse of a UTF-8 encoded string can in general
181
 * not be obtained by g_strreverse(). This works only if the string
182
 * does not contain any multibyte characters. GLib offers the
183
 * g_utf8_strreverse() function to reverse UTF-8 encoded strings.
184
 *
185
 * Returns: %TRUE if @string matches @pspec
186
 **/
187
gboolean
188
g_pattern_match (GPatternSpec *pspec,
189
     guint         string_length,
190
     const gchar  *string,
191
     const gchar  *string_reversed)
192
6.90k
{
193
6.90k
  g_return_val_if_fail (pspec != NULL, FALSE);
194
6.90k
  g_return_val_if_fail (string != NULL, FALSE);
195
196
6.90k
  if (string_length < pspec->min_length ||
197
6.90k
      string_length > pspec->max_length)
198
5.48k
    return FALSE;
199
200
1.42k
  switch (pspec->match_type)
201
1.42k
    {
202
0
      gboolean dummy;
203
0
    case G_MATCH_ALL:
204
0
      return g_pattern_ph_match (pspec->pattern, string, &dummy);
205
0
    case G_MATCH_ALL_TAIL:
206
0
      if (string_reversed)
207
0
  return g_pattern_ph_match (pspec->pattern, string_reversed, &dummy);
208
0
      else
209
0
  {
210
0
          gboolean result;
211
0
          gchar *tmp;
212
0
    tmp = g_utf8_strreverse (string, string_length);
213
0
    result = g_pattern_ph_match (pspec->pattern, tmp, &dummy);
214
0
    g_free (tmp);
215
0
    return result;
216
0
  }
217
0
    case G_MATCH_HEAD:
218
0
      if (pspec->pattern_length == string_length)
219
0
  return strcmp (pspec->pattern, string) == 0;
220
0
      else if (pspec->pattern_length)
221
0
  return strncmp (pspec->pattern, string, pspec->pattern_length) == 0;
222
0
      else
223
0
  return TRUE;
224
0
    case G_MATCH_TAIL:
225
0
      if (pspec->pattern_length)
226
0
        return strcmp (pspec->pattern, string + (string_length - pspec->pattern_length)) == 0;
227
0
      else
228
0
  return TRUE;
229
1.42k
    case G_MATCH_EXACT:
230
1.42k
      if (pspec->pattern_length != string_length)
231
0
        return FALSE;
232
1.42k
      else
233
1.42k
        return strcmp (pspec->pattern, string) == 0;
234
0
    default:
235
0
      g_return_val_if_fail (pspec->match_type < G_MATCH_LAST, FALSE);
236
0
      return FALSE;
237
1.42k
    }
238
1.42k
}
239
240
/**
241
 * g_pattern_spec_new:
242
 * @pattern: a zero-terminated UTF-8 encoded string
243
 *
244
 * Compiles a pattern to a #GPatternSpec.
245
 *
246
 * Returns: a newly-allocated #GPatternSpec
247
 **/
248
GPatternSpec*
249
g_pattern_spec_new (const gchar *pattern)
250
6.90k
{
251
6.90k
  GPatternSpec *pspec;
252
6.90k
  gboolean seen_joker = FALSE, seen_wildcard = FALSE, more_wildcards = FALSE;
253
6.90k
  gint hw_pos = -1, tw_pos = -1, hj_pos = -1, tj_pos = -1;
254
6.90k
  gboolean follows_wildcard = FALSE;
255
6.90k
  guint pending_jokers = 0;
256
6.90k
  const gchar *s;
257
6.90k
  gchar *d;
258
6.90k
  guint i;
259
  
260
6.90k
  g_return_val_if_fail (pattern != NULL, NULL);
261
262
  /* canonicalize pattern and collect necessary stats */
263
6.90k
  pspec = g_new (GPatternSpec, 1);
264
6.90k
  pspec->pattern_length = strlen (pattern);
265
6.90k
  pspec->min_length = 0;
266
6.90k
  pspec->max_length = 0;
267
6.90k
  pspec->pattern = g_new (gchar, pspec->pattern_length + 1);
268
6.90k
  d = pspec->pattern;
269
27.2k
  for (i = 0, s = pattern; *s != 0; s++)
270
20.3k
    {
271
20.3k
      switch (*s)
272
20.3k
  {
273
0
  case '*':
274
0
    if (follows_wildcard) /* compress multiple wildcards */
275
0
      {
276
0
        pspec->pattern_length--;
277
0
        continue;
278
0
      }
279
0
    follows_wildcard = TRUE;
280
0
    if (hw_pos < 0)
281
0
      hw_pos = i;
282
0
    tw_pos = i;
283
0
    break;
284
0
  case '?':
285
0
    pending_jokers++;
286
0
    pspec->min_length++;
287
0
    pspec->max_length += 4; /* maximum UTF-8 character length */
288
0
    continue;
289
20.3k
  default:
290
20.3k
    for (; pending_jokers; pending_jokers--, i++) {
291
0
      *d++ = '?';
292
0
        if (hj_pos < 0)
293
0
       hj_pos = i;
294
0
      tj_pos = i;
295
0
    }
296
20.3k
    follows_wildcard = FALSE;
297
20.3k
    pspec->min_length++;
298
20.3k
    pspec->max_length++;
299
20.3k
    break;
300
20.3k
  }
301
20.3k
      *d++ = *s;
302
20.3k
      i++;
303
20.3k
    }
304
6.90k
  for (; pending_jokers; pending_jokers--) {
305
0
    *d++ = '?';
306
0
    if (hj_pos < 0)
307
0
      hj_pos = i;
308
0
    tj_pos = i;
309
0
  }
310
6.90k
  *d++ = 0;
311
6.90k
  seen_joker = hj_pos >= 0;
312
6.90k
  seen_wildcard = hw_pos >= 0;
313
6.90k
  more_wildcards = seen_wildcard && hw_pos != tw_pos;
314
6.90k
  if (seen_wildcard)
315
0
    pspec->max_length = G_MAXUINT;
316
317
  /* special case sole head/tail wildcard or exact matches */
318
6.90k
  if (!seen_joker && !more_wildcards)
319
6.90k
    {
320
6.90k
      if (pspec->pattern[0] == '*')
321
0
  {
322
0
    pspec->match_type = G_MATCH_TAIL;
323
0
          memmove (pspec->pattern, pspec->pattern + 1, --pspec->pattern_length);
324
0
    pspec->pattern[pspec->pattern_length] = 0;
325
0
    return pspec;
326
0
  }
327
6.90k
      if (pspec->pattern_length > 0 &&
328
6.90k
    pspec->pattern[pspec->pattern_length - 1] == '*')
329
0
  {
330
0
    pspec->match_type = G_MATCH_HEAD;
331
0
    pspec->pattern[--pspec->pattern_length] = 0;
332
0
    return pspec;
333
0
  }
334
6.90k
      if (!seen_wildcard)
335
6.90k
  {
336
6.90k
    pspec->match_type = G_MATCH_EXACT;
337
6.90k
    return pspec;
338
6.90k
  }
339
6.90k
    }
340
341
  /* now just need to distinguish between head or tail match start */
342
0
  tw_pos = pspec->pattern_length - 1 - tw_pos;  /* last pos to tail distance */
343
0
  tj_pos = pspec->pattern_length - 1 - tj_pos;  /* last pos to tail distance */
344
0
  if (seen_wildcard)
345
0
    pspec->match_type = tw_pos > hw_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL;
346
0
  else /* seen_joker */
347
0
    pspec->match_type = tj_pos > hj_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL;
348
0
  if (pspec->match_type == G_MATCH_ALL_TAIL) {
349
0
    gchar *tmp = pspec->pattern;
350
0
    pspec->pattern = g_utf8_strreverse (pspec->pattern, pspec->pattern_length);
351
0
    g_free (tmp);
352
0
  }
353
0
  return pspec;
354
6.90k
}
355
356
/**
357
 * g_pattern_spec_free:
358
 * @pspec: a #GPatternSpec
359
 *
360
 * Frees the memory allocated for the #GPatternSpec.
361
 **/
362
void
363
g_pattern_spec_free (GPatternSpec *pspec)
364
6.90k
{
365
6.90k
  g_return_if_fail (pspec != NULL);
366
367
6.90k
  g_free (pspec->pattern);
368
6.90k
  g_free (pspec);
369
6.90k
}
370
371
/**
372
 * g_pattern_spec_equal:
373
 * @pspec1: a #GPatternSpec
374
 * @pspec2: another #GPatternSpec
375
 *
376
 * Compares two compiled pattern specs and returns whether they will
377
 * match the same set of strings.
378
 *
379
 * Returns: Whether the compiled patterns are equal
380
 **/
381
gboolean
382
g_pattern_spec_equal (GPatternSpec *pspec1,
383
          GPatternSpec *pspec2)
384
0
{
385
0
  g_return_val_if_fail (pspec1 != NULL, FALSE);
386
0
  g_return_val_if_fail (pspec2 != NULL, FALSE);
387
388
0
  return (pspec1->pattern_length == pspec2->pattern_length &&
389
0
    pspec1->match_type == pspec2->match_type &&
390
0
    strcmp (pspec1->pattern, pspec2->pattern) == 0);
391
0
}
392
393
/**
394
 * g_pattern_match_string:
395
 * @pspec: a #GPatternSpec
396
 * @string: the UTF-8 encoded string to match
397
 *
398
 * Matches a string against a compiled pattern. If the string is to be
399
 * matched against more than one pattern, consider using
400
 * g_pattern_match() instead while supplying the reversed string.
401
 *
402
 * Returns: %TRUE if @string matches @pspec
403
 **/
404
gboolean
405
g_pattern_match_string (GPatternSpec *pspec,
406
      const gchar  *string)
407
0
{
408
0
  g_return_val_if_fail (pspec != NULL, FALSE);
409
0
  g_return_val_if_fail (string != NULL, FALSE);
410
411
0
  return g_pattern_match (pspec, strlen (string), string, NULL);
412
0
}
413
414
/**
415
 * g_pattern_match_simple:
416
 * @pattern: the UTF-8 encoded pattern
417
 * @string: the UTF-8 encoded string to match
418
 *
419
 * Matches a string against a pattern given as a string. If this
420
 * function is to be called in a loop, it's more efficient to compile
421
 * the pattern once with g_pattern_spec_new() and call
422
 * g_pattern_match_string() repeatedly.
423
 *
424
 * Returns: %TRUE if @string matches @pspec
425
 **/
426
gboolean
427
g_pattern_match_simple (const gchar *pattern,
428
      const gchar *string)
429
6.90k
{
430
6.90k
  GPatternSpec *pspec;
431
6.90k
  gboolean ergo;
432
433
6.90k
  g_return_val_if_fail (pattern != NULL, FALSE);
434
6.90k
  g_return_val_if_fail (string != NULL, FALSE);
435
436
6.90k
  pspec = g_pattern_spec_new (pattern);
437
6.90k
  ergo = g_pattern_match (pspec, strlen (string), string, NULL);
438
6.90k
  g_pattern_spec_free (pspec);
439
440
6.90k
  return ergo;
441
6.90k
}