Coverage Report

Created: 2025-07-11 06:47

/src/tinysparql/subprojects/glib-2.80.3/glib/gpattern.c
Line
Count
Source (jump to first uncovered line)
1
/* GLIB - Library of useful routines for C programming
2
 * Copyright (C) 1995-1997, 1999  Peter Mattis, Red Hat, Inc.
3
 *
4
 * SPDX-License-Identifier: LGPL-2.1-or-later
5
 *
6
 * This library is free software; you can redistribute it and/or
7
 * modify it under the terms of the GNU Lesser General Public
8
 * License as published by the Free Software Foundation; either
9
 * version 2.1 of the License, or (at your option) any later version.
10
 *
11
 * This library is distributed in the hope that it will be useful,
12
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14
 * Lesser General Public License for more details.
15
 *
16
 * You should have received a copy of the GNU Lesser General Public
17
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
18
 */
19
20
#include "config.h"
21
22
#include <string.h>
23
24
#include "gpattern.h"
25
26
#include "gmacros.h"
27
#include "gmem.h"
28
#include "gmessages.h"
29
#include "gstrfuncs.h"
30
#include "gunicode.h"
31
#include "gutils.h"
32
33
/**
34
 * GPatternSpec:
35
 *
36
 * A `GPatternSpec` struct is the 'compiled' form of a glob-style pattern.
37
 *
38
 * The [func@GLib.pattern_match_simple] and [method@GLib.PatternSpec.match] functions
39
 * match a string against a pattern containing '*' and '?' wildcards with similar
40
 * semantics as the standard `glob()` function: '*' matches an arbitrary,
41
 * possibly empty, string, '?' matches an arbitrary character.
42
 *
43
 * Note that in contrast to `glob()`, the '/' character can be matched by
44
 * the wildcards, there are no '[...]' character ranges and '*' and '?'
45
 * can not be escaped to include them literally in a pattern.
46
 *
47
 * When multiple strings must be matched against the same pattern, it is better
48
 * to compile the pattern to a [struct@GLib.PatternSpec] using
49
 * [ctor@GLib.PatternSpec.new] and use [method@GLib.PatternSpec.match_string]
50
 * instead of [func@GLib.pattern_match_simple]. This avoids the overhead of repeated
51
 * pattern compilation.
52
 */
53
54
/* keep enum and structure of gpattern.c and patterntest.c in sync */
55
typedef enum
56
{
57
  G_MATCH_ALL,       /* "*A?A*" */
58
  G_MATCH_ALL_TAIL,  /* "*A?AA" */
59
  G_MATCH_HEAD,      /* "AAAA*" */
60
  G_MATCH_TAIL,      /* "*AAAA" */
61
  G_MATCH_EXACT,     /* "AAAAA" */
62
  G_MATCH_LAST
63
} GMatchType;
64
65
struct _GPatternSpec
66
{
67
  GMatchType match_type;
68
  guint      pattern_length;
69
  guint      min_length;
70
  guint      max_length;
71
  gchar     *pattern;
72
};
73
74
75
/* --- functions --- */
76
static inline gboolean
77
g_pattern_ph_match (const gchar *match_pattern,
78
        const gchar *match_string,
79
        gboolean    *wildcard_reached_p)
80
0
{
81
0
  const gchar *pattern, *string;
82
0
  gchar ch;
83
84
0
  pattern = match_pattern;
85
0
  string = match_string;
86
87
0
  ch = *pattern;
88
0
  pattern++;
89
0
  while (ch)
90
0
    {
91
0
      switch (ch)
92
0
  {
93
0
  case '?':
94
0
    if (!*string)
95
0
      return FALSE;
96
0
    string = g_utf8_next_char (string);
97
0
    break;
98
99
0
  case '*':
100
0
    *wildcard_reached_p = TRUE;
101
0
    do
102
0
      {
103
0
        ch = *pattern;
104
0
        pattern++;
105
0
        if (ch == '?')
106
0
    {
107
0
      if (!*string)
108
0
        return FALSE;
109
0
      string = g_utf8_next_char (string);
110
0
    }
111
0
      }
112
0
    while (ch == '*' || ch == '?');
113
0
    if (!ch)
114
0
      return TRUE;
115
0
    do
116
0
      {
117
0
              gboolean next_wildcard_reached = FALSE;
118
0
        while (ch != *string)
119
0
    {
120
0
      if (!*string)
121
0
        return FALSE;
122
0
      string = g_utf8_next_char (string);
123
0
    }
124
0
        string++;
125
0
        if (g_pattern_ph_match (pattern, string, &next_wildcard_reached))
126
0
    return TRUE;
127
0
              if (next_wildcard_reached)
128
                /* the forthcoming pattern substring up to the next wildcard has
129
                 * been matched, but a mismatch occurred for the rest of the
130
                 * pattern, following the next wildcard.
131
                 * there's no need to advance the current match position any
132
                 * further if the rest pattern will not match.
133
                 */
134
0
    return FALSE;
135
0
      }
136
0
    while (*string);
137
0
    break;
138
139
0
  default:
140
0
    if (ch == *string)
141
0
      string++;
142
0
    else
143
0
      return FALSE;
144
0
    break;
145
0
  }
146
147
0
      ch = *pattern;
148
0
      pattern++;
149
0
    }
150
151
0
  return *string == 0;
152
0
}
153
154
/**
155
 * g_pattern_spec_match:
156
 * @pspec: a #GPatternSpec
157
 * @string_length: the length of @string (in bytes, i.e. strlen(),
158
 *     not g_utf8_strlen())
159
 * @string: the UTF-8 encoded string to match
160
 * @string_reversed: (nullable): the reverse of @string or %NULL
161
 *
162
 * Matches a string against a compiled pattern. Passing the correct
163
 * length of the string given is mandatory. The reversed string can be
164
 * omitted by passing %NULL, this is more efficient if the reversed
165
 * version of the string to be matched is not at hand, as
166
 * g_pattern_match() will only construct it if the compiled pattern
167
 * requires reverse matches.
168
 *
169
 * Note that, if the user code will (possibly) match a string against a
170
 * multitude of patterns containing wildcards, chances are high that
171
 * some patterns will require a reversed string. In this case, it's
172
 * more efficient to provide the reversed string to avoid multiple
173
 * constructions thereof in the various calls to g_pattern_match().
174
 *
175
 * Note also that the reverse of a UTF-8 encoded string can in general
176
 * not be obtained by g_strreverse(). This works only if the string
177
 * does not contain any multibyte characters. GLib offers the
178
 * g_utf8_strreverse() function to reverse UTF-8 encoded strings.
179
 *
180
 * Returns: %TRUE if @string matches @pspec
181
 *
182
 * Since: 2.70
183
 **/
184
gboolean
185
g_pattern_spec_match (GPatternSpec *pspec,
186
                      gsize string_length,
187
                      const gchar *string,
188
                      const gchar *string_reversed)
189
0
{
190
0
  g_return_val_if_fail (pspec != NULL, FALSE);
191
0
  g_return_val_if_fail (string != NULL, FALSE);
192
193
0
  if (string_length < pspec->min_length ||
194
0
      string_length > pspec->max_length)
195
0
    return FALSE;
196
197
0
  switch (pspec->match_type)
198
0
    {
199
0
      gboolean dummy;
200
0
    case G_MATCH_ALL:
201
0
      return g_pattern_ph_match (pspec->pattern, string, &dummy);
202
0
    case G_MATCH_ALL_TAIL:
203
0
      if (string_reversed)
204
0
  return g_pattern_ph_match (pspec->pattern, string_reversed, &dummy);
205
0
      else
206
0
  {
207
0
          gboolean result;
208
0
          gchar *tmp;
209
0
    tmp = g_utf8_strreverse (string, string_length);
210
0
    result = g_pattern_ph_match (pspec->pattern, tmp, &dummy);
211
0
    g_free (tmp);
212
0
    return result;
213
0
  }
214
0
    case G_MATCH_HEAD:
215
0
      if (pspec->pattern_length == string_length)
216
0
  return strcmp (pspec->pattern, string) == 0;
217
0
      else if (pspec->pattern_length)
218
0
  return strncmp (pspec->pattern, string, pspec->pattern_length) == 0;
219
0
      else
220
0
  return TRUE;
221
0
    case G_MATCH_TAIL:
222
0
      if (pspec->pattern_length)
223
0
        return strcmp (pspec->pattern, string + (string_length - pspec->pattern_length)) == 0;
224
0
      else
225
0
  return TRUE;
226
0
    case G_MATCH_EXACT:
227
0
      if (pspec->pattern_length != string_length)
228
0
        return FALSE;
229
0
      else
230
0
        return strcmp (pspec->pattern, string) == 0;
231
0
    default:
232
0
      g_return_val_if_fail (pspec->match_type < G_MATCH_LAST, FALSE);
233
0
      return FALSE;
234
0
    }
235
0
}
236
237
/**
238
 * g_pattern_match: (skip)
239
 * @pspec: a #GPatternSpec
240
 * @string_length: the length of @string (in bytes, i.e. strlen(),
241
 *     not g_utf8_strlen())
242
 * @string: the UTF-8 encoded string to match
243
 * @string_reversed: (nullable): the reverse of @string or %NULL
244
 *
245
 * Matches a string against a compiled pattern. Passing the correct
246
 * length of the string given is mandatory. The reversed string can be
247
 * omitted by passing %NULL, this is more efficient if the reversed
248
 * version of the string to be matched is not at hand, as
249
 * g_pattern_match() will only construct it if the compiled pattern
250
 * requires reverse matches.
251
 *
252
 * Note that, if the user code will (possibly) match a string against a
253
 * multitude of patterns containing wildcards, chances are high that
254
 * some patterns will require a reversed string. In this case, it's
255
 * more efficient to provide the reversed string to avoid multiple
256
 * constructions thereof in the various calls to g_pattern_match().
257
 *
258
 * Note also that the reverse of a UTF-8 encoded string can in general
259
 * not be obtained by g_strreverse(). This works only if the string
260
 * does not contain any multibyte characters. GLib offers the
261
 * g_utf8_strreverse() function to reverse UTF-8 encoded strings.
262
 *
263
 * Returns: %TRUE if @string matches @pspec
264
 * Deprecated: 2.70: Use g_pattern_spec_match() instead
265
 **/
266
gboolean
267
g_pattern_match (GPatternSpec *pspec,
268
                 guint string_length,
269
                 const gchar *string,
270
                 const gchar *string_reversed)
271
0
{
272
0
  return g_pattern_spec_match (pspec, string_length, string, string_reversed);
273
0
}
274
275
/**
276
 * g_pattern_spec_new:
277
 * @pattern: a zero-terminated UTF-8 encoded string
278
 *
279
 * Compiles a pattern to a #GPatternSpec.
280
 *
281
 * Returns: a newly-allocated #GPatternSpec
282
 **/
283
GPatternSpec*
284
g_pattern_spec_new (const gchar *pattern)
285
0
{
286
0
  GPatternSpec *pspec;
287
0
  gboolean seen_joker = FALSE, seen_wildcard = FALSE, more_wildcards = FALSE;
288
0
  gint hw_pos = -1, tw_pos = -1, hj_pos = -1, tj_pos = -1;
289
0
  gboolean follows_wildcard = FALSE;
290
0
  guint pending_jokers = 0;
291
0
  const gchar *s;
292
0
  gchar *d;
293
0
  guint i;
294
  
295
0
  g_return_val_if_fail (pattern != NULL, NULL);
296
297
  /* canonicalize pattern and collect necessary stats */
298
0
  pspec = g_new (GPatternSpec, 1);
299
0
  pspec->pattern_length = strlen (pattern);
300
0
  pspec->min_length = 0;
301
0
  pspec->max_length = 0;
302
0
  pspec->pattern = g_new (gchar, pspec->pattern_length + 1);
303
0
  d = pspec->pattern;
304
0
  for (i = 0, s = pattern; *s != 0; s++)
305
0
    {
306
0
      switch (*s)
307
0
  {
308
0
  case '*':
309
0
    if (follows_wildcard) /* compress multiple wildcards */
310
0
      {
311
0
        pspec->pattern_length--;
312
0
        continue;
313
0
      }
314
0
    follows_wildcard = TRUE;
315
0
    if (hw_pos < 0)
316
0
      hw_pos = i;
317
0
    tw_pos = i;
318
0
    break;
319
0
  case '?':
320
0
    pending_jokers++;
321
0
    pspec->min_length++;
322
0
    pspec->max_length += 4; /* maximum UTF-8 character length */
323
0
    continue;
324
0
  default:
325
0
    for (; pending_jokers; pending_jokers--, i++) {
326
0
      *d++ = '?';
327
0
        if (hj_pos < 0)
328
0
       hj_pos = i;
329
0
      tj_pos = i;
330
0
    }
331
0
    follows_wildcard = FALSE;
332
0
    pspec->min_length++;
333
0
    pspec->max_length++;
334
0
    break;
335
0
  }
336
0
      *d++ = *s;
337
0
      i++;
338
0
    }
339
0
  for (; pending_jokers; pending_jokers--) {
340
0
    *d++ = '?';
341
0
    if (hj_pos < 0)
342
0
      hj_pos = i;
343
0
    tj_pos = i;
344
0
  }
345
0
  *d++ = 0;
346
0
  seen_joker = hj_pos >= 0;
347
0
  seen_wildcard = hw_pos >= 0;
348
0
  more_wildcards = seen_wildcard && hw_pos != tw_pos;
349
0
  if (seen_wildcard)
350
0
    pspec->max_length = G_MAXUINT;
351
352
  /* special case sole head/tail wildcard or exact matches */
353
0
  if (!seen_joker && !more_wildcards)
354
0
    {
355
0
      if (pspec->pattern[0] == '*')
356
0
  {
357
0
    pspec->match_type = G_MATCH_TAIL;
358
0
          memmove (pspec->pattern, pspec->pattern + 1, --pspec->pattern_length);
359
0
    pspec->pattern[pspec->pattern_length] = 0;
360
0
    return pspec;
361
0
  }
362
0
      if (pspec->pattern_length > 0 &&
363
0
    pspec->pattern[pspec->pattern_length - 1] == '*')
364
0
  {
365
0
    pspec->match_type = G_MATCH_HEAD;
366
0
    pspec->pattern[--pspec->pattern_length] = 0;
367
0
    return pspec;
368
0
  }
369
0
      if (!seen_wildcard)
370
0
  {
371
0
    pspec->match_type = G_MATCH_EXACT;
372
0
    return pspec;
373
0
  }
374
0
    }
375
376
  /* now just need to distinguish between head or tail match start */
377
0
  tw_pos = pspec->pattern_length - 1 - tw_pos;  /* last pos to tail distance */
378
0
  tj_pos = pspec->pattern_length - 1 - tj_pos;  /* last pos to tail distance */
379
0
  if (seen_wildcard)
380
0
    pspec->match_type = tw_pos > hw_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL;
381
0
  else /* seen_joker */
382
0
    pspec->match_type = tj_pos > hj_pos ? G_MATCH_ALL_TAIL : G_MATCH_ALL;
383
0
  if (pspec->match_type == G_MATCH_ALL_TAIL) {
384
0
    gchar *tmp = pspec->pattern;
385
0
    pspec->pattern = g_utf8_strreverse (pspec->pattern, pspec->pattern_length);
386
0
    g_free (tmp);
387
0
  }
388
0
  return pspec;
389
0
}
390
391
/**
392
 * g_pattern_spec_copy:
393
 * @pspec: a #GPatternSpec
394
 *
395
 * Copies @pspec in a new #GPatternSpec.
396
 *
397
 * Returns: (transfer full): a copy of @pspec.
398
 *
399
 * Since: 2.70
400
 **/
401
GPatternSpec *
402
g_pattern_spec_copy (GPatternSpec *pspec)
403
0
{
404
0
  GPatternSpec *pspec_copy;
405
406
0
  g_return_val_if_fail (pspec != NULL, NULL);
407
408
0
  pspec_copy = g_new (GPatternSpec, 1);
409
0
  *pspec_copy = *pspec;
410
0
  pspec_copy->pattern = g_strndup (pspec->pattern, pspec->pattern_length);
411
412
0
  return pspec_copy;
413
0
}
414
415
/**
416
 * g_pattern_spec_free:
417
 * @pspec: a #GPatternSpec
418
 *
419
 * Frees the memory allocated for the #GPatternSpec.
420
 **/
421
void
422
g_pattern_spec_free (GPatternSpec *pspec)
423
0
{
424
0
  g_return_if_fail (pspec != NULL);
425
426
0
  g_free (pspec->pattern);
427
0
  g_free (pspec);
428
0
}
429
430
/**
431
 * g_pattern_spec_equal:
432
 * @pspec1: a #GPatternSpec
433
 * @pspec2: another #GPatternSpec
434
 *
435
 * Compares two compiled pattern specs and returns whether they will
436
 * match the same set of strings.
437
 *
438
 * Returns: Whether the compiled patterns are equal
439
 **/
440
gboolean
441
g_pattern_spec_equal (GPatternSpec *pspec1,
442
          GPatternSpec *pspec2)
443
0
{
444
0
  g_return_val_if_fail (pspec1 != NULL, FALSE);
445
0
  g_return_val_if_fail (pspec2 != NULL, FALSE);
446
447
0
  return (pspec1->pattern_length == pspec2->pattern_length &&
448
0
    pspec1->match_type == pspec2->match_type &&
449
0
    strcmp (pspec1->pattern, pspec2->pattern) == 0);
450
0
}
451
452
/**
453
 * g_pattern_spec_match_string:
454
 * @pspec: a #GPatternSpec
455
 * @string: the UTF-8 encoded string to match
456
 *
457
 * Matches a string against a compiled pattern. If the string is to be
458
 * matched against more than one pattern, consider using
459
 * g_pattern_match() instead while supplying the reversed string.
460
 *
461
 * Returns: %TRUE if @string matches @pspec
462
 *
463
 * Since: 2.70
464
 **/
465
gboolean
466
g_pattern_spec_match_string (GPatternSpec *pspec,
467
                             const gchar *string)
468
0
{
469
0
  g_return_val_if_fail (pspec != NULL, FALSE);
470
0
  g_return_val_if_fail (string != NULL, FALSE);
471
472
0
  return g_pattern_spec_match (pspec, strlen (string), string, NULL);
473
0
}
474
475
/**
476
 * g_pattern_match_string: (skip)
477
 * @pspec: a #GPatternSpec
478
 * @string: the UTF-8 encoded string to match
479
 *
480
 * Matches a string against a compiled pattern. If the string is to be
481
 * matched against more than one pattern, consider using
482
 * g_pattern_match() instead while supplying the reversed string.
483
 *
484
 * Returns: %TRUE if @string matches @pspec
485
 * Deprecated: 2.70: Use g_pattern_spec_match_string() instead
486
 **/
487
gboolean
488
g_pattern_match_string (GPatternSpec *pspec,
489
                        const gchar *string)
490
0
{
491
0
  return g_pattern_spec_match_string (pspec, string);
492
0
}
493
494
/**
495
 * g_pattern_match_simple:
496
 * @pattern: the UTF-8 encoded pattern
497
 * @string: the UTF-8 encoded string to match
498
 *
499
 * Matches a string against a pattern given as a string. If this
500
 * function is to be called in a loop, it's more efficient to compile
501
 * the pattern once with g_pattern_spec_new() and call
502
 * g_pattern_match_string() repeatedly.
503
 *
504
 * Returns: %TRUE if @string matches @pspec
505
 **/
506
gboolean
507
g_pattern_match_simple (const gchar *pattern,
508
      const gchar *string)
509
0
{
510
0
  GPatternSpec *pspec;
511
0
  gboolean ergo;
512
513
0
  g_return_val_if_fail (pattern != NULL, FALSE);
514
0
  g_return_val_if_fail (string != NULL, FALSE);
515
516
0
  pspec = g_pattern_spec_new (pattern);
517
0
  ergo = g_pattern_spec_match (pspec, strlen (string), string, NULL);
518
0
  g_pattern_spec_free (pspec);
519
520
0
  return ergo;
521
0
}