Coverage Report

Created: 2025-07-11 06:05

/src/libgit2/src/util/wildmatch.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * Copyright (C) the libgit2 contributors. All rights reserved.
3
 *
4
 * This file is part of libgit2, distributed under the GNU GPL v2 with
5
 * a Linking Exception. For full terms see the included COPYING file.
6
 *
7
 * Do shell-style pattern matching for ?, \, [], and * characters.
8
 * It is 8bit clean.
9
 *
10
 * Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
11
 * Rich $alz is now <rsalz@bbn.com>.
12
 *
13
 * Modified by Wayne Davison to special-case '/' matching, to make '**'
14
 * work differently than '*', and to fix the character-class code.
15
 *
16
 * Imported from git.git.
17
 */
18
19
#include "wildmatch.h"
20
21
#define GIT_SPACE 0x01
22
#define GIT_DIGIT 0x02
23
#define GIT_ALPHA 0x04
24
#define GIT_GLOB_SPECIAL 0x08
25
#define GIT_REGEX_SPECIAL 0x10
26
#define GIT_PATHSPEC_MAGIC 0x20
27
#define GIT_CNTRL 0x40
28
#define GIT_PUNCT 0x80
29
30
enum {
31
  S = GIT_SPACE,
32
  A = GIT_ALPHA,
33
  D = GIT_DIGIT,
34
  G = GIT_GLOB_SPECIAL, /* *, ?, [, \\ */
35
  R = GIT_REGEX_SPECIAL,  /* $, (, ), +, ., ^, {, | */
36
  P = GIT_PATHSPEC_MAGIC, /* other non-alnum, except for ] and } */
37
  X = GIT_CNTRL,
38
  U = GIT_PUNCT,
39
  Z = GIT_CNTRL | GIT_SPACE
40
};
41
42
static const unsigned char sane_ctype[256] = {
43
  X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X,   /*   0.. 15 */
44
  X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,   /*  16.. 31 */
45
  S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P,   /*  32.. 47 */
46
  D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G,   /*  48.. 63 */
47
  P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,   /*  64.. 79 */
48
  A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P,   /*  80.. 95 */
49
  P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,   /*  96..111 */
50
  A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X,   /* 112..127 */
51
  /* Nothing in the 128.. range */
52
};
53
54
0
#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
55
0
#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
56
57
typedef unsigned char uchar;
58
59
/* What character marks an inverted character class? */
60
0
#define NEGATE_CLASS  '!'
61
0
#define NEGATE_CLASS2 '^'
62
63
0
#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
64
0
            && *(class) == *(litmatch) \
65
0
            && strncmp((char*)class, litmatch, len) == 0)
66
67
#if defined STDC_HEADERS || !defined isascii
68
# define ISASCII(c) 1
69
#else
70
0
# define ISASCII(c) isascii(c)
71
#endif
72
73
#ifdef isblank
74
0
# define ISBLANK(c) (ISASCII(c) && isblank(c))
75
#else
76
# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
77
#endif
78
79
#ifdef isgraph
80
0
# define ISGRAPH(c) (ISASCII(c) && isgraph(c))
81
#else
82
# define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
83
#endif
84
85
0
#define ISPRINT(c) (ISASCII(c) && isprint(c))
86
0
#define ISDIGIT(c) (ISASCII(c) && isdigit(c))
87
0
#define ISALNUM(c) (ISASCII(c) && isalnum(c))
88
0
#define ISALPHA(c) (ISASCII(c) && isalpha(c))
89
0
#define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
90
0
#define ISLOWER(c) (ISASCII(c) && islower(c))
91
0
#define ISPUNCT(c) (ISASCII(c) && ispunct(c))
92
0
#define ISSPACE(c) (ISASCII(c) && isspace(c))
93
0
#define ISUPPER(c) (ISASCII(c) && isupper(c))
94
0
#define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
95
96
/* Match pattern "p" against "text" */
97
static int dowild(const uchar *p, const uchar *text, unsigned int flags)
98
0
{
99
0
  uchar p_ch;
100
0
  const uchar *pattern = p;
101
102
0
  for ( ; (p_ch = *p) != '\0'; text++, p++) {
103
0
    int matched, match_slash, negated;
104
0
    uchar t_ch, prev_ch;
105
0
    if ((t_ch = *text) == '\0' && p_ch != '*')
106
0
      return WM_ABORT_ALL;
107
0
    if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
108
0
      t_ch = tolower(t_ch);
109
0
    if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
110
0
      p_ch = tolower(p_ch);
111
0
    switch (p_ch) {
112
0
    case '\\':
113
      /* Literal match with following character.  Note that the test
114
       * in "default" handles the p[1] == '\0' failure case. */
115
0
      p_ch = *++p;
116
      /* FALLTHROUGH */
117
0
    default:
118
0
      if (t_ch != p_ch)
119
0
        return WM_NOMATCH;
120
0
      continue;
121
0
    case '?':
122
      /* Match anything but '/'. */
123
0
      if ((flags & WM_PATHNAME) && t_ch == '/')
124
0
        return WM_NOMATCH;
125
0
      continue;
126
0
    case '*':
127
0
      if (*++p == '*') {
128
0
        const uchar *prev_p = p - 2;
129
0
        while (*++p == '*') {}
130
0
        if (!(flags & WM_PATHNAME))
131
          /* without WM_PATHNAME, '*' == '**' */
132
0
          match_slash = 1;
133
0
        else if ((prev_p < pattern || *prev_p == '/') &&
134
0
            (*p == '\0' || *p == '/' ||
135
0
             (p[0] == '\\' && p[1] == '/'))) {
136
          /*
137
           * Assuming we already match 'foo/' and are at
138
           * <star star slash>, just assume it matches
139
           * nothing and go ahead match the rest of the
140
           * pattern with the remaining string. This
141
           * helps make foo/<*><*>/bar (<> because
142
           * otherwise it breaks C comment syntax) match
143
           * both foo/bar and foo/a/bar.
144
           */
145
0
          if (p[0] == '/' &&
146
0
              dowild(p + 1, text, flags) == WM_MATCH)
147
0
            return WM_MATCH;
148
0
          match_slash = 1;
149
0
        } else /* WM_PATHNAME is set */
150
0
          match_slash = 0;
151
0
      } else
152
        /* without WM_PATHNAME, '*' == '**' */
153
0
        match_slash = flags & WM_PATHNAME ? 0 : 1;
154
0
      if (*p == '\0') {
155
        /* Trailing "**" matches everything.  Trailing "*" matches
156
         * only if there are no more slash characters. */
157
0
        if (!match_slash) {
158
0
          if (strchr((char*)text, '/') != NULL)
159
0
            return WM_NOMATCH;
160
0
        }
161
0
        return WM_MATCH;
162
0
      } else if (!match_slash && *p == '/') {
163
        /*
164
         * _one_ asterisk followed by a slash
165
         * with WM_PATHNAME matches the next
166
         * directory
167
         */
168
0
        const char *slash = strchr((char*)text, '/');
169
0
        if (!slash)
170
0
          return WM_NOMATCH;
171
0
        text = (const uchar*)slash;
172
        /* the slash is consumed by the top-level for loop */
173
0
        break;
174
0
      }
175
0
      while (1) {
176
0
        if (t_ch == '\0')
177
0
          break;
178
        /*
179
         * Try to advance faster when an asterisk is
180
         * followed by a literal. We know in this case
181
         * that the string before the literal
182
         * must belong to "*".
183
         * If match_slash is false, do not look past
184
         * the first slash as it cannot belong to '*'.
185
         */
186
0
        if (!is_glob_special(*p)) {
187
0
          p_ch = *p;
188
0
          if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
189
0
            p_ch = tolower(p_ch);
190
0
          while ((t_ch = *text) != '\0' &&
191
0
                 (match_slash || t_ch != '/')) {
192
0
            if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
193
0
              t_ch = tolower(t_ch);
194
0
            if (t_ch == p_ch)
195
0
              break;
196
0
            text++;
197
0
          }
198
0
          if (t_ch != p_ch)
199
0
            return WM_NOMATCH;
200
0
        }
201
0
        if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
202
0
          if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
203
0
            return matched;
204
0
        } else if (!match_slash && t_ch == '/')
205
0
          return WM_ABORT_TO_STARSTAR;
206
0
        t_ch = *++text;
207
0
      }
208
0
      return WM_ABORT_ALL;
209
0
    case '[':
210
0
      p_ch = *++p;
211
0
#ifdef NEGATE_CLASS2
212
0
      if (p_ch == NEGATE_CLASS2)
213
0
        p_ch = NEGATE_CLASS;
214
0
#endif
215
      /* Assign literal 1/0 because of "matched" comparison. */
216
0
      negated = p_ch == NEGATE_CLASS ? 1 : 0;
217
0
      if (negated) {
218
        /* Inverted character class. */
219
0
        p_ch = *++p;
220
0
      }
221
0
      prev_ch = 0;
222
0
      matched = 0;
223
0
      do {
224
0
        if (!p_ch)
225
0
          return WM_ABORT_ALL;
226
0
        if (p_ch == '\\') {
227
0
          p_ch = *++p;
228
0
          if (!p_ch)
229
0
            return WM_ABORT_ALL;
230
0
          if (t_ch == p_ch)
231
0
            matched = 1;
232
0
        } else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
233
0
          p_ch = *++p;
234
0
          if (p_ch == '\\') {
235
0
            p_ch = *++p;
236
0
            if (!p_ch)
237
0
              return WM_ABORT_ALL;
238
0
          }
239
0
          if (t_ch <= p_ch && t_ch >= prev_ch)
240
0
            matched = 1;
241
0
          else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
242
0
            uchar t_ch_upper = toupper(t_ch);
243
0
            if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
244
0
              matched = 1;
245
0
          }
246
0
          p_ch = 0; /* This makes "prev_ch" get set to 0. */
247
0
        } else if (p_ch == '[' && p[1] == ':') {
248
0
          const uchar *s;
249
0
          int i;
250
0
          for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
251
0
          if (!p_ch)
252
0
            return WM_ABORT_ALL;
253
0
          i = (int)(p - s - 1);
254
0
          if (i < 0 || p[-1] != ':') {
255
            /* Didn't find ":]", so treat like a normal set. */
256
0
            p = s - 2;
257
0
            p_ch = '[';
258
0
            if (t_ch == p_ch)
259
0
              matched = 1;
260
0
            continue;
261
0
          }
262
0
          if (CC_EQ(s,i, "alnum")) {
263
0
            if (ISALNUM(t_ch))
264
0
              matched = 1;
265
0
          } else if (CC_EQ(s,i, "alpha")) {
266
0
            if (ISALPHA(t_ch))
267
0
              matched = 1;
268
0
          } else if (CC_EQ(s,i, "blank")) {
269
0
            if (ISBLANK(t_ch))
270
0
              matched = 1;
271
0
          } else if (CC_EQ(s,i, "cntrl")) {
272
0
            if (ISCNTRL(t_ch))
273
0
              matched = 1;
274
0
          } else if (CC_EQ(s,i, "digit")) {
275
0
            if (ISDIGIT(t_ch))
276
0
              matched = 1;
277
0
          } else if (CC_EQ(s,i, "graph")) {
278
0
            if (ISGRAPH(t_ch))
279
0
              matched = 1;
280
0
          } else if (CC_EQ(s,i, "lower")) {
281
0
            if (ISLOWER(t_ch))
282
0
              matched = 1;
283
0
          } else if (CC_EQ(s,i, "print")) {
284
0
            if (ISPRINT(t_ch))
285
0
              matched = 1;
286
0
          } else if (CC_EQ(s,i, "punct")) {
287
0
            if (ISPUNCT(t_ch))
288
0
              matched = 1;
289
0
          } else if (CC_EQ(s,i, "space")) {
290
0
            if (ISSPACE(t_ch))
291
0
              matched = 1;
292
0
          } else if (CC_EQ(s,i, "upper")) {
293
0
            if (ISUPPER(t_ch))
294
0
              matched = 1;
295
0
            else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
296
0
              matched = 1;
297
0
          } else if (CC_EQ(s,i, "xdigit")) {
298
0
            if (ISXDIGIT(t_ch))
299
0
              matched = 1;
300
0
          } else /* malformed [:class:] string */
301
0
            return WM_ABORT_ALL;
302
0
          p_ch = 0; /* This makes "prev_ch" get set to 0. */
303
0
        } else if (t_ch == p_ch)
304
0
          matched = 1;
305
0
      } while (prev_ch = p_ch, (p_ch = *++p) != ']');
306
0
      if (matched == negated ||
307
0
          ((flags & WM_PATHNAME) && t_ch == '/'))
308
0
        return WM_NOMATCH;
309
0
      continue;
310
0
    }
311
0
  }
312
313
0
  return *text ? WM_NOMATCH : WM_MATCH;
314
0
}
315
316
/* Match the "pattern" against the "text" string. */
317
int wildmatch(const char *pattern, const char *text, unsigned int flags)
318
0
{
319
0
  return dowild((const uchar*)pattern, (const uchar*)text, flags);
320
0
}