Coverage Report

Created: 2024-09-08 06:23

/src/git/wildmatch.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
**  Do shell-style pattern matching for ?, \, [], and * characters.
3
**  It is 8bit clean.
4
**
5
**  Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
6
**  Rich $alz is now <rsalz@bbn.com>.
7
**
8
**  Modified by Wayne Davison to special-case '/' matching, to make '**'
9
**  work differently than '*', and to fix the character-class code.
10
*/
11
12
#include "git-compat-util.h"
13
#include "wildmatch.h"
14
15
/* Pattern and text bytes are handled as unsigned so comparisons are 8bit clean. */
typedef unsigned char uchar;

/*
 * Internal return values.
 *
 * Both are negative and are only meaningful inside this file; the
 * values are parenthesized so the macros expand safely inside any
 * larger expression.
 */
#define WM_ABORT_ALL (-1)
#define WM_ABORT_TO_STARSTAR (-2)

/*
 * What character marks an inverted character class?
 *
 * NEGATE_CLASS2 is an optional alternate spelling; it has to stay a
 * preprocessor macro because the class parser tests for it with #ifdef.
 */
#define NEGATE_CLASS  '!'
#define NEGATE_CLASS2 '^'
24
25
0
/*
 * CC_EQ(class, len, litmatch): true when the "len" bytes starting at
 * "class" spell exactly the string literal "litmatch".
 *
 * "litmatch" must be a string literal (its length is taken with sizeof).
 * The length and first-byte checks come first so strncmp() only runs for
 * plausible candidates.  Every parameter is parenthesized so that
 * expression arguments (e.g. "p + 2") expand correctly, and the cast
 * keeps the pointer const-qualified.
 */
#define CC_EQ(class, len, litmatch) ((len) == sizeof(litmatch) - 1 \
            && *(class) == *(litmatch) \
            && strncmp((const char *)(class), (litmatch), (len)) == 0)
28
29
/*
 * Character classification wrappers used by the [:class:] matcher.
 *
 * ISASCII() is a constant 1 unless an isascii macro exists and
 * STDC_HEADERS is not defined; every other wrapper is gated on it.
 */
#if !defined(STDC_HEADERS) && defined(isascii)
# define ISASCII(c) isascii(c)
#else
# define ISASCII(c) 1
#endif

/* Use the ctype isblank only when it is provided as a macro. */
#ifndef isblank
# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#else
# define ISBLANK(c) (ISASCII(c) && isblank(c))
#endif

/* Likewise for isgraph: otherwise "printable and not whitespace". */
#ifndef isgraph
# define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
#else
# define ISGRAPH(c) (ISASCII(c) && isgraph(c))
#endif

/* Straight pass-throughs to <ctype.h>-style predicates. */
#define ISALNUM(c) (ISASCII(c) && isalnum(c))
#define ISALPHA(c) (ISASCII(c) && isalpha(c))
#define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
#define ISDIGIT(c) (ISASCII(c) && isdigit(c))
#define ISLOWER(c) (ISASCII(c) && islower(c))
#define ISPRINT(c) (ISASCII(c) && isprint(c))
#define ISPUNCT(c) (ISASCII(c) && ispunct(c))
#define ISSPACE(c) (ISASCII(c) && isspace(c))
#define ISUPPER(c) (ISASCII(c) && isupper(c))
#define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
57
58
/* Match pattern "p" against "text" */
59
static int dowild(const uchar *p, const uchar *text, unsigned int flags)
60
0
{
61
0
  uchar p_ch;
62
0
  const uchar *pattern = p;
63
64
0
  for ( ; (p_ch = *p) != '\0'; text++, p++) {
65
0
    int matched, match_slash, negated;
66
0
    uchar t_ch, prev_ch;
67
0
    if ((t_ch = *text) == '\0' && p_ch != '*')
68
0
      return WM_ABORT_ALL;
69
0
    if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
70
0
      t_ch = tolower(t_ch);
71
0
    if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
72
0
      p_ch = tolower(p_ch);
73
0
    switch (p_ch) {
74
0
    case '\\':
75
      /* Literal match with following character.  Note that the test
76
       * in "default" handles the p[1] == '\0' failure case. */
77
0
      p_ch = *++p;
78
      /* FALLTHROUGH */
79
0
    default:
80
0
      if (t_ch != p_ch)
81
0
        return WM_NOMATCH;
82
0
      continue;
83
0
    case '?':
84
      /* Match anything but '/'. */
85
0
      if ((flags & WM_PATHNAME) && t_ch == '/')
86
0
        return WM_NOMATCH;
87
0
      continue;
88
0
    case '*':
89
0
      if (*++p == '*') {
90
0
        const uchar *prev_p = p;
91
0
        while (*++p == '*') {}
92
0
        if (!(flags & WM_PATHNAME))
93
          /* without WM_PATHNAME, '*' == '**' */
94
0
          match_slash = 1;
95
0
        else if ((prev_p - pattern < 2 || *(prev_p - 2) == '/') &&
96
0
            (*p == '\0' || *p == '/' ||
97
0
             (p[0] == '\\' && p[1] == '/'))) {
98
          /*
99
           * Assuming we already match 'foo/' and are at
100
           * <star star slash>, just assume it matches
101
           * nothing and go ahead match the rest of the
102
           * pattern with the remaining string. This
103
           * helps make foo/<*><*>/bar (<> because
104
           * otherwise it breaks C comment syntax) match
105
           * both foo/bar and foo/a/bar.
106
           */
107
0
          if (p[0] == '/' &&
108
0
              dowild(p + 1, text, flags) == WM_MATCH)
109
0
            return WM_MATCH;
110
0
          match_slash = 1;
111
0
        } else /* WM_PATHNAME is set */
112
0
          match_slash = 0;
113
0
      } else
114
        /* without WM_PATHNAME, '*' == '**' */
115
0
        match_slash = flags & WM_PATHNAME ? 0 : 1;
116
0
      if (*p == '\0') {
117
        /* Trailing "**" matches everything.  Trailing "*" matches
118
         * only if there are no more slash characters. */
119
0
        if (!match_slash) {
120
0
          if (strchr((char *)text, '/'))
121
0
            return WM_ABORT_TO_STARSTAR;
122
0
        }
123
0
        return WM_MATCH;
124
0
      } else if (!match_slash && *p == '/') {
125
        /*
126
         * _one_ asterisk followed by a slash
127
         * with WM_PATHNAME matches the next
128
         * directory
129
         */
130
0
        const char *slash = strchr((char*)text, '/');
131
0
        if (!slash)
132
0
          return WM_ABORT_ALL;
133
0
        text = (const uchar*)slash;
134
        /* the slash is consumed by the top-level for loop */
135
0
        break;
136
0
      }
137
0
      while (1) {
138
0
        if (t_ch == '\0')
139
0
          break;
140
        /*
141
         * Try to advance faster when an asterisk is
142
         * followed by a literal. We know in this case
143
         * that the string before the literal
144
         * must belong to "*".
145
         * If match_slash is false, do not look past
146
         * the first slash as it cannot belong to '*'.
147
         */
148
0
        if (!is_glob_special(*p)) {
149
0
          p_ch = *p;
150
0
          if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
151
0
            p_ch = tolower(p_ch);
152
0
          while ((t_ch = *text) != '\0' &&
153
0
                 (match_slash || t_ch != '/')) {
154
0
            if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
155
0
              t_ch = tolower(t_ch);
156
0
            if (t_ch == p_ch)
157
0
              break;
158
0
            text++;
159
0
          }
160
0
          if (t_ch != p_ch) {
161
0
            if (match_slash)
162
0
              return WM_ABORT_ALL;
163
0
            else
164
0
              return WM_ABORT_TO_STARSTAR;
165
0
          }
166
0
        }
167
0
        if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
168
0
          if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
169
0
            return matched;
170
0
        } else if (!match_slash && t_ch == '/')
171
0
          return WM_ABORT_TO_STARSTAR;
172
0
        t_ch = *++text;
173
0
      }
174
0
      return WM_ABORT_ALL;
175
0
    case '[':
176
0
      p_ch = *++p;
177
0
#ifdef NEGATE_CLASS2
178
0
      if (p_ch == NEGATE_CLASS2)
179
0
        p_ch = NEGATE_CLASS;
180
0
#endif
181
      /* Assign literal 1/0 because of "matched" comparison. */
182
0
      negated = p_ch == NEGATE_CLASS ? 1 : 0;
183
0
      if (negated) {
184
        /* Inverted character class. */
185
0
        p_ch = *++p;
186
0
      }
187
0
      prev_ch = 0;
188
0
      matched = 0;
189
0
      do {
190
0
        if (!p_ch)
191
0
          return WM_ABORT_ALL;
192
0
        if (p_ch == '\\') {
193
0
          p_ch = *++p;
194
0
          if (!p_ch)
195
0
            return WM_ABORT_ALL;
196
0
          if (t_ch == p_ch)
197
0
            matched = 1;
198
0
        } else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
199
0
          p_ch = *++p;
200
0
          if (p_ch == '\\') {
201
0
            p_ch = *++p;
202
0
            if (!p_ch)
203
0
              return WM_ABORT_ALL;
204
0
          }
205
0
          if (t_ch <= p_ch && t_ch >= prev_ch)
206
0
            matched = 1;
207
0
          else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
208
0
            uchar t_ch_upper = toupper(t_ch);
209
0
            if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
210
0
              matched = 1;
211
0
          }
212
0
          p_ch = 0; /* This makes "prev_ch" get set to 0. */
213
0
        } else if (p_ch == '[' && p[1] == ':') {
214
0
          const uchar *s;
215
0
          int i;
216
0
          for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
217
0
          if (!p_ch)
218
0
            return WM_ABORT_ALL;
219
0
          i = p - s - 1;
220
0
          if (i < 0 || p[-1] != ':') {
221
            /* Didn't find ":]", so treat like a normal set. */
222
0
            p = s - 2;
223
0
            p_ch = '[';
224
0
            if (t_ch == p_ch)
225
0
              matched = 1;
226
0
            continue;
227
0
          }
228
0
          if (CC_EQ(s,i, "alnum")) {
229
0
            if (ISALNUM(t_ch))
230
0
              matched = 1;
231
0
          } else if (CC_EQ(s,i, "alpha")) {
232
0
            if (ISALPHA(t_ch))
233
0
              matched = 1;
234
0
          } else if (CC_EQ(s,i, "blank")) {
235
0
            if (ISBLANK(t_ch))
236
0
              matched = 1;
237
0
          } else if (CC_EQ(s,i, "cntrl")) {
238
0
            if (ISCNTRL(t_ch))
239
0
              matched = 1;
240
0
          } else if (CC_EQ(s,i, "digit")) {
241
0
            if (ISDIGIT(t_ch))
242
0
              matched = 1;
243
0
          } else if (CC_EQ(s,i, "graph")) {
244
0
            if (ISGRAPH(t_ch))
245
0
              matched = 1;
246
0
          } else if (CC_EQ(s,i, "lower")) {
247
0
            if (ISLOWER(t_ch))
248
0
              matched = 1;
249
0
          } else if (CC_EQ(s,i, "print")) {
250
0
            if (ISPRINT(t_ch))
251
0
              matched = 1;
252
0
          } else if (CC_EQ(s,i, "punct")) {
253
0
            if (ISPUNCT(t_ch))
254
0
              matched = 1;
255
0
          } else if (CC_EQ(s,i, "space")) {
256
0
            if (ISSPACE(t_ch))
257
0
              matched = 1;
258
0
          } else if (CC_EQ(s,i, "upper")) {
259
0
            if (ISUPPER(t_ch))
260
0
              matched = 1;
261
0
            else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
262
0
              matched = 1;
263
0
          } else if (CC_EQ(s,i, "xdigit")) {
264
0
            if (ISXDIGIT(t_ch))
265
0
              matched = 1;
266
0
          } else /* malformed [:class:] string */
267
0
            return WM_ABORT_ALL;
268
0
          p_ch = 0; /* This makes "prev_ch" get set to 0. */
269
0
        } else if (t_ch == p_ch)
270
0
          matched = 1;
271
0
      } while (prev_ch = p_ch, (p_ch = *++p) != ']');
272
0
      if (matched == negated ||
273
0
          ((flags & WM_PATHNAME) && t_ch == '/'))
274
0
        return WM_NOMATCH;
275
0
      continue;
276
0
    }
277
0
  }
278
279
0
  return *text ? WM_NOMATCH : WM_MATCH;
280
0
}
281
282
/* Match the "pattern" against the "text" string. */
283
int wildmatch(const char *pattern, const char *text, unsigned int flags)
284
0
{
285
0
  int res = dowild((const uchar*)pattern, (const uchar*)text, flags);
286
0
  return res == WM_MATCH ? WM_MATCH : WM_NOMATCH;
287
0
}