/src/httpd/srclib/apr/strings/apr_fnmatch.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* Licensed to the Apache Software Foundation (ASF) under one or more |
2 | | * contributor license agreements. See the NOTICE file distributed with |
3 | | * this work for additional information regarding copyright ownership. |
4 | | * The ASF licenses this file to You under the Apache License, Version 2.0 |
5 | | * (the "License"); you may not use this file except in compliance with |
6 | | * the License. You may obtain a copy of the License at |
7 | | * |
8 | | * http://www.apache.org/licenses/LICENSE-2.0 |
9 | | * |
10 | | * Unless required by applicable law or agreed to in writing, software |
11 | | * distributed under the License is distributed on an "AS IS" BASIS, |
12 | | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
13 | | * See the License for the specific language governing permissions and |
14 | | * limitations under the License. |
15 | | */ |
16 | | |
17 | | |
18 | | /* Derived from The Open Group Base Specifications Issue 7, IEEE Std 1003.1-2008 |
19 | | * as described in; |
20 | | * http://pubs.opengroup.org/onlinepubs/9699919799/functions/fnmatch.html |
21 | | * |
22 | | * Filename pattern matches defined in section 2.13, "Pattern Matching Notation" |
23 | | * from chapter 2. "Shell Command Language" |
24 | | * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13 |
25 | | * where; 1. A bracket expression starting with an unquoted <circumflex> '^' |
26 | | * character CONTINUES to specify a non-matching list; 2. an explicit <period> '.' |
27 | | * in a bracket expression matching list, e.g. "[.abc]" does NOT match a leading |
28 | | * <period> in a filename; 3. a <left-square-bracket> '[' which does not introduce |
29 | | * a valid bracket expression is treated as an ordinary character; 4. a differing |
30 | | * number of consecutive slashes within pattern and string will NOT match; |
31 | | * 5. a trailing '\' in FNM_ESCAPE mode is treated as an ordinary '\' character. |
32 | | * |
33 | | * Bracket expansion defined in section 9.3.5, "RE Bracket Expression", |
34 | | * from chapter 9, "Regular Expressions" |
35 | | * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/V1_chap09.html#tag_09_03_05 |
36 | | * with no support for collating symbols, equivalence class expressions or |
37 | | * character class expressions. A partial range expression with a leading |
38 | | * hyphen following a valid range expression will match only the ordinary |
39 | | * <hyphen> and the ending character (e.g. "[a-m-z]" will match characters |
40 | | * 'a' through 'm', a <hyphen> '-', or a 'z'). |
41 | | * |
42 | | * NOTE: Only POSIX/C single byte locales are correctly supported at this time. |
43 | | * Notably, non-POSIX locales with FNM_CASEFOLD produce undefined results, |
44 | | * particularly in ranges of mixed case (e.g. "[A-z]") or spanning alpha and |
45 | | * nonalpha characters within a range. |
46 | | * |
47 | | * XXX comments below indicate porting required for multi-byte character sets |
48 | | * and non-POSIX locale collation orders; requires mbr* APIs to track shift |
49 | | * state of pattern and string (rewinding pattern and string repeatedly). |
50 | | * |
51 | | * Certain parts of the code assume 0x00-0x3F are unique with any MBCS (e.g. |
52 | | * UTF-8, SHIFT-JIS, etc). Any implementation allowing '\' as an alternate |
53 | | * path delimiter must be aware that 0x5C is NOT unique within SHIFT-JIS. |
54 | | */ |
55 | | |
56 | | #include "apr_file_info.h" |
57 | | #include "apr_fnmatch.h" |
58 | | #include "apr_tables.h" |
59 | | #include "apr_lib.h" |
60 | | #include "apr_strings.h" |
61 | | #include <string.h> |
62 | | #if APR_HAVE_CTYPE_H |
63 | | # include <ctype.h> |
64 | | #endif |
65 | | |
66 | | |
67 | | /* Most MBCS/collation/case issues handled here. Wildcard '*' is not handled. |
68 | | * EOS '\0' and the FNM_PATHNAME '/' delimiters are not advanced over, |
69 | | * however the "\/" sequence is advanced to '/'. |
70 | | * |
71 | | * Both pattern and string are **char to support pointer increment of arbitrary |
72 | | * multibyte characters for the given locale, in a later iteration of this code |
73 | | */ |
74 | | static APR_INLINE int fnmatch_ch(const char **pattern, const char **string, int flags) |
75 | 0 | { |
76 | 0 | const char * const mismatch = *pattern; |
77 | 0 | const int nocase = !!(flags & APR_FNM_CASE_BLIND); |
78 | 0 | const int escape = !(flags & APR_FNM_NOESCAPE); |
79 | 0 | const int slash = !!(flags & APR_FNM_PATHNAME); |
80 | 0 | int result = APR_FNM_NOMATCH; |
81 | 0 | const char *startch; |
82 | 0 | int negate; |
83 | |
|
84 | 0 | if (**pattern == '[') |
85 | 0 | { |
86 | 0 | ++*pattern; |
87 | | |
88 | | /* Handle negation, either leading ! or ^ operators (never both) */ |
89 | 0 | negate = ((**pattern == '!') || (**pattern == '^')); |
90 | 0 | if (negate) |
91 | 0 | ++*pattern; |
92 | | |
93 | | /* ']' is an ordinary character at the start of the range pattern */ |
94 | 0 | if (**pattern == ']') |
95 | 0 | goto leadingclosebrace; |
96 | | |
97 | 0 | while (**pattern) |
98 | 0 | { |
99 | 0 | if (**pattern == ']') { |
100 | 0 | ++*pattern; |
101 | | /* XXX: Fix for MBCS character width */ |
102 | 0 | ++*string; |
103 | 0 | return (result ^ negate); |
104 | 0 | } |
105 | | |
106 | 0 | if (escape && (**pattern == '\\')) { |
107 | 0 | ++*pattern; |
108 | | |
109 | | /* Patterns must be terminated with ']', not EOS */ |
110 | 0 | if (!**pattern) |
111 | 0 | break; |
112 | 0 | } |
113 | | |
114 | | /* Patterns must be terminated with ']' not '/' */ |
115 | 0 | if (slash && (**pattern == '/')) |
116 | 0 | break; |
117 | | |
118 | 0 | leadingclosebrace: |
119 | | /* Look at only well-formed range patterns; |
120 | | * "x-]" is not allowed unless escaped ("x-\]") |
121 | | * XXX: Fix for locale/MBCS character width |
122 | | */ |
123 | 0 | if (((*pattern)[1] == '-') && ((*pattern)[2] != ']')) |
124 | 0 | { |
125 | 0 | startch = *pattern; |
126 | 0 | *pattern += (escape && ((*pattern)[2] == '\\')) ? 3 : 2; |
127 | | |
128 | | /* NOT a properly balanced [expr] pattern, EOS terminated |
129 | | * or ranges containing a slash in FNM_PATHNAME mode pattern |
130 | | * fall out to to the rewind and test '[' literal code path |
131 | | */ |
132 | 0 | if (!**pattern || (slash && (**pattern == '/'))) |
133 | 0 | break; |
134 | | |
135 | | /* XXX: handle locale/MBCS comparison, advance by MBCS char width */ |
136 | 0 | if ((**string >= *startch) && (**string <= **pattern)) |
137 | 0 | result = 0; |
138 | 0 | else if (nocase && (isupper(**string) || isupper(*startch) |
139 | 0 | || isupper(**pattern)) |
140 | 0 | && (tolower(**string) >= tolower(*startch)) |
141 | 0 | && (tolower(**string) <= tolower(**pattern))) |
142 | 0 | result = 0; |
143 | |
|
144 | 0 | ++*pattern; |
145 | 0 | continue; |
146 | 0 | } |
147 | | |
148 | | /* XXX: handle locale/MBCS comparison, advance by MBCS char width */ |
149 | 0 | if ((**string == **pattern)) |
150 | 0 | result = 0; |
151 | 0 | else if (nocase && (isupper(**string) || isupper(**pattern)) |
152 | 0 | && (tolower(**string) == tolower(**pattern))) |
153 | 0 | result = 0; |
154 | |
|
155 | 0 | ++*pattern; |
156 | 0 | } |
157 | | |
158 | | /* NOT a properly balanced [expr] pattern; Rewind |
159 | | * and reset result to test '[' literal |
160 | | */ |
161 | 0 | *pattern = mismatch; |
162 | 0 | result = APR_FNM_NOMATCH; |
163 | 0 | } |
164 | 0 | else if (**pattern == '?') { |
165 | | /* Optimize '?' match before unescaping **pattern */ |
166 | 0 | if (!**string || (slash && (**string == '/'))) |
167 | 0 | return APR_FNM_NOMATCH; |
168 | 0 | result = 0; |
169 | 0 | goto fnmatch_ch_success; |
170 | 0 | } |
171 | 0 | else if (escape && (**pattern == '\\') && (*pattern)[1]) { |
172 | 0 | ++*pattern; |
173 | 0 | } |
174 | | |
175 | | /* XXX: handle locale/MBCS comparison, advance by the MBCS char width */ |
176 | 0 | if (**string == **pattern) |
177 | 0 | result = 0; |
178 | 0 | else if (nocase && (isupper(**string) || isupper(**pattern)) |
179 | 0 | && (tolower(**string) == tolower(**pattern))) |
180 | 0 | result = 0; |
181 | | |
182 | | /* Refuse to advance over trailing slash or nulls |
183 | | */ |
184 | 0 | if (!**string || !**pattern || (slash && ((**string == '/') || (**pattern == '/')))) |
185 | 0 | return result; |
186 | | |
187 | 0 | fnmatch_ch_success: |
188 | 0 | ++*pattern; |
189 | 0 | ++*string; |
190 | 0 | return result; |
191 | 0 | } |
192 | | |
193 | | |
194 | | APR_DECLARE(int) apr_fnmatch(const char *pattern, const char *string, int flags) |
195 | 0 | { |
196 | 0 | static const char dummystring[2] = {' ', 0}; |
197 | 0 | const int escape = !(flags & APR_FNM_NOESCAPE); |
198 | 0 | const int slash = !!(flags & APR_FNM_PATHNAME); |
199 | 0 | const char *strendseg; |
200 | 0 | const char *dummyptr; |
201 | 0 | const char *matchptr; |
202 | 0 | int wild; |
203 | | /* For '*' wild processing only; surpress 'used before initialization' |
204 | | * warnings with dummy initialization values; |
205 | | */ |
206 | 0 | const char *strstartseg = NULL; |
207 | 0 | const char *mismatch = NULL; |
208 | 0 | int matchlen = 0; |
209 | |
|
210 | 0 | if (*pattern == '*') |
211 | 0 | goto firstsegment; |
212 | | |
213 | 0 | while (*pattern && *string) |
214 | 0 | { |
215 | | /* Pre-decode "\/" which has no special significance, and |
216 | | * match balanced slashes, starting a new segment pattern |
217 | | */ |
218 | 0 | if (slash && escape && (*pattern == '\\') && (pattern[1] == '/')) |
219 | 0 | ++pattern; |
220 | 0 | if (slash && (*pattern == '/') && (*string == '/')) { |
221 | 0 | ++pattern; |
222 | 0 | ++string; |
223 | 0 | } |
224 | |
|
225 | 0 | firstsegment: |
226 | | /* At the beginning of each segment, validate leading period behavior. |
227 | | */ |
228 | 0 | if ((flags & APR_FNM_PERIOD) && (*string == '.')) |
229 | 0 | { |
230 | 0 | if (*pattern == '.') |
231 | 0 | ++pattern; |
232 | 0 | else if (escape && (*pattern == '\\') && (pattern[1] == '.')) |
233 | 0 | pattern += 2; |
234 | 0 | else |
235 | 0 | return APR_FNM_NOMATCH; |
236 | 0 | ++string; |
237 | 0 | } |
238 | | |
239 | | /* Determine the end of string segment |
240 | | * |
241 | | * Presumes '/' character is unique, not composite in any MBCS encoding |
242 | | */ |
243 | 0 | if (slash) { |
244 | 0 | strendseg = strchr(string, '/'); |
245 | 0 | if (!strendseg) |
246 | 0 | strendseg = strchr(string, '\0'); |
247 | 0 | } |
248 | 0 | else { |
249 | 0 | strendseg = strchr(string, '\0'); |
250 | 0 | } |
251 | | |
252 | | /* Allow pattern '*' to be consumed even with no remaining string to match |
253 | | */ |
254 | 0 | while (*pattern) |
255 | 0 | { |
256 | 0 | if ((string > strendseg) |
257 | 0 | || ((string == strendseg) && (*pattern != '*'))) |
258 | 0 | break; |
259 | | |
260 | 0 | if (slash && ((*pattern == '/') |
261 | 0 | || (escape && (*pattern == '\\') |
262 | 0 | && (pattern[1] == '/')))) |
263 | 0 | break; |
264 | | |
265 | | /* Reduce groups of '*' and '?' to n '?' matches |
266 | | * followed by one '*' test for simplicity |
267 | | */ |
268 | 0 | for (wild = 0; ((*pattern == '*') || (*pattern == '?')); ++pattern) |
269 | 0 | { |
270 | 0 | if (*pattern == '*') { |
271 | 0 | wild = 1; |
272 | 0 | } |
273 | 0 | else if (string < strendseg) { /* && (*pattern == '?') */ |
274 | | /* XXX: Advance 1 char for MBCS locale */ |
275 | 0 | ++string; |
276 | 0 | } |
277 | 0 | else { /* (string >= strendseg) && (*pattern == '?') */ |
278 | 0 | return APR_FNM_NOMATCH; |
279 | 0 | } |
280 | 0 | } |
281 | | |
282 | 0 | if (wild) |
283 | 0 | { |
284 | 0 | strstartseg = string; |
285 | 0 | mismatch = pattern; |
286 | | |
287 | | /* Count fixed (non '*') char matches remaining in pattern |
288 | | * excluding '/' (or "\/") and '*' |
289 | | */ |
290 | 0 | for (matchptr = pattern, matchlen = 0; 1; ++matchlen) |
291 | 0 | { |
292 | 0 | if ((*matchptr == '\0') |
293 | 0 | || (slash && ((*matchptr == '/') |
294 | 0 | || (escape && (*matchptr == '\\') |
295 | 0 | && (matchptr[1] == '/'))))) |
296 | 0 | { |
297 | | /* Compare precisely this many trailing string chars, |
298 | | * the resulting match needs no wildcard loop |
299 | | */ |
300 | | /* XXX: Adjust for MBCS */ |
301 | 0 | if (string + matchlen > strendseg) |
302 | 0 | return APR_FNM_NOMATCH; |
303 | | |
304 | 0 | string = strendseg - matchlen; |
305 | 0 | wild = 0; |
306 | 0 | break; |
307 | 0 | } |
308 | | |
309 | 0 | if (*matchptr == '*') |
310 | 0 | { |
311 | | /* Ensure at least this many trailing string chars remain |
312 | | * for the first comparison |
313 | | */ |
314 | | /* XXX: Adjust for MBCS */ |
315 | 0 | if (string + matchlen > strendseg) |
316 | 0 | return APR_FNM_NOMATCH; |
317 | | |
318 | | /* Begin first wild comparison at the current position */ |
319 | 0 | break; |
320 | 0 | } |
321 | | |
322 | | /* Skip forward in pattern by a single character match |
323 | | * Use a dummy fnmatch_ch() test to count one "[range]" escape |
324 | | */ |
325 | | /* XXX: Adjust for MBCS */ |
326 | 0 | if (escape && (*matchptr == '\\') && matchptr[1]) { |
327 | 0 | matchptr += 2; |
328 | 0 | } |
329 | 0 | else if (*matchptr == '[') { |
330 | 0 | dummyptr = dummystring; |
331 | 0 | fnmatch_ch(&matchptr, &dummyptr, flags); |
332 | 0 | } |
333 | 0 | else { |
334 | 0 | ++matchptr; |
335 | 0 | } |
336 | 0 | } |
337 | 0 | } |
338 | | |
339 | | /* Incrementally match string against the pattern |
340 | | */ |
341 | 0 | while (*pattern && (string < strendseg)) |
342 | 0 | { |
343 | | /* Success; begin a new wild pattern search |
344 | | */ |
345 | 0 | if (*pattern == '*') |
346 | 0 | break; |
347 | | |
348 | 0 | if (slash && ((*string == '/') |
349 | 0 | || (*pattern == '/') |
350 | 0 | || (escape && (*pattern == '\\') |
351 | 0 | && (pattern[1] == '/')))) |
352 | 0 | break; |
353 | | |
354 | | /* Compare ch's (the pattern is advanced over "\/" to the '/', |
355 | | * but slashes will mismatch, and are not consumed) |
356 | | */ |
357 | 0 | if (!fnmatch_ch(&pattern, &string, flags)) |
358 | 0 | continue; |
359 | | |
360 | | /* Failed to match, loop against next char offset of string segment |
361 | | * until not enough string chars remain to match the fixed pattern |
362 | | */ |
363 | 0 | if (wild) { |
364 | | /* XXX: Advance 1 char for MBCS locale */ |
365 | 0 | string = ++strstartseg; |
366 | 0 | if (string + matchlen > strendseg) |
367 | 0 | return APR_FNM_NOMATCH; |
368 | | |
369 | 0 | pattern = mismatch; |
370 | 0 | continue; |
371 | 0 | } |
372 | 0 | else |
373 | 0 | return APR_FNM_NOMATCH; |
374 | 0 | } |
375 | 0 | } |
376 | | |
377 | 0 | if (*string && !(slash && (*string == '/'))) |
378 | 0 | return APR_FNM_NOMATCH; |
379 | | |
380 | 0 | if (*pattern && !(slash && ((*pattern == '/') |
381 | 0 | || (escape && (*pattern == '\\') |
382 | 0 | && (pattern[1] == '/'))))) |
383 | 0 | return APR_FNM_NOMATCH; |
384 | 0 | } |
385 | | |
386 | | /* Where both pattern and string are at EOS, declare success |
387 | | */ |
388 | 0 | if (!*string && !*pattern) |
389 | 0 | return 0; |
390 | | |
391 | | /* pattern didn't match to the end of string */ |
392 | 0 | return APR_FNM_NOMATCH; |
393 | 0 | } |
394 | | |
395 | | |
396 | | /* This function is an Apache addition |
397 | | * return non-zero if pattern has any glob chars in it |
398 | | * @bug Function does not distinguish for FNM_PATHNAME mode, which renders |
399 | | * a false positive for test[/]this (which is not a range, but |
400 | | * seperate test[ and ]this segments and no glob.) |
401 | | * @bug Function does not distinguish for non-FNM_ESCAPE mode. |
402 | | * @bug Function does not parse []] correctly |
403 | | * Solution may be to use fnmatch_ch() to walk the patterns? |
404 | | */ |
405 | | APR_DECLARE(int) apr_fnmatch_test(const char *pattern) |
406 | 0 | { |
407 | 0 | int nesting; |
408 | |
|
409 | 0 | nesting = 0; |
410 | 0 | while (*pattern) { |
411 | 0 | switch (*pattern) { |
412 | 0 | case '?': |
413 | 0 | case '*': |
414 | 0 | return 1; |
415 | | |
416 | 0 | case '\\': |
417 | 0 | if (*++pattern == '\0') { |
418 | 0 | return 0; |
419 | 0 | } |
420 | 0 | break; |
421 | | |
422 | 0 | case '[': /* '[' is only a glob if it has a matching ']' */ |
423 | 0 | ++nesting; |
424 | 0 | break; |
425 | | |
426 | 0 | case ']': |
427 | 0 | if (nesting) { |
428 | 0 | return 1; |
429 | 0 | } |
430 | 0 | break; |
431 | 0 | } |
432 | 0 | ++pattern; } |
433 | 0 | return 0; |
434 | 0 | } |
435 | | |
436 | | |
437 | | /* Find all files matching the specified pattern */ |
438 | | APR_DECLARE(apr_status_t) apr_match_glob(const char *pattern, |
439 | | apr_array_header_t **result, |
440 | | apr_pool_t *p) |
441 | 0 | { |
442 | 0 | apr_dir_t *dir; |
443 | 0 | apr_finfo_t finfo; |
444 | 0 | apr_status_t rv; |
445 | 0 | char *path; |
446 | | |
447 | | /* XXX So, this is kind of bogus. Basically, I need to strip any leading |
448 | | * directories off the pattern, but there is no portable way to do that. |
449 | | * So, for now we just find the last occurance of '/' and if that doesn't |
450 | | * return anything, then we look for '\'. This means that we could |
451 | | * screw up on unix if the pattern is something like "foo\.*" That '\' |
452 | | * isn't a directory delimiter, it is a part of the filename. To fix this, |
453 | | * we really need apr_filepath_basename, which will be coming as soon as |
454 | | * I get to it. rbb |
455 | | */ |
456 | 0 | char *idx = strrchr(pattern, '/'); |
457 | |
|
458 | 0 | if (idx == NULL) { |
459 | 0 | idx = strrchr(pattern, '\\'); |
460 | 0 | } |
461 | 0 | if (idx == NULL) { |
462 | 0 | path = "."; |
463 | 0 | } |
464 | 0 | else { |
465 | 0 | path = apr_pstrmemdup(p, pattern, idx - pattern); |
466 | 0 | pattern = idx + 1; |
467 | 0 | } |
468 | |
|
469 | 0 | *result = apr_array_make(p, 0, sizeof(char *)); |
470 | 0 | rv = apr_dir_open(&dir, path, p); |
471 | 0 | if (rv != APR_SUCCESS) { |
472 | 0 | return rv; |
473 | 0 | } |
474 | | |
475 | 0 | while (apr_dir_read(&finfo, APR_FINFO_NAME, dir) == APR_SUCCESS) { |
476 | 0 | if (apr_fnmatch(pattern, finfo.name, 0) == APR_SUCCESS) { |
477 | 0 | *(const char **)apr_array_push(*result) = apr_pstrdup(p, finfo.name); |
478 | 0 | } |
479 | 0 | } |
480 | 0 | apr_dir_close(dir); |
481 | 0 | return APR_SUCCESS; |
482 | 0 | } |