/src/CMake/Utilities/cmlibarchive/libarchive/archive_pathmatch.c
Line | Count | Source |
1 | | /*- |
2 | | * Copyright (c) 2003-2007 Tim Kientzle |
3 | | * All rights reserved. |
4 | | * |
5 | | * Redistribution and use in source and binary forms, with or without |
6 | | * modification, are permitted provided that the following conditions |
7 | | * are met: |
8 | | * 1. Redistributions of source code must retain the above copyright |
9 | | * notice, this list of conditions and the following disclaimer |
10 | | * in this position and unchanged. |
11 | | * 2. Redistributions in binary form must reproduce the above copyright |
12 | | * notice, this list of conditions and the following disclaimer in the |
13 | | * documentation and/or other materials provided with the distribution. |
14 | | * |
15 | | * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR |
16 | | * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES |
17 | | * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. |
18 | | * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT, |
19 | | * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT |
20 | | * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, |
21 | | * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY |
22 | | * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT |
23 | | * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF |
24 | | * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
25 | | */ |
26 | | |
27 | | #include "archive_platform.h" |
28 | | |
29 | | #ifdef HAVE_STRING_H |
30 | | #include <string.h> |
31 | | #endif |
32 | | #ifdef HAVE_WCHAR_H |
33 | | #include <wchar.h> |
34 | | #endif |
35 | | |
36 | | #include "archive_pathmatch.h" |
37 | | |
/*
 * Decide whether character 'c' is accepted by the body of a [...] list.
 *
 * 'start' points just after the opening '[' and 'end' points at the
 * closing ']'; only the half-open range [start, end) is examined.
 *   * A leading '!' or '^' negates the class.
 *   * <char>-<char> is a range of characters.
 *   * \<char> removes any special meaning for <char>.
 *
 * Some interesting boundary cases:
 *   a-d-e is one range (a-d) followed by two single characters - and e.
 *   \a-\d is the same as a-d.
 *   a\-d is three single characters: a, d, -.
 *   A trailing - is not special (so [a-] is two characters a and -).
 *   An initial - is not special ([a-] is same as [-a] is same as [\\-a]).
 *   The caller never passes a list that ends in a lone '\'.
 *   [] always fails.
 *   [!] always succeeds.
 *
 * Returns 1 when 'c' is accepted by the (possibly negated) class and
 * 0 otherwise.  'flags' is currently ignored.
 */
static int
pm_list(const char *start, const char *end, const char c, int flags)
{
	const char *p = start;
	char rangeStart = '\0', nextRangeStart;
	int match = 1, nomatch = 0;

	/* This will be used soon... */
	(void)flags; /* UNUSED */

	/*
	 * If this is a negated class, return success for nomatch.
	 * The bounds check comes first so that an empty list never
	 * causes a read of the byte at 'end'.
	 */
	if (p < end && (*p == '!' || *p == '^')) {
		match = 0;
		nomatch = 1;
		++p;
	}

	while (p < end) {
		/* Only a plain or escaped character can start a range. */
		nextRangeStart = '\0';
		switch (*p) {
		case '-':
			/* Trailing or initial '-' is not special. */
			if ((rangeStart == '\0') || (p == end - 1)) {
				if (*p == c)
					return (match);
			} else {
				/* The range end may itself be escaped. */
				char rangeEnd = *++p;
				if (rangeEnd == '\\')
					rangeEnd = *++p;
				if ((rangeStart <= c) && (c <= rangeEnd))
					return (match);
			}
			break;
		case '\\':
			/* Step over the backslash; treat next char literally. */
			++p;
			/* Fall through */
		default:
			if (*p == c)
				return (match);
			nextRangeStart = *p; /* Possible start of range. */
		}
		rangeStart = nextRangeStart;
		++p;
	}
	/* Nothing in the list accepted 'c'. */
	return (nomatch);
}
100 | | |
/*
 * Wide-character counterpart of pm_list(): decide whether 'c' is
 * accepted by the list body in the half-open range [start, end).
 *
 * A leading '!' or '^' negates the class, <char>-<char> is a range,
 * and \<char> makes <char> literal.  An initial or trailing '-' is an
 * ordinary character.  Returns 1 when 'c' is accepted by the (possibly
 * negated) class and 0 otherwise.  'flags' is currently ignored.
 */
static int
pm_list_w(const wchar_t *start, const wchar_t *end, const wchar_t c, int flags)
{
	const wchar_t *p = start;
	wchar_t rangeStart = L'\0', nextRangeStart;
	int match = 1, nomatch = 0;

	/* This will be used soon... */
	(void)flags; /* UNUSED */

	/*
	 * If this is a negated class, return success for nomatch.
	 * The bounds check comes first so that an empty list never
	 * causes a read of the element at 'end'.
	 */
	if (p < end && (*p == L'!' || *p == L'^')) {
		match = 0;
		nomatch = 1;
		++p;
	}

	while (p < end) {
		/* Only a plain or escaped character can start a range. */
		nextRangeStart = L'\0';
		switch (*p) {
		case L'-':
			/* Trailing or initial '-' is not special. */
			if ((rangeStart == L'\0') || (p == end - 1)) {
				if (*p == c)
					return (match);
			} else {
				/* The range end may itself be escaped. */
				wchar_t rangeEnd = *++p;
				if (rangeEnd == L'\\')
					rangeEnd = *++p;
				if ((rangeStart <= c) && (c <= rangeEnd))
					return (match);
			}
			break;
		case L'\\':
			/* Step over the backslash; treat next char literally. */
			++p;
			/* Fall through */
		default:
			if (*p == c)
				return (match);
			nextRangeStart = *p; /* Possible start of range. */
		}
		rangeStart = nextRangeStart;
		++p;
	}
	/* Nothing in the list accepted 'c'. */
	return (nomatch);
}
147 | | |
/*
 * Advance past any run of redundant path filler at 's': slashes, a '.'
 * that is immediately followed by a slash, and a '.' that is the last
 * character of the string.  Returns a pointer to the first character
 * that is not part of such a run (possibly the terminating NUL).
 */
static const char *
pm_slashskip(const char *s)
{
	for (;;) {
		if (*s == '/') {
			++s;
			continue;
		}
		/* A lone "." component: followed by '/' or end of string. */
		if (*s == '.' && (s[1] == '/' || s[1] == '\0')) {
			++s;
			continue;
		}
		break;
	}
	return (s);
}
159 | | |
/*
 * Wide-character counterpart of pm_slashskip(): advance past any run
 * of slashes, '.' followed by a slash, and a '.' that ends the string.
 * Returns a pointer to the first element not belonging to such a run.
 */
static const wchar_t *
pm_slashskip_w(const wchar_t *s)
{
	for (;;) {
		if (*s == L'/') {
			++s;
			continue;
		}
		/* A lone "." component: followed by '/' or end of string. */
		if (*s == L'.' && (s[1] == L'/' || s[1] == L'\0')) {
			++s;
			continue;
		}
		break;
	}
	return (s);
}
168 | | |
169 | | static int |
170 | | pm(const char *p, const char *s, int flags) |
171 | 0 | { |
172 | 0 | const char *end; |
173 | | |
174 | | /* |
175 | | * Ignore leading './', './/', '././', etc. |
176 | | */ |
177 | 0 | if (s[0] == '.' && s[1] == '/') |
178 | 0 | s = pm_slashskip(s + 1); |
179 | 0 | if (p[0] == '.' && p[1] == '/') |
180 | 0 | p = pm_slashskip(p + 1); |
181 | |
|
182 | 0 | for (;;) { |
183 | 0 | switch (*p) { |
184 | 0 | case '\0': |
185 | 0 | if (s[0] == '/') { |
186 | 0 | if (flags & PATHMATCH_NO_ANCHOR_END) |
187 | 0 | return (1); |
188 | | /* "dir" == "dir/" == "dir/." */ |
189 | 0 | s = pm_slashskip(s); |
190 | 0 | } |
191 | 0 | return (*s == '\0'); |
192 | 0 | case '?': |
193 | | /* ? always succeeds, unless we hit end of 's' */ |
194 | 0 | if (*s == '\0') |
195 | 0 | return (0); |
196 | 0 | break; |
197 | 0 | case '*': |
198 | | /* "*" == "**" == "***" ... */ |
199 | 0 | while (*p == '*') |
200 | 0 | ++p; |
201 | | /* Trailing '*' always succeeds. */ |
202 | 0 | if (*p == '\0') |
203 | 0 | return (1); |
204 | 0 | while (*s) { |
205 | 0 | if (archive_pathmatch(p, s, flags)) |
206 | 0 | return (1); |
207 | 0 | ++s; |
208 | 0 | } |
209 | 0 | return (0); |
210 | 0 | case '[': |
211 | | /* |
212 | | * Find the end of the [...] character class, |
213 | | * ignoring \] that might occur within the class. |
214 | | */ |
215 | 0 | end = p + 1; |
216 | 0 | while (*end != '\0' && *end != ']') { |
217 | 0 | if (*end == '\\' && end[1] != '\0') |
218 | 0 | ++end; |
219 | 0 | ++end; |
220 | 0 | } |
221 | 0 | if (*end == ']') { |
222 | | /* We found [...], try to match it. */ |
223 | 0 | if (!pm_list(p + 1, end, *s, flags)) |
224 | 0 | return (0); |
225 | 0 | p = end; /* Jump to trailing ']' char. */ |
226 | 0 | break; |
227 | 0 | } else |
228 | | /* No final ']', so just match '['. */ |
229 | 0 | if (*p != *s) |
230 | 0 | return (0); |
231 | 0 | break; |
232 | 0 | case '\\': |
233 | | /* Trailing '\\' matches itself. */ |
234 | 0 | if (p[1] == '\0') { |
235 | 0 | if (*s != '\\') |
236 | 0 | return (0); |
237 | 0 | } else { |
238 | 0 | ++p; |
239 | 0 | if (*p != *s) |
240 | 0 | return (0); |
241 | 0 | } |
242 | 0 | break; |
243 | 0 | case '/': |
244 | 0 | if (*s != '/' && *s != '\0') |
245 | 0 | return (0); |
246 | | /* Note: pattern "/\./" won't match "/"; |
247 | | * pm_slashskip() correctly stops at backslash. */ |
248 | 0 | p = pm_slashskip(p); |
249 | 0 | s = pm_slashskip(s); |
250 | 0 | if (*p == '\0' && (flags & PATHMATCH_NO_ANCHOR_END)) |
251 | 0 | return (1); |
252 | 0 | --p; /* Counteract the increment below. */ |
253 | 0 | --s; |
254 | 0 | break; |
255 | 0 | case '$': |
256 | | /* '$' is special only at end of pattern and only |
257 | | * if PATHMATCH_NO_ANCHOR_END is specified. */ |
258 | 0 | if (p[1] == '\0' && (flags & PATHMATCH_NO_ANCHOR_END)){ |
259 | | /* "dir" == "dir/" == "dir/." */ |
260 | 0 | return (*pm_slashskip(s) == '\0'); |
261 | 0 | } |
262 | | /* Otherwise, '$' is not special. */ |
263 | | /* FALL THROUGH */ |
264 | 0 | default: |
265 | 0 | if (*p != *s) |
266 | 0 | return (0); |
267 | 0 | break; |
268 | 0 | } |
269 | 0 | ++p; |
270 | 0 | ++s; |
271 | 0 | } |
272 | 0 | } |
273 | | |
274 | | static int |
275 | | pm_w(const wchar_t *p, const wchar_t *s, int flags) |
276 | 0 | { |
277 | 0 | const wchar_t *end; |
278 | | |
279 | | /* |
280 | | * Ignore leading './', './/', '././', etc. |
281 | | */ |
282 | 0 | if (s[0] == L'.' && s[1] == L'/') |
283 | 0 | s = pm_slashskip_w(s + 1); |
284 | 0 | if (p[0] == L'.' && p[1] == L'/') |
285 | 0 | p = pm_slashskip_w(p + 1); |
286 | |
|
287 | 0 | for (;;) { |
288 | 0 | switch (*p) { |
289 | 0 | case L'\0': |
290 | 0 | if (s[0] == L'/') { |
291 | 0 | if (flags & PATHMATCH_NO_ANCHOR_END) |
292 | 0 | return (1); |
293 | | /* "dir" == "dir/" == "dir/." */ |
294 | 0 | s = pm_slashskip_w(s); |
295 | 0 | } |
296 | 0 | return (*s == L'\0'); |
297 | 0 | case L'?': |
298 | | /* ? always succeeds, unless we hit end of 's' */ |
299 | 0 | if (*s == L'\0') |
300 | 0 | return (0); |
301 | 0 | break; |
302 | 0 | case L'*': |
303 | | /* "*" == "**" == "***" ... */ |
304 | 0 | while (*p == L'*') |
305 | 0 | ++p; |
306 | | /* Trailing '*' always succeeds. */ |
307 | 0 | if (*p == L'\0') |
308 | 0 | return (1); |
309 | 0 | while (*s) { |
310 | 0 | if (archive_pathmatch_w(p, s, flags)) |
311 | 0 | return (1); |
312 | 0 | ++s; |
313 | 0 | } |
314 | 0 | return (0); |
315 | 0 | case L'[': |
316 | | /* |
317 | | * Find the end of the [...] character class, |
318 | | * ignoring \] that might occur within the class. |
319 | | */ |
320 | 0 | end = p + 1; |
321 | 0 | while (*end != L'\0' && *end != L']') { |
322 | 0 | if (*end == L'\\' && end[1] != L'\0') |
323 | 0 | ++end; |
324 | 0 | ++end; |
325 | 0 | } |
326 | 0 | if (*end == L']') { |
327 | | /* We found [...], try to match it. */ |
328 | 0 | if (!pm_list_w(p + 1, end, *s, flags)) |
329 | 0 | return (0); |
330 | 0 | p = end; /* Jump to trailing ']' char. */ |
331 | 0 | break; |
332 | 0 | } else |
333 | | /* No final ']', so just match '['. */ |
334 | 0 | if (*p != *s) |
335 | 0 | return (0); |
336 | 0 | break; |
337 | 0 | case L'\\': |
338 | | /* Trailing '\\' matches itself. */ |
339 | 0 | if (p[1] == L'\0') { |
340 | 0 | if (*s != L'\\') |
341 | 0 | return (0); |
342 | 0 | } else { |
343 | 0 | ++p; |
344 | 0 | if (*p != *s) |
345 | 0 | return (0); |
346 | 0 | } |
347 | 0 | break; |
348 | 0 | case L'/': |
349 | 0 | if (*s != L'/' && *s != L'\0') |
350 | 0 | return (0); |
351 | | /* Note: pattern "/\./" won't match "/"; |
352 | | * pm_slashskip() correctly stops at backslash. */ |
353 | 0 | p = pm_slashskip_w(p); |
354 | 0 | s = pm_slashskip_w(s); |
355 | 0 | if (*p == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END)) |
356 | 0 | return (1); |
357 | 0 | --p; /* Counteract the increment below. */ |
358 | 0 | --s; |
359 | 0 | break; |
360 | 0 | case L'$': |
361 | | /* '$' is special only at end of pattern and only |
362 | | * if PATHMATCH_NO_ANCHOR_END is specified. */ |
363 | 0 | if (p[1] == L'\0' && (flags & PATHMATCH_NO_ANCHOR_END)){ |
364 | | /* "dir" == "dir/" == "dir/." */ |
365 | 0 | return (*pm_slashskip_w(s) == L'\0'); |
366 | 0 | } |
367 | | /* Otherwise, '$' is not special. */ |
368 | | /* FALL THROUGH */ |
369 | 0 | default: |
370 | 0 | if (*p != *s) |
371 | 0 | return (0); |
372 | 0 | break; |
373 | 0 | } |
374 | 0 | ++p; |
375 | 0 | ++s; |
376 | 0 | } |
377 | 0 | } |
378 | | |
379 | | /* Main entry point. */ |
380 | | int |
381 | | __archive_pathmatch(const char *p, const char *s, int flags) |
382 | 0 | { |
383 | | /* Empty pattern only matches the empty string. */ |
384 | 0 | if (p == NULL || *p == '\0') |
385 | 0 | return (s == NULL || *s == '\0'); |
386 | 0 | else if (s == NULL) |
387 | 0 | return (0); |
388 | | |
389 | | /* Leading '^' anchors the start of the pattern. */ |
390 | 0 | if (*p == '^') { |
391 | 0 | ++p; |
392 | 0 | flags &= ~PATHMATCH_NO_ANCHOR_START; |
393 | 0 | } |
394 | |
|
395 | 0 | if (*p == '/' && *s != '/') |
396 | 0 | return (0); |
397 | | |
398 | | /* Certain patterns anchor implicitly. */ |
399 | 0 | if (*p == '*' || *p == '/') { |
400 | 0 | while (*p == '/') |
401 | 0 | ++p; |
402 | 0 | while (*s == '/') |
403 | 0 | ++s; |
404 | 0 | return (pm(p, s, flags)); |
405 | 0 | } |
406 | | |
407 | | /* If start is unanchored, try to match start of each path element. */ |
408 | 0 | if (flags & PATHMATCH_NO_ANCHOR_START) { |
409 | 0 | for ( ; s != NULL; s = strchr(s, '/')) { |
410 | 0 | if (*s == '/') |
411 | 0 | s++; |
412 | 0 | if (pm(p, s, flags)) |
413 | 0 | return (1); |
414 | 0 | } |
415 | 0 | return (0); |
416 | 0 | } |
417 | | |
418 | | /* Default: Match from beginning. */ |
419 | 0 | return (pm(p, s, flags)); |
420 | 0 | } |
421 | | |
422 | | int |
423 | | __archive_pathmatch_w(const wchar_t *p, const wchar_t *s, int flags) |
424 | 0 | { |
425 | | /* Empty pattern only matches the empty string. */ |
426 | 0 | if (p == NULL || *p == L'\0') |
427 | 0 | return (s == NULL || *s == L'\0'); |
428 | 0 | else if (s == NULL) |
429 | 0 | return (0); |
430 | | |
431 | | /* Leading '^' anchors the start of the pattern. */ |
432 | 0 | if (*p == L'^') { |
433 | 0 | ++p; |
434 | 0 | flags &= ~PATHMATCH_NO_ANCHOR_START; |
435 | 0 | } |
436 | |
|
437 | 0 | if (*p == L'/' && *s != L'/') |
438 | 0 | return (0); |
439 | | |
440 | | /* Certain patterns anchor implicitly. */ |
441 | 0 | if (*p == L'*' || *p == L'/') { |
442 | 0 | while (*p == L'/') |
443 | 0 | ++p; |
444 | 0 | while (*s == L'/') |
445 | 0 | ++s; |
446 | 0 | return (pm_w(p, s, flags)); |
447 | 0 | } |
448 | | |
449 | | /* If start is unanchored, try to match start of each path element. */ |
450 | 0 | if (flags & PATHMATCH_NO_ANCHOR_START) { |
451 | 0 | for ( ; s != NULL; s = wcschr(s, L'/')) { |
452 | 0 | if (*s == L'/') |
453 | 0 | s++; |
454 | 0 | if (pm_w(p, s, flags)) |
455 | 0 | return (1); |
456 | 0 | } |
457 | 0 | return (0); |
458 | 0 | } |
459 | | |
460 | | /* Default: Match from beginning. */ |
461 | 0 | return (pm_w(p, s, flags)); |
462 | 0 | } |