/rust/registry/src/index.crates.io-1949cf8c6b5b557f/glob-0.3.3/src/lib.rs
Line | Count | Source |
1 | | // Copyright 2014 The Rust Project Developers. See the COPYRIGHT |
2 | | // file at the top-level directory of this distribution and at |
3 | | // http://rust-lang.org/COPYRIGHT. |
4 | | // |
5 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or |
6 | | // http://www.apache.org/licenses/LICENSE-2.0> or the MIT license |
7 | | // <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your |
8 | | // option. This file may not be copied, modified, or distributed |
9 | | // except according to those terms. |
10 | | |
11 | | //! Support for matching file paths against Unix shell style patterns. |
12 | | //! |
13 | | //! The `glob` and `glob_with` functions allow querying the filesystem for all |
14 | | //! files that match a particular pattern (similar to the libc `glob` function). |
15 | | //! The methods on the `Pattern` type provide functionality for checking if |
16 | | //! individual paths match a particular pattern (similar to the libc `fnmatch` |
17 | | //! function). |
18 | | //! |
19 | | //! For consistency across platforms, and for Windows support, this module |
20 | | //! is implemented entirely in Rust rather than deferring to the libc |
21 | | //! `glob`/`fnmatch` functions. |
22 | | //! |
23 | | //! # Examples |
24 | | //! |
25 | | //! To print all jpg files in `/media/` and all of its subdirectories. |
26 | | //! |
27 | | //! ```rust,no_run |
28 | | //! use glob::glob; |
29 | | //! |
30 | | //! for entry in glob("/media/**/*.jpg").expect("Failed to read glob pattern") { |
31 | | //! match entry { |
32 | | //! Ok(path) => println!("{:?}", path.display()), |
33 | | //! Err(e) => println!("{:?}", e), |
34 | | //! } |
35 | | //! } |
36 | | //! ``` |
37 | | //! |
38 | | //! To print all files containing the letter "a", case insensitive, in a `local` |
39 | | //! directory relative to the current working directory. This ignores errors |
40 | | //! instead of printing them. |
41 | | //! |
42 | | //! ```rust,no_run |
43 | | //! use glob::glob_with; |
44 | | //! use glob::MatchOptions; |
45 | | //! |
46 | | //! let options = MatchOptions { |
47 | | //! case_sensitive: false, |
48 | | //! require_literal_separator: false, |
49 | | //! require_literal_leading_dot: false, |
50 | | //! }; |
51 | | //! for entry in glob_with("local/*a*", options).unwrap() { |
52 | | //! if let Ok(path) = entry { |
53 | | //! println!("{:?}", path.display()) |
54 | | //! } |
55 | | //! } |
56 | | //! ``` |
57 | | |
// Crate-level rustdoc configuration. `html_root_url` is the base URL that other
// crates' documentation uses when linking to items of this crate, so it has to
// name the version that is actually being published (0.3.3).
#![doc(
    html_logo_url = "https://www.rust-lang.org/logos/rust-logo-128x128-blk-v2.png",
    html_favicon_url = "https://www.rust-lang.org/favicon.ico",
    html_root_url = "https://docs.rs/glob/0.3.3"
)]
63 | | #![deny(missing_docs)] |
64 | | #![allow(clippy::while_let_loop)] |
65 | | |
66 | | #[cfg(test)] |
67 | | #[macro_use] |
68 | | extern crate doc_comment; |
69 | | |
70 | | #[cfg(test)] |
71 | | doctest!("../README.md"); |
72 | | |
73 | | use std::cmp; |
74 | | use std::cmp::Ordering; |
75 | | use std::error::Error; |
76 | | use std::fmt; |
77 | | use std::fs; |
78 | | use std::fs::DirEntry; |
79 | | use std::io; |
80 | | use std::ops::Deref; |
81 | | use std::path::{self, Component, Path, PathBuf}; |
82 | | use std::str::FromStr; |
83 | | |
84 | | use CharSpecifier::{CharRange, SingleChar}; |
85 | | use MatchResult::{EntirePatternDoesntMatch, Match, SubPatternDoesntMatch}; |
86 | | use PatternToken::AnyExcept; |
87 | | use PatternToken::{AnyChar, AnyRecursiveSequence, AnySequence, AnyWithin, Char}; |
88 | | |
89 | | /// An iterator that yields `Path`s from the filesystem that match a particular |
90 | | /// pattern. |
91 | | /// |
92 | | /// Note that it yields `GlobResult` in order to report any `IoErrors` that may |
93 | | /// arise during iteration. If a directory matches but is unreadable, |
94 | | /// thereby preventing its contents from being checked for matches, a |
95 | | /// `GlobError` is returned to express this. |
96 | | /// |
97 | | /// See the `glob` function for more details. |
#[derive(Debug)]
pub struct Paths {
    /// One compiled pattern per separator-delimited component of the glob
    /// (after any prefix/root), in the order they appear.
    dir_patterns: Vec<Pattern>,
    /// `true` when the glob string ended in a path separator; in that case only
    /// directories are yielded.
    require_dir: bool,
    /// Match options forwarded to every pattern match and directory expansion.
    options: MatchOptions,
    /// Pending work, consumed with `pop()`: either a path together with the
    /// index of the pattern it still has to be checked against (`usize::MAX`
    /// marks "already checked"), or an error to hand to the caller.
    todo: Vec<Result<(PathWrapper, usize), GlobError>>,
    /// Directory the search starts from. It is taken (left as `None`) on the
    /// first call to `next()`, which uses it to seed `todo`.
    scope: Option<PathWrapper>,
}
106 | | |
107 | | /// Return an iterator that produces all the `Path`s that match the given |
108 | | /// pattern using default match options, which may be absolute or relative to |
109 | | /// the current working directory. |
110 | | /// |
111 | | /// This may return an error if the pattern is invalid. |
112 | | /// |
113 | | /// This method uses the default match options and is equivalent to calling |
114 | | /// `glob_with(pattern, MatchOptions::new())`. Use `glob_with` directly if you |
115 | | /// want to use non-default match options. |
116 | | /// |
117 | | /// When iterating, each result is a `GlobResult` which expresses the |
118 | | /// possibility that there was an `IoError` when attempting to read the contents |
119 | | /// of the matched path. In other words, each item returned by the iterator |
120 | | /// will either be an `Ok(Path)` if the path matched, or an `Err(GlobError)` if |
121 | | /// the path (partially) matched _but_ its contents could not be read in order |
122 | | /// to determine if its contents matched. |
123 | | /// |
124 | | /// See the `Paths` documentation for more information. |
125 | | /// |
126 | | /// # Examples |
127 | | /// |
128 | | /// Consider a directory `/media/pictures` containing only the files |
129 | | /// `kittens.jpg`, `puppies.jpg` and `hamsters.gif`: |
130 | | /// |
131 | | /// ```rust,no_run |
132 | | /// use glob::glob; |
133 | | /// |
134 | | /// for entry in glob("/media/pictures/*.jpg").unwrap() { |
135 | | /// match entry { |
136 | | /// Ok(path) => println!("{:?}", path.display()), |
137 | | /// |
138 | | /// // if the path matched but was unreadable, |
139 | | /// // thereby preventing its contents from matching |
140 | | /// Err(e) => println!("{:?}", e), |
141 | | /// } |
142 | | /// } |
143 | | /// ``` |
144 | | /// |
145 | | /// The above code will print: |
146 | | /// |
147 | | /// ```ignore |
148 | | /// /media/pictures/kittens.jpg |
149 | | /// /media/pictures/puppies.jpg |
150 | | /// ``` |
151 | | /// |
152 | | /// If you want to ignore unreadable paths, you can use something like |
153 | | /// `filter_map`: |
154 | | /// |
155 | | /// ```rust |
156 | | /// use glob::glob; |
157 | | /// use std::result::Result; |
158 | | /// |
159 | | /// for path in glob("/media/pictures/*.jpg").unwrap().filter_map(Result::ok) { |
160 | | /// println!("{}", path.display()); |
161 | | /// } |
162 | | /// ``` |
163 | | /// Paths are yielded in alphabetical order. |
164 | 0 | pub fn glob(pattern: &str) -> Result<Paths, PatternError> { |
165 | 0 | glob_with(pattern, MatchOptions::new()) |
166 | 0 | } |
167 | | |
168 | | /// Return an iterator that produces all the `Path`s that match the given |
169 | | /// pattern using the specified match options, which may be absolute or relative |
170 | | /// to the current working directory. |
171 | | /// |
172 | | /// This may return an error if the pattern is invalid. |
173 | | /// |
174 | | /// This function accepts Unix shell style patterns as described by |
175 | | /// `Pattern::new(..)`. The options given are passed through unchanged to |
176 | | /// `Pattern::matches_with(..)` with the exception that |
177 | | /// `require_literal_separator` is always set to `true` regardless of the value |
178 | | /// passed to this function. |
179 | | /// |
180 | | /// Paths are yielded in alphabetical order. |
181 | 0 | pub fn glob_with(pattern: &str, options: MatchOptions) -> Result<Paths, PatternError> { |
182 | | #[cfg(windows)] |
183 | | fn check_windows_verbatim(p: &Path) -> bool { |
184 | | match p.components().next() { |
185 | | Some(Component::Prefix(ref p)) => { |
186 | | // Allow VerbatimDisk paths. std canonicalize() generates them, and they work fine |
187 | | p.kind().is_verbatim() |
188 | | && if let std::path::Prefix::VerbatimDisk(_) = p.kind() { |
189 | | false |
190 | | } else { |
191 | | true |
192 | | } |
193 | | } |
194 | | _ => false, |
195 | | } |
196 | | } |
197 | | #[cfg(not(windows))] |
198 | 0 | fn check_windows_verbatim(_: &Path) -> bool { |
199 | 0 | false |
200 | 0 | } |
201 | | |
202 | | #[cfg(windows)] |
203 | | fn to_scope(p: &Path) -> PathBuf { |
204 | | // FIXME handle volume relative paths here |
205 | | p.to_path_buf() |
206 | | } |
207 | | #[cfg(not(windows))] |
208 | 0 | fn to_scope(p: &Path) -> PathBuf { |
209 | 0 | p.to_path_buf() |
210 | 0 | } |
211 | | |
212 | | // make sure that the pattern is valid first, else early return with error |
213 | 0 | let _ = Pattern::new(pattern)?; |
214 | | |
215 | 0 | let mut components = Path::new(pattern).components().peekable(); |
216 | | loop { |
217 | 0 | match components.peek() { |
218 | 0 | Some(&Component::Prefix(..)) | Some(&Component::RootDir) => { |
219 | 0 | components.next(); |
220 | 0 | } |
221 | 0 | _ => break, |
222 | | } |
223 | | } |
224 | 0 | let rest = components.map(|s| s.as_os_str()).collect::<PathBuf>(); |
225 | 0 | let normalized_pattern = Path::new(pattern).iter().collect::<PathBuf>(); |
226 | 0 | let root_len = normalized_pattern.to_str().unwrap().len() - rest.to_str().unwrap().len(); |
227 | 0 | let root = if root_len > 0 { |
228 | 0 | Some(Path::new(&pattern[..root_len])) |
229 | | } else { |
230 | 0 | None |
231 | | }; |
232 | | |
233 | 0 | if root_len > 0 && check_windows_verbatim(root.unwrap()) { |
234 | | // FIXME: How do we want to handle verbatim paths? I'm inclined to |
235 | | // return nothing, since we can't very well find all UNC shares with a |
236 | | // 1-letter server name. |
237 | 0 | return Ok(Paths { |
238 | 0 | dir_patterns: Vec::new(), |
239 | 0 | require_dir: false, |
240 | 0 | options, |
241 | 0 | todo: Vec::new(), |
242 | 0 | scope: None, |
243 | 0 | }); |
244 | 0 | } |
245 | | |
246 | 0 | let scope = root.map_or_else(|| PathBuf::from("."), to_scope); |
247 | 0 | let scope = PathWrapper::from_path(scope); |
248 | | |
249 | 0 | let mut dir_patterns = Vec::new(); |
250 | 0 | let components = |
251 | 0 | pattern[cmp::min(root_len, pattern.len())..].split_terminator(path::is_separator); |
252 | | |
253 | 0 | for component in components { |
254 | 0 | dir_patterns.push(Pattern::new(component)?); |
255 | | } |
256 | | |
257 | 0 | if root_len == pattern.len() { |
258 | 0 | dir_patterns.push(Pattern { |
259 | 0 | original: "".to_string(), |
260 | 0 | tokens: Vec::new(), |
261 | 0 | is_recursive: false, |
262 | 0 | has_metachars: false, |
263 | 0 | }); |
264 | 0 | } |
265 | | |
266 | 0 | let last_is_separator = pattern.chars().next_back().map(path::is_separator); |
267 | 0 | let require_dir = last_is_separator == Some(true); |
268 | 0 | let todo = Vec::new(); |
269 | | |
270 | 0 | Ok(Paths { |
271 | 0 | dir_patterns, |
272 | 0 | require_dir, |
273 | 0 | options, |
274 | 0 | todo, |
275 | 0 | scope: Some(scope), |
276 | 0 | }) |
277 | 0 | } |
278 | | |
279 | | /// A glob iteration error. |
280 | | /// |
281 | | /// This is typically returned when a particular path cannot be read |
282 | | /// to determine if its contents match the glob pattern. This is possible |
283 | | /// if the program lacks the appropriate permissions, for example. |
#[derive(Debug)]
pub struct GlobError {
    /// The path returned by `GlobError::path` and named in the `Display` message.
    path: PathBuf,
    /// The I/O error exposed through `GlobError::error` and `GlobError::into_error`.
    error: io::Error,
}
289 | | |
290 | | impl GlobError { |
291 | | /// The Path that the error corresponds to. |
292 | 0 | pub fn path(&self) -> &Path { |
293 | 0 | &self.path |
294 | 0 | } |
295 | | |
296 | | /// The error in question. |
297 | 0 | pub fn error(&self) -> &io::Error { |
298 | 0 | &self.error |
299 | 0 | } |
300 | | |
301 | | /// Consumes self, returning the _raw_ underlying `io::Error` |
302 | 0 | pub fn into_error(self) -> io::Error { |
303 | 0 | self.error |
304 | 0 | } |
305 | | } |
306 | | |
307 | | impl Error for GlobError { |
308 | | #[allow(deprecated)] |
309 | 0 | fn description(&self) -> &str { |
310 | 0 | self.error.description() |
311 | 0 | } |
312 | | |
313 | | #[allow(unknown_lints, bare_trait_objects)] |
314 | 0 | fn cause(&self) -> Option<&Error> { |
315 | 0 | Some(&self.error) |
316 | 0 | } |
317 | | } |
318 | | |
319 | | impl fmt::Display for GlobError { |
320 | 0 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
321 | 0 | write!( |
322 | 0 | f, |
323 | 0 | "attempting to read `{}` resulted in an error: {}", |
324 | 0 | self.path.display(), |
325 | | self.error |
326 | | ) |
327 | 0 | } |
328 | | } |
329 | | |
// Internal pairing of a path with a directory flag that is computed once, when
// the wrapper is constructed, so later checks do not need to touch the
// filesystem again.
#[derive(Debug)]
struct PathWrapper {
    // The wrapped path; also what `Deref` / `AsRef<Path>` expose.
    path: PathBuf,
    // Outcome of the directory check made at construction time; `false` when
    // that check could not be completed.
    is_directory: bool,
}
335 | | |
336 | | impl PathWrapper { |
337 | 0 | fn from_dir_entry(path: PathBuf, e: DirEntry) -> Self { |
338 | 0 | let is_directory = e |
339 | 0 | .file_type() |
340 | 0 | .ok() |
341 | 0 | .and_then(|file_type| { |
342 | | // We need to use fs::metadata to resolve the actual path |
343 | | // if it's a symlink. |
344 | 0 | if file_type.is_symlink() { |
345 | 0 | None |
346 | | } else { |
347 | 0 | Some(file_type.is_dir()) |
348 | | } |
349 | 0 | }) |
350 | 0 | .or_else(|| fs::metadata(&path).map(|m| m.is_dir()).ok()) |
351 | 0 | .unwrap_or(false); |
352 | 0 | Self { path, is_directory } |
353 | 0 | } |
354 | 0 | fn from_path(path: PathBuf) -> Self { |
355 | 0 | let is_directory = fs::metadata(&path).map(|m| m.is_dir()).unwrap_or(false); |
356 | 0 | Self { path, is_directory } |
357 | 0 | } |
358 | | |
359 | 0 | fn into_path(self) -> PathBuf { |
360 | 0 | self.path |
361 | 0 | } |
362 | | } |
363 | | |
364 | | impl Deref for PathWrapper { |
365 | | type Target = Path; |
366 | | |
367 | 0 | fn deref(&self) -> &Self::Target { |
368 | 0 | self.path.deref() |
369 | 0 | } |
370 | | } |
371 | | |
372 | | impl AsRef<Path> for PathWrapper { |
373 | 0 | fn as_ref(&self) -> &Path { |
374 | 0 | self.path.as_ref() |
375 | 0 | } |
376 | | } |
377 | | |
/// An alias for a glob iteration result.
///
/// `Ok` carries a matched path as a `PathBuf`; `Err` carries a `GlobError`
/// describing a glob iteration error, such as failing to read a particular
/// directory's contents.
pub type GlobResult = Result<PathBuf, GlobError>;
383 | | |
impl Iterator for Paths {
    type Item = GlobResult;

    /// Produces the next matching path, or the next error encountered while
    /// expanding directories, or `None` once the work list is exhausted.
    fn next(&mut self) -> Option<GlobResult> {
        // The todo buffer is filled lazily here, on the first call, rather
        // than in glob(). That keeps the error reporting unified: failing to
        // fill the buffer is an *iteration* error, while construction of the
        // iterator (i.e. glob()) only fails if the Pattern fails to compile.
        // `take()` leaves `scope` as `None`, so this runs at most once.
        if let Some(scope) = self.scope.take() {
            if !self.dir_patterns.is_empty() {
                // Shouldn't happen, but we're using -1 (usize::MAX) as a
                // special index.
                assert!(self.dir_patterns.len() < usize::MAX);

                fill_todo(&mut self.todo, &self.dir_patterns, 0, &scope, self.options);
            }
        }

        loop {
            // Nothing to match against, or no pending work left: done.
            if self.dir_patterns.is_empty() || self.todo.is_empty() {
                return None;
            }

            // Take the most recently pushed entry; a stored error is handed
            // straight to the caller.
            let (path, mut idx) = match self.todo.pop().unwrap() {
                Ok(pair) => pair,
                Err(e) => return Some(Err(e)),
            };

            // idx -1 (usize::MAX): was already checked by fill_todo, maybe
            // path was '.' or '..' that we can't match here because of
            // normalization. Only the directory requirement is left to check.
            if idx == usize::MAX {
                if self.require_dir && !path.is_directory {
                    continue;
                }
                return Some(Ok(path.into_path()));
            }

            if self.dir_patterns[idx].is_recursive {
                let mut next = idx;

                // collapse consecutive recursive patterns
                while (next + 1) < self.dir_patterns.len()
                    && self.dir_patterns[next + 1].is_recursive
                {
                    next += 1;
                }

                if path.is_directory {
                    // the path is a directory, so it's a match

                    // push this directory's contents
                    fill_todo(
                        &mut self.todo,
                        &self.dir_patterns,
                        next,
                        &path,
                        self.options,
                    );

                    if next == self.dir_patterns.len() - 1 {
                        // pattern ends in recursive pattern, so return this
                        // directory as a result
                        return Some(Ok(path.into_path()));
                    } else {
                        // advanced to the next pattern for this path
                        idx = next + 1;
                    }
                } else if next == self.dir_patterns.len() - 1 {
                    // not a directory and it's the last pattern, meaning no
                    // match
                    continue;
                } else {
                    // advanced to the next pattern for this path
                    idx = next + 1;
                }
            }

            // not recursive, so match normally: only the final path component
            // is compared against the pattern at `idx`
            if self.dir_patterns[idx].matches_with(
                {
                    match path.file_name().and_then(|s| s.to_str()) {
                        // FIXME (#9639): How do we handle non-utf8 filenames?
                        // Ignore them for now; ideally we'd still match them
                        // against a *
                        None => continue,
                        Some(x) => x,
                    }
                },
                self.options,
            ) {
                if idx == self.dir_patterns.len() - 1 {
                    // it is not possible for a pattern to match a directory
                    // *AND* its children so we don't need to check the
                    // children

                    if !self.require_dir || path.is_directory {
                        return Some(Ok(path.into_path()));
                    }
                } else {
                    // More patterns remain: queue work under this path for the
                    // next pattern.
                    fill_todo(
                        &mut self.todo,
                        &self.dir_patterns,
                        idx + 1,
                        &path,
                        self.options,
                    );
                }
            }
        }
    }
}
494 | | |
495 | | /// A pattern parsing error. |
#[derive(Debug)]
// NOTE(review): presumably left non-`Copy` on purpose so fields can be added
// later without breaking callers — confirm before removing this allow.
#[allow(missing_copy_implementations)]
pub struct PatternError {
    /// The approximate character index of where the error occurred.
    ///
    /// This is an index into the pattern's characters (not its bytes), as
    /// computed by `Pattern::new`.
    pub pos: usize,

    /// A message describing the error.
    pub msg: &'static str,
}
505 | | |
506 | | impl Error for PatternError { |
507 | 0 | fn description(&self) -> &str { |
508 | 0 | self.msg |
509 | 0 | } |
510 | | } |
511 | | |
512 | | impl fmt::Display for PatternError { |
513 | 0 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
514 | 0 | write!( |
515 | 0 | f, |
516 | 0 | "Pattern syntax error near position {}: {}", |
517 | | self.pos, self.msg |
518 | | ) |
519 | 0 | } |
520 | | } |
521 | | |
522 | | /// A compiled Unix shell style pattern. |
523 | | /// |
524 | | /// - `?` matches any single character. |
525 | | /// |
526 | | /// - `*` matches any (possibly empty) sequence of characters. |
527 | | /// |
528 | | /// - `**` matches the current directory and arbitrary |
529 | | /// subdirectories. To match files in arbitrary subdirectories, use |
530 | | /// `**/*`. |
531 | | /// |
532 | | /// This sequence **must** form a single path component, so both |
533 | | /// `**a` and `b**` are invalid and will result in an error. A |
534 | | /// sequence of more than two consecutive `*` characters is also |
535 | | /// invalid. |
536 | | /// |
537 | | /// - `[...]` matches any character inside the brackets. Character sequences |
538 | | /// can also specify ranges of characters, as ordered by Unicode, so e.g. |
539 | | /// `[0-9]` specifies any character between 0 and 9 inclusive. An unclosed |
540 | | /// bracket is invalid. |
541 | | /// |
542 | | /// - `[!...]` is the negation of `[...]`, i.e. it matches any characters |
543 | | /// **not** in the brackets. |
544 | | /// |
545 | | /// - The metacharacters `?`, `*`, `[`, `]` can be matched by using brackets |
546 | | /// (e.g. `[?]`). When a `]` occurs immediately following `[` or `[!` then it |
547 | | /// is interpreted as being part of, rather than ending, the character set, so |
548 | | /// `]` and NOT `]` can be matched by `[]]` and `[!]]` respectively. The `-` |
549 | | /// character can be specified inside a character sequence pattern by placing |
550 | | /// it at the start or the end, e.g. `[abc-]`. |
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default, Debug)]
pub struct Pattern {
    /// The pattern text exactly as it was passed to `Pattern::new`; this is
    /// what `as_str` and `Display` show.
    original: String,
    /// The compiled form of `original`, matched token by token.
    tokens: Vec<PatternToken>,
    /// Set when the pattern contains a recursive `**` wildcard.
    is_recursive: bool,
    /// A bool value that indicates whether the pattern contains any metacharacters.
    /// We use this information for some fast path optimizations.
    has_metachars: bool,
}
560 | | |
/// Show the original glob pattern.
impl fmt::Display for Pattern {
    /// Formats the stored original pattern string by delegating to its own
    /// `fmt` implementation with the caller's formatter.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.original.fmt(f)
    }
}
567 | | |
568 | | impl FromStr for Pattern { |
569 | | type Err = PatternError; |
570 | | |
571 | 0 | fn from_str(s: &str) -> Result<Self, PatternError> { |
572 | 0 | Self::new(s) |
573 | 0 | } |
574 | | } |
575 | | |
// One compiled element of a `Pattern`, produced by `Pattern::new`.
#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum PatternToken {
    // A literal character.
    Char(char),
    // `?`
    AnyChar,
    // A single `*`.
    AnySequence,
    // `**` occupying a whole path component.
    AnyRecursiveSequence,
    // `[...]`: the character must be in the set.
    AnyWithin(Vec<CharSpecifier>),
    // `[!...]`: the character must not be in the set.
    AnyExcept(Vec<CharSpecifier>),
}
585 | | |
// One entry of a bracketed character set (`[...]` / `[!...]`).
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)]
enum CharSpecifier {
    // A single listed character.
    SingleChar(char),
    // A `start-end` range; the `Pattern` docs describe ranges as inclusive and
    // ordered by Unicode.
    CharRange(char, char),
}
591 | | |
// Outcome of `Pattern::matches_from`.
#[derive(Copy, Clone, PartialEq)]
enum MatchResult {
    // The remaining tokens consumed the remaining input exactly.
    Match,
    // This attempt failed; a wildcard further up may keep trying with a
    // different split of the input.
    SubPatternDoesntMatch,
    // The input ran out while a non-wildcard token still needed a character;
    // wildcard callers return this immediately instead of retrying.
    EntirePatternDoesntMatch,
}
598 | | |
// Messages carried in `PatternError::msg`.
// Reported for a run of more than two `*`.
const ERROR_WILDCARDS: &str = "wildcards are either regular `*` or recursive `**`";
// Reported when `**` does not make up a whole path component.
const ERROR_RECURSIVE_WILDCARDS: &str = "recursive wildcards must form a single path \
                                         component";
// Reported for a `[` that does not start a well-formed bracket expression.
const ERROR_INVALID_RANGE: &str = "invalid range pattern";
603 | | |
604 | | impl Pattern { |
605 | | /// This function compiles Unix shell style patterns. |
606 | | /// |
607 | | /// An invalid glob pattern will yield a `PatternError`. |
608 | 0 | pub fn new(pattern: &str) -> Result<Self, PatternError> { |
609 | 0 | let chars = pattern.chars().collect::<Vec<_>>(); |
610 | 0 | let mut tokens = Vec::new(); |
611 | 0 | let mut is_recursive = false; |
612 | 0 | let mut has_metachars = false; |
613 | 0 | let mut i = 0; |
614 | | |
615 | 0 | while i < chars.len() { |
616 | 0 | match chars[i] { |
617 | 0 | '?' => { |
618 | 0 | has_metachars = true; |
619 | 0 | tokens.push(AnyChar); |
620 | 0 | i += 1; |
621 | 0 | } |
622 | | '*' => { |
623 | 0 | has_metachars = true; |
624 | | |
625 | 0 | let old = i; |
626 | | |
627 | 0 | while i < chars.len() && chars[i] == '*' { |
628 | 0 | i += 1; |
629 | 0 | } |
630 | | |
631 | 0 | let count = i - old; |
632 | | |
633 | 0 | match count.cmp(&2) { |
634 | | Ordering::Greater => { |
635 | 0 | return Err(PatternError { |
636 | 0 | pos: old + 2, |
637 | 0 | msg: ERROR_WILDCARDS, |
638 | 0 | }) |
639 | | } |
640 | | Ordering::Equal => { |
641 | | // ** can only be an entire path component |
642 | | // i.e. a/**/b is valid, but a**/b or a/**b is not |
643 | | // invalid matches are treated literally |
644 | 0 | let is_valid = if i == 2 || path::is_separator(chars[i - count - 1]) { |
645 | | // it ends in a '/' |
646 | 0 | if i < chars.len() && path::is_separator(chars[i]) { |
647 | 0 | i += 1; |
648 | 0 | true |
649 | | // or the pattern ends here |
650 | | // this enables the existing globbing mechanism |
651 | 0 | } else if i == chars.len() { |
652 | 0 | true |
653 | | // `**` ends in non-separator |
654 | | } else { |
655 | 0 | return Err(PatternError { |
656 | 0 | pos: i, |
657 | 0 | msg: ERROR_RECURSIVE_WILDCARDS, |
658 | 0 | }); |
659 | | } |
660 | | // `**` begins with non-separator |
661 | | } else { |
662 | 0 | return Err(PatternError { |
663 | 0 | pos: old - 1, |
664 | 0 | msg: ERROR_RECURSIVE_WILDCARDS, |
665 | 0 | }); |
666 | | }; |
667 | | |
668 | 0 | if is_valid { |
669 | | // collapse consecutive AnyRecursiveSequence to a |
670 | | // single one |
671 | | |
672 | 0 | let tokens_len = tokens.len(); |
673 | | |
674 | 0 | if !(tokens_len > 1 |
675 | 0 | && tokens[tokens_len - 1] == AnyRecursiveSequence) |
676 | 0 | { |
677 | 0 | is_recursive = true; |
678 | 0 | tokens.push(AnyRecursiveSequence); |
679 | 0 | } |
680 | 0 | } |
681 | | } |
682 | 0 | Ordering::Less => tokens.push(AnySequence), |
683 | | } |
684 | | } |
685 | | '[' => { |
686 | 0 | has_metachars = true; |
687 | | |
688 | 0 | if i + 4 <= chars.len() && chars[i + 1] == '!' { |
689 | 0 | match chars[i + 3..].iter().position(|x| *x == ']') { |
690 | 0 | None => (), |
691 | 0 | Some(j) => { |
692 | 0 | let chars = &chars[i + 2..i + 3 + j]; |
693 | 0 | let cs = parse_char_specifiers(chars); |
694 | 0 | tokens.push(AnyExcept(cs)); |
695 | 0 | i += j + 4; |
696 | 0 | continue; |
697 | | } |
698 | | } |
699 | 0 | } else if i + 3 <= chars.len() && chars[i + 1] != '!' { |
700 | 0 | match chars[i + 2..].iter().position(|x| *x == ']') { |
701 | 0 | None => (), |
702 | 0 | Some(j) => { |
703 | 0 | let cs = parse_char_specifiers(&chars[i + 1..i + 2 + j]); |
704 | 0 | tokens.push(AnyWithin(cs)); |
705 | 0 | i += j + 3; |
706 | 0 | continue; |
707 | | } |
708 | | } |
709 | 0 | } |
710 | | |
711 | | // if we get here then this is not a valid range pattern |
712 | 0 | return Err(PatternError { |
713 | 0 | pos: i, |
714 | 0 | msg: ERROR_INVALID_RANGE, |
715 | 0 | }); |
716 | | } |
717 | 0 | c => { |
718 | 0 | tokens.push(Char(c)); |
719 | 0 | i += 1; |
720 | 0 | } |
721 | | } |
722 | | } |
723 | | |
724 | 0 | Ok(Self { |
725 | 0 | tokens, |
726 | 0 | original: pattern.to_string(), |
727 | 0 | is_recursive, |
728 | 0 | has_metachars, |
729 | 0 | }) |
730 | 0 | } |
731 | | |
732 | | /// Escape metacharacters within the given string by surrounding them in |
733 | | /// brackets. The resulting string will, when compiled into a `Pattern`, |
734 | | /// match the input string and nothing else. |
735 | 0 | pub fn escape(s: &str) -> String { |
736 | 0 | let mut escaped = String::new(); |
737 | 0 | for c in s.chars() { |
738 | 0 | match c { |
739 | | // note that ! does not need escaping because it is only special |
740 | | // inside brackets |
741 | 0 | '?' | '*' | '[' | ']' => { |
742 | 0 | escaped.push('['); |
743 | 0 | escaped.push(c); |
744 | 0 | escaped.push(']'); |
745 | 0 | } |
746 | 0 | c => { |
747 | 0 | escaped.push(c); |
748 | 0 | } |
749 | | } |
750 | | } |
751 | 0 | escaped |
752 | 0 | } |
753 | | |
754 | | /// Return if the given `str` matches this `Pattern` using the default |
755 | | /// match options (i.e. `MatchOptions::new()`). |
756 | | /// |
757 | | /// # Examples |
758 | | /// |
759 | | /// ```rust |
760 | | /// use glob::Pattern; |
761 | | /// |
762 | | /// assert!(Pattern::new("c?t").unwrap().matches("cat")); |
763 | | /// assert!(Pattern::new("k[!e]tteh").unwrap().matches("kitteh")); |
764 | | /// assert!(Pattern::new("d*g").unwrap().matches("doog")); |
765 | | /// ``` |
766 | 0 | pub fn matches(&self, str: &str) -> bool { |
767 | 0 | self.matches_with(str, MatchOptions::new()) |
768 | 0 | } |
769 | | |
770 | | /// Return if the given `Path`, when converted to a `str`, matches this |
771 | | /// `Pattern` using the default match options (i.e. `MatchOptions::new()`). |
772 | 0 | pub fn matches_path(&self, path: &Path) -> bool { |
773 | | // FIXME (#9639): This needs to handle non-utf8 paths |
774 | 0 | path.to_str().map_or(false, |s| self.matches(s)) |
775 | 0 | } |
776 | | |
777 | | /// Return if the given `str` matches this `Pattern` using the specified |
778 | | /// match options. |
779 | 0 | pub fn matches_with(&self, str: &str, options: MatchOptions) -> bool { |
780 | 0 | self.matches_from(true, str.chars(), 0, options) == Match |
781 | 0 | } |
782 | | |
783 | | /// Return if the given `Path`, when converted to a `str`, matches this |
784 | | /// `Pattern` using the specified match options. |
785 | 0 | pub fn matches_path_with(&self, path: &Path, options: MatchOptions) -> bool { |
786 | | // FIXME (#9639): This needs to handle non-utf8 paths |
787 | 0 | path.to_str() |
788 | 0 | .map_or(false, |s| self.matches_with(s, options)) |
789 | 0 | } |
790 | | |
791 | | /// Access the original glob pattern. |
792 | 0 | pub fn as_str(&self) -> &str { |
793 | 0 | &self.original |
794 | 0 | } |
795 | | |
    // Recursive matcher: tries to match the tokens from index `i` onwards
    // against the remaining characters in `file`.
    //
    // `follows_separator` says whether the next character of `file` comes
    // directly after a path separator (or is the very first character); it
    // drives the `require_literal_leading_dot` checks.
    //
    // Returns `Match` when tokens and input are both used up,
    // `SubPatternDoesntMatch` when this attempt failed but an enclosing
    // wildcard may retry, and `EntirePatternDoesntMatch` when the input ran
    // out while a non-wildcard token still needed a character.
    fn matches_from(
        &self,
        mut follows_separator: bool,
        mut file: std::str::Chars,
        i: usize,
        options: MatchOptions,
    ) -> MatchResult {
        // `ti` is relative to `i`, so the absolute token index is `i + ti`.
        for (ti, token) in self.tokens[i..].iter().enumerate() {
            match *token {
                AnySequence | AnyRecursiveSequence => {
                    // ** must be at the start.
                    debug_assert!(match *token {
                        AnyRecursiveSequence => follows_separator,
                        _ => true,
                    });

                    // Empty match: first try letting the wildcard consume
                    // nothing and matching the rest of the tokens right here.
                    match self.matches_from(follows_separator, file.clone(), i + ti + 1, options) {
                        SubPatternDoesntMatch => (), // keep trying
                        m => return m,
                    };

                    // Otherwise let the wildcard swallow one more character at
                    // a time, retrying the rest of the tokens after each one.
                    while let Some(c) = file.next() {
                        // A wildcard may not swallow a leading dot when the
                        // options demand that it be matched literally.
                        if follows_separator && options.require_literal_leading_dot && c == '.' {
                            return SubPatternDoesntMatch;
                        }
                        follows_separator = path::is_separator(c);
                        match *token {
                            // `**` only hands over to the following tokens
                            // right after a separator.
                            AnyRecursiveSequence if !follows_separator => continue,
                            // `*` may not swallow a separator when separators
                            // must be matched literally.
                            AnySequence
                                if options.require_literal_separator && follows_separator =>
                            {
                                return SubPatternDoesntMatch
                            }
                            _ => (),
                        }
                        match self.matches_from(
                            follows_separator,
                            file.clone(),
                            i + ti + 1,
                            options,
                        ) {
                            SubPatternDoesntMatch => (), // keep trying
                            m => return m,
                        }
                    }
                }
                _ => {
                    // Every other token consumes exactly one character.
                    let c = match file.next() {
                        Some(c) => c,
                        None => return EntirePatternDoesntMatch,
                    };

                    let is_sep = path::is_separator(c);

                    if !match *token {
                        // `?` and bracket sets never match a separator or a
                        // leading dot when the options require those to be
                        // matched literally.
                        AnyChar | AnyWithin(..) | AnyExcept(..)
                            if (options.require_literal_separator && is_sep)
                                || (follows_separator
                                    && options.require_literal_leading_dot
                                    && c == '.') =>
                        {
                            false
                        }
                        AnyChar => true,
                        AnyWithin(ref specifiers) => in_char_specifiers(specifiers, c, options),
                        AnyExcept(ref specifiers) => !in_char_specifiers(specifiers, c, options),
                        Char(c2) => chars_eq(c, c2, options.case_sensitive),
                        // Wildcards are handled by the first arm of the outer match.
                        AnySequence | AnyRecursiveSequence => unreachable!(),
                    } {
                        return SubPatternDoesntMatch;
                    }
                    follows_separator = is_sep;
                }
            }
        }

        // All tokens are used up; it is a match only if the input is used up
        // too. Iter is fused.
        if file.next().is_none() {
            Match
        } else {
            SubPatternDoesntMatch
        }
    }
880 | | } |
881 | | |
882 | | // Fills `todo` with paths under `path` to be matched by `patterns[idx]`, |
883 | | // special-casing patterns to match `.` and `..`, and avoiding `readdir()` |
884 | | // calls when there are no metacharacters in the pattern. |
885 | 0 | fn fill_todo( |
886 | 0 | todo: &mut Vec<Result<(PathWrapper, usize), GlobError>>, |
887 | 0 | patterns: &[Pattern], |
888 | 0 | idx: usize, |
889 | 0 | path: &PathWrapper, |
890 | 0 | options: MatchOptions, |
891 | 0 | ) { |
892 | 0 | let add = |todo: &mut Vec<_>, next_path: PathWrapper| { |
893 | 0 | if idx + 1 == patterns.len() { |
894 | 0 | // We know it's good, so don't make the iterator match this path |
895 | 0 | // against the pattern again. In particular, it can't match |
896 | 0 | // . or .. globs since these never show up as path components. |
897 | 0 | todo.push(Ok((next_path, usize::MAX))); |
898 | 0 | } else { |
899 | 0 | fill_todo(todo, patterns, idx + 1, &next_path, options); |
900 | 0 | } |
901 | 0 | }; |
902 | | |
903 | 0 | let pattern = &patterns[idx]; |
904 | 0 | let is_dir = path.is_directory; |
905 | 0 | let curdir = path.as_ref() == Path::new("."); |
906 | 0 | match (pattern.has_metachars, is_dir) { |
907 | | (false, _) => { |
908 | 0 | debug_assert!( |
909 | 0 | pattern |
910 | 0 | .tokens |
911 | 0 | .iter() |
912 | 0 | .all(|tok| matches!(tok, PatternToken::Char(_))), |
913 | | "broken invariant: pattern has metachars but shouldn't" |
914 | | ); |
915 | 0 | let s = pattern.as_str(); |
916 | | |
917 | | // This pattern component doesn't have any metacharacters, so we |
918 | | // don't need to read the current directory to know where to |
919 | | // continue. So instead of passing control back to the iterator, |
920 | | // we can just check for that one entry and potentially recurse |
921 | | // right away. |
922 | 0 | let special = "." == s || ".." == s; |
923 | 0 | let next_path = if curdir { |
924 | 0 | PathBuf::from(s) |
925 | | } else { |
926 | 0 | path.join(s) |
927 | | }; |
928 | 0 | let next_path = PathWrapper::from_path(next_path); |
929 | 0 | if (special && is_dir) |
930 | 0 | || (!special |
931 | 0 | && (fs::metadata(&next_path).is_ok() |
932 | 0 | || fs::symlink_metadata(&next_path).is_ok())) |
933 | 0 | { |
934 | 0 | add(todo, next_path); |
935 | 0 | } |
936 | | } |
937 | | (true, true) => { |
938 | 0 | let dirs = fs::read_dir(path).and_then(|d| { |
939 | 0 | d.map(|e| { |
940 | 0 | e.map(|e| { |
941 | 0 | let path = if curdir { |
942 | 0 | PathBuf::from(e.path().file_name().unwrap()) |
943 | | } else { |
944 | 0 | e.path() |
945 | | }; |
946 | 0 | PathWrapper::from_dir_entry(path, e) |
947 | 0 | }) |
948 | 0 | }) |
949 | 0 | .collect::<Result<Vec<_>, _>>() |
950 | 0 | }); |
951 | 0 | match dirs { |
952 | 0 | Ok(mut children) => { |
953 | 0 | if options.require_literal_leading_dot { |
954 | 0 | children |
955 | 0 | .retain(|x| !x.file_name().unwrap().to_str().unwrap().starts_with('.')); |
956 | 0 | } |
957 | 0 | children.sort_by(|p1, p2| p2.file_name().cmp(&p1.file_name())); |
958 | 0 | todo.extend(children.into_iter().map(|x| Ok((x, idx)))); |
959 | | |
960 | | // Matching the special directory entries . and .. that |
961 | | // refer to the current and parent directory respectively |
962 | | // requires that the pattern has a leading dot, even if the |
963 | | // `MatchOptions` field `require_literal_leading_dot` is not |
964 | | // set. |
965 | 0 | if !pattern.tokens.is_empty() && pattern.tokens[0] == Char('.') { |
966 | 0 | for &special in &[".", ".."] { |
967 | 0 | if pattern.matches_with(special, options) { |
968 | 0 | add(todo, PathWrapper::from_path(path.join(special))); |
969 | 0 | } |
970 | | } |
971 | 0 | } |
972 | | } |
973 | 0 | Err(e) => { |
974 | 0 | todo.push(Err(GlobError { |
975 | 0 | path: path.to_path_buf(), |
976 | 0 | error: e, |
977 | 0 | })); |
978 | 0 | } |
979 | | } |
980 | | } |
981 | 0 | (true, false) => { |
982 | 0 | // not a directory, nothing more to find |
983 | 0 | } |
984 | | } |
985 | 0 | } |
986 | | |
987 | 0 | fn parse_char_specifiers(s: &[char]) -> Vec<CharSpecifier> { |
988 | 0 | let mut cs = Vec::new(); |
989 | 0 | let mut i = 0; |
990 | 0 | while i < s.len() { |
991 | 0 | if i + 3 <= s.len() && s[i + 1] == '-' { |
992 | 0 | cs.push(CharRange(s[i], s[i + 2])); |
993 | 0 | i += 3; |
994 | 0 | } else { |
995 | 0 | cs.push(SingleChar(s[i])); |
996 | 0 | i += 1; |
997 | 0 | } |
998 | | } |
999 | 0 | cs |
1000 | 0 | } |
1001 | | |
1002 | 0 | fn in_char_specifiers(specifiers: &[CharSpecifier], c: char, options: MatchOptions) -> bool { |
1003 | 0 | for &specifier in specifiers.iter() { |
1004 | 0 | match specifier { |
1005 | 0 | SingleChar(sc) => { |
1006 | 0 | if chars_eq(c, sc, options.case_sensitive) { |
1007 | 0 | return true; |
1008 | 0 | } |
1009 | | } |
1010 | 0 | CharRange(start, end) => { |
1011 | | // FIXME: work with non-ascii chars properly (issue #1347) |
1012 | 0 | if !options.case_sensitive && c.is_ascii() && start.is_ascii() && end.is_ascii() { |
1013 | 0 | let start = start.to_ascii_lowercase(); |
1014 | 0 | let end = end.to_ascii_lowercase(); |
1015 | | |
1016 | 0 | let start_up = start.to_uppercase().next().unwrap(); |
1017 | 0 | let end_up = end.to_uppercase().next().unwrap(); |
1018 | | |
1019 | | // only allow case insensitive matching when |
1020 | | // both start and end are within a-z or A-Z |
1021 | 0 | if start != start_up && end != end_up { |
1022 | 0 | let c = c.to_ascii_lowercase(); |
1023 | 0 | if c >= start && c <= end { |
1024 | 0 | return true; |
1025 | 0 | } |
1026 | 0 | } |
1027 | 0 | } |
1028 | | |
1029 | 0 | if c >= start && c <= end { |
1030 | 0 | return true; |
1031 | 0 | } |
1032 | | } |
1033 | | } |
1034 | | } |
1035 | | |
1036 | 0 | false |
1037 | 0 | } |
1038 | | |
/// Compares two chars for equality, optionally ignoring ASCII case.
///
/// On Windows any two path separator characters are considered equal to each
/// other. Case folding is applied only when both chars are ASCII; all other
/// pairs are compared exactly.
fn chars_eq(a: char, b: char, case_sensitive: bool) -> bool {
    let separator_pair = cfg!(windows) && path::is_separator(a) && path::is_separator(b);
    if separator_pair {
        return true;
    }

    // FIXME: work with non-ascii chars properly (issue #9084)
    let fold_ascii_case = !case_sensitive && a.is_ascii() && b.is_ascii();
    if fold_ascii_case {
        a.eq_ignore_ascii_case(&b)
    } else {
        a == b
    }
}
1050 | | |
/// Configuration options to modify the behaviour of `Pattern::matches_with(..)`.
///
/// Note that the derived `Default` implementation sets every field to
/// `false`, whereas `MatchOptions::new()` sets `case_sensitive` to `true`.
#[allow(missing_copy_implementations)]
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Default)]
pub struct MatchOptions {
    /// Whether or not patterns should be matched in a case-sensitive manner.
    /// This currently only considers upper/lower case relationships between
    /// ASCII characters, but in future this might be extended to work with
    /// Unicode.
    pub case_sensitive: bool,

    /// Whether or not path-component separator characters (e.g. `/` on
    /// Posix) must be matched by a literal `/`, rather than by `*` or `?` or
    /// `[...]`.
    pub require_literal_separator: bool,

    /// Whether or not paths that contain components that start with a `.`
    /// will require that `.` appears literally in the pattern; `*`, `?`, `**`,
    /// or `[...]` will not match. This is useful because such files are
    /// conventionally considered hidden on Unix systems and it might be
    /// desirable to skip them when listing files.
    pub require_literal_leading_dot: bool,
}
1073 | | |
1074 | | impl MatchOptions { |
1075 | | /// Constructs a new `MatchOptions` with default field values. This is used |
1076 | | /// when calling functions that do not take an explicit `MatchOptions` |
1077 | | /// parameter. |
1078 | | /// |
1079 | | /// This function always returns this value: |
1080 | | /// |
1081 | | /// ```rust,ignore |
1082 | | /// MatchOptions { |
1083 | | /// case_sensitive: true, |
1084 | | /// require_literal_separator: false, |
1085 | | /// require_literal_leading_dot: false |
1086 | | /// } |
1087 | | /// ``` |
1088 | | /// |
1089 | | /// # Note |
1090 | | /// The behavior of this method doesn't match `default()`'s. This returns |
1091 | | /// `case_sensitive` as `true` while `default()` does it as `false`. |
1092 | | // FIXME: Consider unity the behavior with `default()` in a next major release. |
1093 | 0 | pub fn new() -> Self { |
1094 | 0 | Self { |
1095 | 0 | case_sensitive: true, |
1096 | 0 | require_literal_separator: false, |
1097 | 0 | require_literal_leading_dot: false, |
1098 | 0 | } |
1099 | 0 | } |
1100 | | } |
1101 | | |
/// Unit tests for pattern parsing, pattern matching and filesystem globbing.
///
/// Comparisons against expected values use `assert_eq!` so that a failure
/// reports the value that was actually produced.
#[cfg(test)]
mod test {
    use super::{glob, MatchOptions, Pattern};
    use std::path::Path;

    // `Pattern` can be built through `str::parse`, with the same error
    // positions as `Pattern::new`.
    #[test]
    fn test_pattern_from_str() {
        assert!("a*b".parse::<Pattern>().unwrap().matches("a_b"));
        assert_eq!("a/**b".parse::<Pattern>().unwrap_err().pos, 4);
    }

    // Misplaced `**` is rejected and the error points at the offending char.
    #[test]
    fn test_wildcard_errors() {
        assert_eq!(Pattern::new("a/**b").unwrap_err().pos, 4);
        assert_eq!(Pattern::new("a/bc**").unwrap_err().pos, 3);
        assert_eq!(Pattern::new("a/*****").unwrap_err().pos, 4);
        assert_eq!(Pattern::new("a/b**c**d").unwrap_err().pos, 2);
        assert_eq!(Pattern::new("a**b").unwrap_err().pos, 0);
    }

    // An unterminated `[` is rejected and the error points at the bracket.
    #[test]
    fn test_unclosed_bracket_errors() {
        assert_eq!(Pattern::new("abc[def").unwrap_err().pos, 3);
        assert_eq!(Pattern::new("abc[!def").unwrap_err().pos, 3);
        assert_eq!(Pattern::new("abc[").unwrap_err().pos, 3);
        assert_eq!(Pattern::new("abc[!").unwrap_err().pos, 3);
        assert_eq!(Pattern::new("abc[d").unwrap_err().pos, 3);
        assert_eq!(Pattern::new("abc[!d").unwrap_err().pos, 3);
        assert_eq!(Pattern::new("abc[]").unwrap_err().pos, 3);
        assert_eq!(Pattern::new("abc[!]").unwrap_err().pos, 3);
    }

    // `glob` surfaces the same pattern errors as `Pattern::new`.
    #[test]
    fn test_glob_errors() {
        assert_eq!(glob("a/**b").err().unwrap().pos, 4);
        assert_eq!(glob("abc[def").err().unwrap().pos, 3);
    }

    // this test assumes that there is a /root directory and that
    // the user running this test is not root or otherwise doesn't
    // have permission to read its contents
    #[cfg(all(unix, not(target_os = "macos")))]
    #[test]
    fn test_iteration_errors() {
        use std::io;
        let mut iter = glob("/root/*").unwrap();

        // GlobErrors shouldn't halt iteration
        let next = iter.next();
        assert!(next.is_some());

        let err = next.unwrap();
        assert!(err.is_err());

        let err = err.err().unwrap();
        assert_eq!(err.path(), Path::new("/root"));
        assert_eq!(err.error().kind(), io::ErrorKind::PermissionDenied);
    }

    #[test]
    fn test_absolute_pattern() {
        assert!(glob("/").unwrap().next().is_some());
        assert!(glob("//").unwrap().next().is_some());

        // assume that the filesystem is not empty!
        assert!(glob("/*").unwrap().next().is_some());

        #[cfg(not(windows))]
        fn win() {}

        #[cfg(windows)]
        fn win() {
            use std::env::current_dir;
            use std::path::Component;

            // check windows absolute paths with host/device components
            let root_with_device = current_dir()
                .ok()
                .and_then(|p| match p.components().next().unwrap() {
                    Component::Prefix(prefix_component) => {
                        let path = Path::new(prefix_component.as_os_str()).join("*");
                        Some(path.to_path_buf())
                    }
                    _ => panic!("no prefix in this path"),
                })
                .unwrap();
            // FIXME (#9639): This needs to handle non-utf8 paths
            assert!(glob(root_with_device.as_os_str().to_str().unwrap())
                .unwrap()
                .next()
                .is_some());
        }
        win()
    }

    #[test]
    fn test_wildcards() {
        assert!(Pattern::new("a*b").unwrap().matches("a_b"));
        assert!(Pattern::new("a*b*c").unwrap().matches("abc"));
        assert!(!Pattern::new("a*b*c").unwrap().matches("abcd"));
        assert!(Pattern::new("a*b*c").unwrap().matches("a_b_c"));
        assert!(Pattern::new("a*b*c").unwrap().matches("a___b___c"));
        assert!(Pattern::new("abc*abc*abc")
            .unwrap()
            .matches("abcabcabcabcabcabcabc"));
        assert!(!Pattern::new("abc*abc*abc")
            .unwrap()
            .matches("abcabcabcabcabcabcabca"));
        assert!(Pattern::new("a*a*a*a*a*a*a*a*a")
            .unwrap()
            .matches("aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"));
        assert!(Pattern::new("a*b[xyz]c*d").unwrap().matches("abxcdbxcddd"));
    }

    #[test]
    fn test_recursive_wildcards() {
        let pat = Pattern::new("some/**/needle.txt").unwrap();
        assert!(pat.matches("some/needle.txt"));
        assert!(pat.matches("some/one/needle.txt"));
        assert!(pat.matches("some/one/two/needle.txt"));
        assert!(pat.matches("some/other/needle.txt"));
        assert!(!pat.matches("some/other/notthis.txt"));

        // a single ** should be valid, for globs
        // Should accept anything
        let pat = Pattern::new("**").unwrap();
        assert!(pat.is_recursive);
        assert!(pat.matches("abcde"));
        assert!(pat.matches(""));
        assert!(pat.matches(".asdf"));
        assert!(pat.matches("/x/.asdf"));

        // collapse consecutive wildcards
        let pat = Pattern::new("some/**/**/needle.txt").unwrap();
        assert!(pat.matches("some/needle.txt"));
        assert!(pat.matches("some/one/needle.txt"));
        assert!(pat.matches("some/one/two/needle.txt"));
        assert!(pat.matches("some/other/needle.txt"));
        assert!(!pat.matches("some/other/notthis.txt"));

        // ** can begin the pattern
        let pat = Pattern::new("**/test").unwrap();
        assert!(pat.matches("one/two/test"));
        assert!(pat.matches("one/test"));
        assert!(pat.matches("test"));

        // /** can begin the pattern
        let pat = Pattern::new("/**/test").unwrap();
        assert!(pat.matches("/one/two/test"));
        assert!(pat.matches("/one/test"));
        assert!(pat.matches("/test"));
        assert!(!pat.matches("/one/notthis"));
        assert!(!pat.matches("/notthis"));

        // Only start sub-patterns on start of path segment.
        let pat = Pattern::new("**/.*").unwrap();
        assert!(pat.matches(".abc"));
        assert!(pat.matches("abc/.abc"));
        assert!(!pat.matches("ab.c"));
        assert!(!pat.matches("abc/ab.c"));
    }

    #[test]
    fn test_lots_of_files() {
        // this is a good test because it touches lots of differently named files
        let _ = glob("/*/*/*/*").unwrap().nth(10000);
    }

    #[test]
    fn test_range_pattern() {
        let pat = Pattern::new("a[0-9]b").unwrap();
        for i in 0..10 {
            assert!(pat.matches(&format!("a{}b", i)));
        }
        assert!(!pat.matches("a_b"));

        let pat = Pattern::new("a[!0-9]b").unwrap();
        for i in 0..10 {
            assert!(!pat.matches(&format!("a{}b", i)));
        }
        assert!(pat.matches("a_b"));

        let pats = ["[a-z123]", "[1a-z23]", "[123a-z]"];
        for &p in pats.iter() {
            let pat = Pattern::new(p).unwrap();
            for c in "abcdefghijklmnopqrstuvwxyz".chars() {
                assert!(pat.matches(&c.to_string()));
            }
            for c in "ABCDEFGHIJKLMNOPQRSTUVWXYZ".chars() {
                let options = MatchOptions {
                    case_sensitive: false,
                    ..MatchOptions::new()
                };
                assert!(pat.matches_with(&c.to_string(), options));
            }
            assert!(pat.matches("1"));
            assert!(pat.matches("2"));
            assert!(pat.matches("3"));
        }

        // A `-` at either end of the class is a literal dash.
        let pats = ["[abc-]", "[-abc]", "[a-c-]"];
        for &p in pats.iter() {
            let pat = Pattern::new(p).unwrap();
            assert!(pat.matches("a"));
            assert!(pat.matches("b"));
            assert!(pat.matches("c"));
            assert!(pat.matches("-"));
            assert!(!pat.matches("d"));
        }

        // A reversed range matches nothing.
        let pat = Pattern::new("[2-1]").unwrap();
        assert!(!pat.matches("1"));
        assert!(!pat.matches("2"));

        assert!(Pattern::new("[-]").unwrap().matches("-"));
        assert!(!Pattern::new("[!-]").unwrap().matches("-"));
    }

    #[test]
    fn test_pattern_matches() {
        let txt_pat = Pattern::new("*hello.txt").unwrap();
        assert!(txt_pat.matches("hello.txt"));
        assert!(txt_pat.matches("gareth_says_hello.txt"));
        assert!(txt_pat.matches("some/path/to/hello.txt"));
        assert!(txt_pat.matches("some\\path\\to\\hello.txt"));
        assert!(txt_pat.matches("/an/absolute/path/to/hello.txt"));
        assert!(!txt_pat.matches("hello.txt-and-then-some"));
        assert!(!txt_pat.matches("goodbye.txt"));

        let dir_pat = Pattern::new("*some/path/to/hello.txt").unwrap();
        assert!(dir_pat.matches("some/path/to/hello.txt"));
        assert!(dir_pat.matches("a/bigger/some/path/to/hello.txt"));
        assert!(!dir_pat.matches("some/path/to/hello.txt-and-then-some"));
        assert!(!dir_pat.matches("some/other/path/to/hello.txt"));
    }

    // Escaping wraps every metacharacter in brackets, and the escaped form
    // matches the original string literally.
    #[test]
    fn test_pattern_escape() {
        let s = "_[_]_?_*_!_";
        assert_eq!(Pattern::escape(s), "_[[]_[]]_[?]_[*]_!_".to_string());
        assert!(Pattern::new(&Pattern::escape(s)).unwrap().matches(s));
    }

    #[test]
    fn test_pattern_matches_case_insensitive() {
        let pat = Pattern::new("aBcDeFg").unwrap();
        let options = MatchOptions {
            case_sensitive: false,
            require_literal_separator: false,
            require_literal_leading_dot: false,
        };

        assert!(pat.matches_with("aBcDeFg", options));
        assert!(pat.matches_with("abcdefg", options));
        assert!(pat.matches_with("ABCDEFG", options));
        assert!(pat.matches_with("AbCdEfG", options));
    }

    #[test]
    fn test_pattern_matches_case_insensitive_range() {
        let pat_within = Pattern::new("[a]").unwrap();
        let pat_except = Pattern::new("[!a]").unwrap();

        let options_case_insensitive = MatchOptions {
            case_sensitive: false,
            require_literal_separator: false,
            require_literal_leading_dot: false,
        };
        let options_case_sensitive = MatchOptions {
            case_sensitive: true,
            require_literal_separator: false,
            require_literal_leading_dot: false,
        };

        assert!(pat_within.matches_with("a", options_case_insensitive));
        assert!(pat_within.matches_with("A", options_case_insensitive));
        assert!(!pat_within.matches_with("A", options_case_sensitive));

        assert!(!pat_except.matches_with("a", options_case_insensitive));
        assert!(!pat_except.matches_with("A", options_case_insensitive));
        assert!(pat_except.matches_with("A", options_case_sensitive));
    }

    #[test]
    fn test_pattern_matches_require_literal_separator() {
        let options_require_literal = MatchOptions {
            case_sensitive: true,
            require_literal_separator: true,
            require_literal_leading_dot: false,
        };
        let options_not_require_literal = MatchOptions {
            case_sensitive: true,
            require_literal_separator: false,
            require_literal_leading_dot: false,
        };

        assert!(Pattern::new("abc/def")
            .unwrap()
            .matches_with("abc/def", options_require_literal));
        assert!(!Pattern::new("abc?def")
            .unwrap()
            .matches_with("abc/def", options_require_literal));
        assert!(!Pattern::new("abc*def")
            .unwrap()
            .matches_with("abc/def", options_require_literal));
        assert!(!Pattern::new("abc[/]def")
            .unwrap()
            .matches_with("abc/def", options_require_literal));

        assert!(Pattern::new("abc/def")
            .unwrap()
            .matches_with("abc/def", options_not_require_literal));
        assert!(Pattern::new("abc?def")
            .unwrap()
            .matches_with("abc/def", options_not_require_literal));
        assert!(Pattern::new("abc*def")
            .unwrap()
            .matches_with("abc/def", options_not_require_literal));
        assert!(Pattern::new("abc[/]def")
            .unwrap()
            .matches_with("abc/def", options_not_require_literal));
    }

    #[test]
    fn test_pattern_matches_require_literal_leading_dot() {
        let options_require_literal_leading_dot = MatchOptions {
            case_sensitive: true,
            require_literal_separator: false,
            require_literal_leading_dot: true,
        };
        let options_not_require_literal_leading_dot = MatchOptions {
            case_sensitive: true,
            require_literal_separator: false,
            require_literal_leading_dot: false,
        };

        let f = |options| {
            Pattern::new("*.txt")
                .unwrap()
                .matches_with(".hello.txt", options)
        };
        assert!(f(options_not_require_literal_leading_dot));
        assert!(!f(options_require_literal_leading_dot));

        let f = |options| {
            Pattern::new(".*.*")
                .unwrap()
                .matches_with(".hello.txt", options)
        };
        assert!(f(options_not_require_literal_leading_dot));
        assert!(f(options_require_literal_leading_dot));

        let f = |options| {
            Pattern::new("aaa/bbb/*")
                .unwrap()
                .matches_with("aaa/bbb/.ccc", options)
        };
        assert!(f(options_not_require_literal_leading_dot));
        assert!(!f(options_require_literal_leading_dot));

        let f = |options| {
            Pattern::new("aaa/bbb/*")
                .unwrap()
                .matches_with("aaa/bbb/c.c.c.", options)
        };
        assert!(f(options_not_require_literal_leading_dot));
        assert!(f(options_require_literal_leading_dot));

        let f = |options| {
            Pattern::new("aaa/bbb/.*")
                .unwrap()
                .matches_with("aaa/bbb/.ccc", options)
        };
        assert!(f(options_not_require_literal_leading_dot));
        assert!(f(options_require_literal_leading_dot));

        let f = |options| {
            Pattern::new("aaa/?bbb")
                .unwrap()
                .matches_with("aaa/.bbb", options)
        };
        assert!(f(options_not_require_literal_leading_dot));
        assert!(!f(options_require_literal_leading_dot));

        let f = |options| {
            Pattern::new("aaa/[.]bbb")
                .unwrap()
                .matches_with("aaa/.bbb", options)
        };
        assert!(f(options_not_require_literal_leading_dot));
        assert!(!f(options_require_literal_leading_dot));

        let f = |options| Pattern::new("**/*").unwrap().matches_with(".bbb", options);
        assert!(f(options_not_require_literal_leading_dot));
        assert!(!f(options_require_literal_leading_dot));
    }

    #[test]
    fn test_matches_path() {
        // on windows, (Path::new("a/b").as_str().unwrap() == "a\\b"), so this
        // tests that / and \ are considered equivalent on windows
        assert!(Pattern::new("a/b").unwrap().matches_path(Path::new("a/b")));
    }

    // A pattern assembled with `Path::join` must still parse.
    #[test]
    fn test_path_join() {
        let pattern = Path::new("one").join(Path::new("**/*.rs"));
        assert!(Pattern::new(pattern.to_str().unwrap()).is_ok());
    }
}