/rust/registry/src/index.crates.io-1949cf8c6b5b557f/shell-words-1.1.1/src/lib.rs
Line | Count | Source |
1 | | // Copyright 2018 Tomasz Miąsko |
2 | | // |
3 | | // Licensed under the Apache License, Version 2.0 <LICENSE-APACHE> |
4 | | // or the MIT license <LICENSE-MIT>, at your option. |
5 | | // |
6 | | //! Process command line according to parsing rules of Unix shell as specified |
7 | | //! in [Shell Command Language in POSIX.1-2008][posix-shell]. |
8 | | //! |
9 | | //! [posix-shell]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html |
10 | | |
11 | | #![cfg_attr(not(feature = "std"), no_std)] |
12 | | #![forbid(unsafe_code)] |
13 | | |
14 | | #[cfg(feature = "std")] |
15 | | extern crate core; |
16 | | |
17 | | use core::fmt; |
18 | | use core::mem; |
19 | | |
20 | | #[cfg(not(feature = "std"))] |
21 | | #[macro_use] |
22 | | extern crate alloc; |
23 | | |
24 | | #[cfg(not(feature = "std"))] |
25 | | use alloc::string::String; |
26 | | #[cfg(not(feature = "std"))] |
27 | | use alloc::vec::Vec; |
28 | | |
29 | | #[cfg(not(feature = "std"))] |
30 | | use alloc::borrow::Cow; |
31 | | #[cfg(feature = "std")] |
32 | | use std::borrow::Cow; |
33 | | |
34 | | /// An error returned when shell parsing fails. |
35 | | #[derive(Clone, Copy, Debug, PartialEq, Eq)] |
36 | | pub struct ParseError; |
37 | | |
38 | | impl fmt::Display for ParseError { |
39 | 0 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
40 | 0 | f.write_str("missing closing quote") |
41 | 0 | } |
42 | | } |
43 | | |
44 | | #[cfg(feature = "std")] |
45 | | impl std::error::Error for ParseError {} |
46 | | |
47 | | enum State { |
48 | | /// Within a delimiter. |
49 | | Delimiter, |
50 | | /// After backslash, but before starting word. |
51 | | Backslash, |
52 | | /// Within an unquoted word. |
53 | | Unquoted, |
54 | | /// After backslash in an unquoted word. |
55 | | UnquotedBackslash, |
56 | | /// Within a single quoted word. |
57 | | SingleQuoted, |
58 | | /// Within a double quoted word. |
59 | | DoubleQuoted, |
60 | | /// After backslash inside a double quoted word. |
61 | | DoubleQuotedBackslash, |
62 | | /// Inside a comment. |
63 | | Comment, |
64 | | } |
65 | | |
66 | | /// Splits command line into separate arguments, in much the same way Unix shell |
67 | | /// would, but without many of the expansions the shell would perform. |
68 | | /// |
69 | | /// The split functionality is compatible with behaviour of Unix shell, but with |
70 | | /// word expansions limited to quote removal, and without special token |
71 | | /// recognition rules for operators. |
72 | | /// |
73 | | /// The result is exactly the same as one obtained from Unix shell as long as |
74 | | /// those unsupported features are not present in input: no operators, no |
75 | | /// variable assignments, no tilde expansion, no parameter expansion, no command |
76 | | /// substitution, no arithmetic expansion, no pathname expansion. |
77 | | /// |
78 | | /// In case those unsupported shell features are present, the syntax that |
79 | | /// introduces them is interpreted literally. |
80 | | /// |
81 | | /// # Errors |
82 | | /// |
83 | | /// When the input contains an unmatched quote, an error is returned. |
84 | | /// |
85 | | /// # Compatibility with other implementations |
86 | | /// |
87 | | /// It should be fully compatible with g_shell_parse_argv from GLib, except that |
88 | | /// in GLib it is an error not to have any words after tokenization. |
89 | | /// |
90 | | /// It is also very close to shlex.split available in Python standard library, |
91 | | /// when used in POSIX mode with support for comments. Though, shlex |
92 | | /// implementation diverges from POSIX, and from implementation contained herein |
93 | | /// in three aspects. First, it doesn't support line continuations. |
94 | | /// Second, inside double quotes, the backslash character retains its special |
95 | | /// meaning as an escape character only when followed by \\ or \", whereas POSIX |
96 | | /// specifies that it should retain its special meaning when followed by: $, \`, |
97 | | /// \", \\, or a newline. Third, it treats carriage return as one of delimiters. |
98 | | /// |
99 | | /// # Examples |
100 | | /// |
101 | | /// Building an executable using the compiler obtained from the CC environment |
102 | | /// variable and compiler flags from both CFLAGS and CPPFLAGS, similar to the |
103 | | /// default build rule for C used in GNU Make: |
104 | | /// |
105 | | /// ```rust,no_run |
106 | | /// use std::env::var; |
107 | | /// use std::process::Command; |
108 | | /// |
109 | | /// let cc = var("CC").unwrap_or_else(|_| "cc".to_owned()); |
110 | | /// |
111 | | /// let cflags_str = var("CFLAGS").unwrap_or_else(|_| String::new()); |
112 | | /// let cflags = shell_words::split(&cflags_str).expect("failed to parse CFLAGS"); |
113 | | /// |
114 | | /// let cppflags_str = var("CPPFLAGS").unwrap_or_else(|_| String::new()); |
115 | | /// let cppflags = shell_words::split(&cppflags_str).expect("failed to parse CPPFLAGS"); |
116 | | /// |
117 | | /// Command::new(cc) |
118 | | /// .args(cflags) |
119 | | /// .args(cppflags) |
120 | | /// .args(&["-c", "a.c", "-o", "a.out"]) |
121 | | /// .spawn() |
122 | | /// .expect("failed to start subprocess") |
123 | | /// .wait() |
124 | | /// .expect("failed to wait for subprocess"); |
125 | | /// ``` |
126 | 0 | pub fn split(s: &str) -> Result<Vec<String>, ParseError> { |
127 | | use State::*; |
128 | | |
129 | 0 | let mut words = Vec::new(); |
130 | 0 | let mut word = String::new(); |
131 | 0 | let mut chars = s.chars(); |
132 | 0 | let mut state = Delimiter; |
133 | | |
134 | | loop { |
135 | 0 | let c = chars.next(); |
136 | 0 | state = match state { |
137 | 0 | Delimiter => match c { |
138 | 0 | None => break, |
139 | 0 | Some('\'') => SingleQuoted, |
140 | 0 | Some('\"') => DoubleQuoted, |
141 | 0 | Some('\\') => Backslash, |
142 | 0 | Some('\t') | Some(' ') | Some('\n') => Delimiter, |
143 | 0 | Some('#') => Comment, |
144 | 0 | Some(c) => { |
145 | 0 | word.push(c); |
146 | 0 | Unquoted |
147 | | } |
148 | | }, |
149 | 0 | Backslash => match c { |
150 | | None => { |
151 | 0 | word.push('\\'); |
152 | 0 | words.push(mem::replace(&mut word, String::new())); |
153 | 0 | break; |
154 | | } |
155 | 0 | Some('\n') => Delimiter, |
156 | 0 | Some(c) => { |
157 | 0 | word.push(c); |
158 | 0 | Unquoted |
159 | | } |
160 | | }, |
161 | 0 | Unquoted => match c { |
162 | | None => { |
163 | 0 | words.push(mem::replace(&mut word, String::new())); |
164 | 0 | break; |
165 | | } |
166 | 0 | Some('\'') => SingleQuoted, |
167 | 0 | Some('\"') => DoubleQuoted, |
168 | 0 | Some('\\') => UnquotedBackslash, |
169 | | Some('\t') | Some(' ') | Some('\n') => { |
170 | 0 | words.push(mem::replace(&mut word, String::new())); |
171 | 0 | Delimiter |
172 | | } |
173 | 0 | Some(c) => { |
174 | 0 | word.push(c); |
175 | 0 | Unquoted |
176 | | } |
177 | | }, |
178 | 0 | UnquotedBackslash => match c { |
179 | | None => { |
180 | 0 | word.push('\\'); |
181 | 0 | words.push(mem::replace(&mut word, String::new())); |
182 | 0 | break; |
183 | | } |
184 | 0 | Some('\n') => Unquoted, |
185 | 0 | Some(c) => { |
186 | 0 | word.push(c); |
187 | 0 | Unquoted |
188 | | } |
189 | | }, |
190 | 0 | SingleQuoted => match c { |
191 | 0 | None => return Err(ParseError), |
192 | 0 | Some('\'') => Unquoted, |
193 | 0 | Some(c) => { |
194 | 0 | word.push(c); |
195 | 0 | SingleQuoted |
196 | | } |
197 | | }, |
198 | 0 | DoubleQuoted => match c { |
199 | 0 | None => return Err(ParseError), |
200 | 0 | Some('\"') => Unquoted, |
201 | 0 | Some('\\') => DoubleQuotedBackslash, |
202 | 0 | Some(c) => { |
203 | 0 | word.push(c); |
204 | 0 | DoubleQuoted |
205 | | } |
206 | | }, |
207 | 0 | DoubleQuotedBackslash => match c { |
208 | 0 | None => return Err(ParseError), |
209 | 0 | Some('\n') => DoubleQuoted, |
210 | 0 | Some(c @ '$') | Some(c @ '`') | Some(c @ '"') | Some(c @ '\\') => { |
211 | 0 | word.push(c); |
212 | 0 | DoubleQuoted |
213 | | } |
214 | 0 | Some(c) => { |
215 | 0 | word.push('\\'); |
216 | 0 | word.push(c); |
217 | 0 | DoubleQuoted |
218 | | } |
219 | | }, |
220 | 0 | Comment => match c { |
221 | 0 | None => break, |
222 | 0 | Some('\n') => Delimiter, |
223 | 0 | Some(_) => Comment, |
224 | | }, |
225 | | } |
226 | | } |
227 | | |
228 | 0 | Ok(words) |
229 | 0 | } |
230 | | |
231 | | enum EscapeStyle { |
232 | | /// No escaping. |
233 | | None, |
234 | | /// Wrap in single quotes. |
235 | | SingleQuoted, |
236 | | /// Single quotes combined with backslash. |
237 | | Mixed, |
238 | | } |
239 | | |
240 | | /// Determines escaping style to use. |
241 | 0 | fn escape_style(s: &str) -> EscapeStyle { |
242 | 0 | if s.is_empty() { |
243 | 0 | return EscapeStyle::SingleQuoted; |
244 | 0 | } |
245 | | |
246 | 0 | let mut special = false; |
247 | 0 | let mut newline = false; |
248 | 0 | let mut single_quote = false; |
249 | | |
250 | 0 | for c in s.chars() { |
251 | 0 | match c { |
252 | 0 | '\n' => { |
253 | 0 | newline = true; |
254 | 0 | special = true; |
255 | 0 | } |
256 | 0 | '\'' => { |
257 | 0 | single_quote = true; |
258 | 0 | special = true; |
259 | 0 | } |
260 | | '|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' | '\\' | '"' | ' ' | '\t' | '*' |
261 | 0 | | '?' | '[' | '#' | '~' | '=' | '%' => { |
262 | 0 | special = true; |
263 | 0 | } |
264 | 0 | _ => continue, |
265 | | } |
266 | | } |
267 | | |
268 | 0 | if !special { |
269 | 0 | EscapeStyle::None |
270 | 0 | } else if newline && !single_quote { |
271 | 0 | EscapeStyle::SingleQuoted |
272 | | } else { |
273 | 0 | EscapeStyle::Mixed |
274 | | } |
275 | 0 | } |
276 | | |
277 | | /// Escapes special characters in a string, so that it will retain its literal |
278 | | /// meaning when used as part of a command in a Unix shell. |
279 | | /// |
280 | | /// It tries to avoid introducing any unnecessary quotes or escape characters, |
281 | | /// but specifics regarding quoting style are left unspecified. |
282 | 0 | pub fn quote(s: &str) -> Cow<str> { |
283 | | // We are going somewhat out of our way to provide a |
284 | | // minimal amount of quoting in typical cases. |
285 | 0 | match escape_style(s) { |
286 | 0 | EscapeStyle::None => s.into(), |
287 | 0 | EscapeStyle::SingleQuoted => format!("'{}'", s).into(), |
288 | | EscapeStyle::Mixed => { |
289 | 0 | let mut quoted = String::new(); |
290 | 0 | quoted.push('\''); |
291 | 0 | for c in s.chars() { |
292 | 0 | if c == '\'' { |
293 | 0 | quoted.push_str("'\\''"); |
294 | 0 | } else { |
295 | 0 | quoted.push(c); |
296 | 0 | } |
297 | | } |
298 | 0 | quoted.push('\''); |
299 | 0 | quoted.into() |
300 | | } |
301 | | } |
302 | 0 | } |
303 | | |
304 | | /// Joins arguments into a single command line suitable for execution in Unix |
305 | | /// shell. |
306 | | /// |
307 | | /// Each argument is quoted using [`quote`] to preserve its literal meaning when |
308 | | /// parsed by Unix shell. |
309 | | /// |
310 | | /// Note: This function is essentially an inverse of [`split`]. |
311 | | /// |
312 | | /// # Examples |
313 | | /// |
314 | | /// Logging executed commands in a format that can be easily copied and pasted |
315 | | /// into an actual shell: |
316 | | /// |
317 | | /// ```rust,no_run |
318 | | /// fn execute(args: &[&str]) { |
319 | | /// use std::process::Command; |
320 | | /// println!("Executing: {}", shell_words::join(args)); |
321 | | /// Command::new(&args[0]) |
322 | | /// .args(&args[1..]) |
323 | | /// .spawn() |
324 | | /// .expect("failed to start subprocess") |
325 | | /// .wait() |
326 | | /// .expect("failed to wait for subprocess"); |
327 | | /// } |
328 | | /// |
329 | | /// execute(&["python", "-c", "print('Hello world!')"]); |
330 | | /// ``` |
331 | | /// |
332 | | /// [`quote`]: fn.quote.html |
333 | | /// [`split`]: fn.split.html |
334 | 0 | pub fn join<I, S>(words: I) -> String |
335 | 0 | where |
336 | 0 | I: IntoIterator<Item = S>, |
337 | 0 | S: AsRef<str>, |
338 | | { |
339 | 0 | let mut line = words.into_iter().fold(String::new(), |mut line, word| { |
340 | 0 | let quoted = quote(word.as_ref()); |
341 | 0 | line.push_str(quoted.as_ref()); |
342 | 0 | line.push(' '); |
343 | 0 | line |
344 | 0 | }); |
345 | 0 | line.pop(); |
346 | 0 | line |
347 | 0 | } |
348 | | |
349 | | #[cfg(test)] |
350 | | mod tests { |
351 | | use super::*; |
352 | | |
353 | | fn split_ok(cases: &[(&str, &[&str])]) { |
354 | | for &(input, expected) in cases { |
355 | | match split(input) { |
356 | | Err(actual) => { |
357 | | panic!( |
358 | | "After split({:?})\nexpected: Ok({:?})\n actual: Err({:?})\n", |
359 | | input, expected, actual |
360 | | ); |
361 | | } |
362 | | Ok(actual) => { |
363 | | assert!( |
364 | | expected == actual.as_slice(), |
365 | | "After split({:?}).unwrap()\nexpected: {:?}\n actual: {:?}\n", |
366 | | input, |
367 | | expected, |
368 | | actual |
369 | | ); |
370 | | } |
371 | | } |
372 | | } |
373 | | } |
374 | | |
375 | | #[test] |
376 | | fn split_empty() { |
377 | | split_ok(&[("", &[])]); |
378 | | } |
379 | | |
380 | | #[test] |
381 | | fn split_initial_whitespace_is_removed() { |
382 | | split_ok(&[ |
383 | | (" a", &["a"]), |
384 | | ("\t\t\t\tbar", &["bar"]), |
385 | | ("\t \nc", &["c"]), |
386 | | ]); |
387 | | } |
388 | | |
389 | | #[test] |
390 | | fn split_trailing_whitespace_is_removed() { |
391 | | split_ok(&[ |
392 | | ("a ", &["a"]), |
393 | | ("b\t", &["b"]), |
394 | | ("c\t \n \n \n", &["c"]), |
395 | | ("d\n\n", &["d"]), |
396 | | ]); |
397 | | } |
398 | | |
399 | | #[test] |
400 | | fn split_carriage_return_is_not_special() { |
401 | | split_ok(&[("c\ra\r'\r'\r", &["c\ra\r\r\r"])]); |
402 | | } |
403 | | |
404 | | #[test] |
405 | | fn split_single_quotes() { |
406 | | split_ok(&[ |
407 | | (r#"''"#, &[r#""#]), |
408 | | (r#"'a'"#, &[r#"a"#]), |
409 | | (r#"'\'"#, &[r#"\"#]), |
410 | | (r#"' \ '"#, &[r#" \ "#]), |
411 | | (r#"'#'"#, &[r#"#"#]), |
412 | | ]); |
413 | | } |
414 | | |
415 | | #[test] |
416 | | fn split_double_quotes() { |
417 | | split_ok(&[ |
418 | | (r#""""#, &[""]), |
419 | | (r#""""""#, &[""]), |
420 | | (r#""a b c' d""#, &["a b c' d"]), |
421 | | (r#""\a""#, &["\\a"]), |
422 | | (r#""$""#, &["$"]), |
423 | | (r#""\$""#, &["$"]), |
424 | | (r#""`""#, &["`"]), |
425 | | (r#""\`""#, &["`"]), |
426 | | (r#""\"""#, &["\""]), |
427 | | (r#""\\""#, &["\\"]), |
428 | | ("\"\n\"", &["\n"]), |
429 | | ("\"\\\n\"", &[""]), |
430 | | ]); |
431 | | } |
432 | | |
433 | | #[test] |
434 | | fn split_unquoted() { |
435 | | split_ok(&[ |
436 | | (r#"\|\&\;"#, &[r#"|&;"#]), |
437 | | (r#"\<\>"#, &[r#"<>"#]), |
438 | | (r#"\(\)"#, &[r#"()"#]), |
439 | | (r#"\$"#, &[r#"$"#]), |
440 | | (r#"\`"#, &[r#"`"#]), |
441 | | (r#"\""#, &[r#"""#]), |
442 | | (r#"\'"#, &[r#"'"#]), |
443 | | ("\\\n", &[]), |
444 | | (" \\\n \n", &[]), |
445 | | ("a\nb\nc", &["a", "b", "c"]), |
446 | | ("a\\\nb\\\nc", &["abc"]), |
447 | | ("foo bar baz", &["foo", "bar", "baz"]), |
448 | | (r#"\🦉"#, &[r"🦉"]), |
449 | | ]); |
450 | | } |
451 | | |
452 | | #[test] |
453 | | fn split_trailing_backslash() { |
454 | | split_ok(&[("\\", &["\\"]), (" \\", &["\\"]), ("a\\", &["a\\"])]); |
455 | | } |
456 | | |
457 | | #[test] |
458 | | fn split_errors() { |
459 | | assert_eq!(split("'abc"), Err(ParseError)); |
460 | | assert_eq!(split("\""), Err(ParseError)); |
461 | | assert_eq!(split("'\\"), Err(ParseError)); |
462 | | assert_eq!(split("'\\"), Err(ParseError)); |
463 | | } |
464 | | |
465 | | #[test] |
466 | | fn split_comments() { |
467 | | split_ok(&[ |
468 | | (r#" x # comment "#, &["x"]), |
469 | | (r#" w1#w2 "#, &["w1#w2"]), |
470 | | (r#"'not really a # comment'"#, &["not really a # comment"]), |
471 | | (" a # very long comment \n b # another comment", &["a", "b"]), |
472 | | ]); |
473 | | } |
474 | | |
475 | | #[test] |
476 | | fn test_quote() { |
477 | | assert_eq!(quote(""), "''"); |
478 | | assert_eq!(quote("'"), "''\\'''"); |
479 | | assert_eq!(quote("abc"), "abc"); |
480 | | assert_eq!(quote("a \n b"), "'a \n b'"); |
481 | | assert_eq!(quote("X'\nY"), "'X'\\''\nY'"); |
482 | | assert_eq!(quote("~root"), "'~root'"); |
483 | | } |
484 | | |
485 | | #[test] |
486 | | fn test_join() { |
487 | | assert_eq!(join(&["a", "b", "c"]), "a b c"); |
488 | | assert_eq!(join(&[" ", "$", "\n"]), "' ' '$' '\n'"); |
489 | | } |
490 | | |
491 | | #[test] |
492 | | fn join_followed_by_split_is_identity() { |
493 | | let cases: Vec<&[&str]> = vec![ |
494 | | &["a"], |
495 | | &["python", "-c", "print('Hello world!')"], |
496 | | &["echo", " arg with spaces ", "arg \' with \" quotes"], |
497 | | &["even newlines are quoted correctly\n", "\n", "\n\n\t "], |
498 | | &["$", "`test`"], |
499 | | &["cat", "~user/log*"], |
500 | | &["test", "'a \"b", "\"X'"], |
501 | | &["empty", "", "", ""], |
502 | | ]; |
503 | | for argv in cases { |
504 | | let args = join(argv); |
505 | | assert_eq!(split(&args).unwrap(), argv); |
506 | | } |
507 | | } |
508 | | } |