Coverage Report

Created: 2026-02-14 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/shell-words-1.1.1/src/lib.rs
Line
Count
Source
1
// Copyright 2018 Tomasz Miąsko
2
//
3
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE>
4
// or the MIT license <LICENSE-MIT>, at your option.
5
//
6
//! Process command line according to parsing rules of Unix shell as specified
7
//! in [Shell Command Language in POSIX.1-2008][posix-shell].
8
//!
9
//! [posix-shell]: http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
10
11
#![cfg_attr(not(feature = "std"), no_std)]
12
#![forbid(unsafe_code)]
13
14
#[cfg(feature = "std")]
15
extern crate core;
16
17
use core::fmt;
18
use core::mem;
19
20
#[cfg(not(feature = "std"))]
21
#[macro_use]
22
extern crate alloc;
23
24
#[cfg(not(feature = "std"))]
25
use alloc::string::String;
26
#[cfg(not(feature = "std"))]
27
use alloc::vec::Vec;
28
29
#[cfg(not(feature = "std"))]
30
use alloc::borrow::Cow;
31
#[cfg(feature = "std")]
32
use std::borrow::Cow;
33
34
/// An error returned when shell parsing fails.
///
/// [`split`] reports it when the input ends while a single-quoted or
/// double-quoted section is still open, i.e. the closing quote is missing.
///
/// [`split`]: fn.split.html
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ParseError;

impl fmt::Display for ParseError {
    /// Writes the fixed, human readable description of the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("missing closing quote")
    }
}

// `std::error::Error` does not exist without `std`, hence the feature gate.
#[cfg(feature = "std")]
impl std::error::Error for ParseError {}
46
47
/// Tokenizer state used by `split` while it walks the input one character at
/// a time.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum State {
    /// Within a delimiter.
    Delimiter,
    /// After backslash, but before starting word.
    Backslash,
    /// Within an unquoted word.
    Unquoted,
    /// After backslash in an unquoted word.
    UnquotedBackslash,
    /// Within a single quoted word.
    SingleQuoted,
    /// Within a double quoted word.
    DoubleQuoted,
    /// After backslash inside a double quoted word.
    DoubleQuotedBackslash,
    /// Inside a comment.
    Comment,
}
65
66
/// Splits command line into separate arguments, in much the same way Unix shell
67
/// would, but without many of expansion the shell would perform.
68
///
69
/// The split functionality is compatible with behaviour of Unix shell, but with
70
/// word expansions limited to quote removal, and without special token
71
/// recognition rules for operators.
72
///
73
/// The result is exactly the same as one obtained from Unix shell as long as
74
/// those unsupported features are not present in input: no operators, no
75
/// variable assignments, no tilde expansion, no parameter expansion, no command
76
/// substitution, no arithmetic expansion, no pathname expansion.
77
///
78
/// In case those unsupported shell features are present, the syntax that
79
/// introduce them is interpreted literally.
80
///
81
/// # Errors
82
///
83
/// When input contains unmatched quote, an error is returned.
84
///
85
/// # Compatibility with other implementations
86
///
87
/// It should be fully compatible with g_shell_parse_argv from GLib, except that
88
/// in GLib it is an error not to have any words after tokenization.
89
///
90
/// It is also very close to shlex.split available in Python standard library,
91
/// when used in POSIX mode with support for comments. Though, shlex
92
/// implementation diverges from POSIX, and from implementation contained herein
93
/// in three aspects. First, it doesn't support line continuations.
94
/// Second, inside double quotes, the backslash characters retains its special
95
/// meaning as an escape character only when followed by \\ or \", whereas POSIX
96
/// specifies that it should retain its special meaning when followed by: $, \`,
97
/// \", \\, or a newline. Third, it treats carriage return as one of delimiters.
98
///
99
/// # Examples
100
///
101
/// Building an executable using compiler obtained from CC environment variable
102
/// and compiler flags from both CFLAGS and CPPFLAGS. Similar to default build
103
/// rule for C used in GNU Make:
104
///
105
/// ```rust,no_run
106
/// use std::env::var;
107
/// use std::process::Command;
108
///
109
/// let cc = var("CC").unwrap_or_else(|_| "cc".to_owned());
110
///
111
/// let cflags_str = var("CFLAGS").unwrap_or_else(|_| String::new());
112
/// let cflags = shell_words::split(&cflags_str).expect("failed to parse CFLAGS");
113
///
114
/// let cppflags_str = var("CPPFLAGS").unwrap_or_else(|_| String::new());
115
/// let cppflags = shell_words::split(&cppflags_str).expect("failed to parse CPPFLAGS");
116
///
117
/// Command::new(cc)
118
///     .args(cflags)
119
///     .args(cppflags)
120
///     .args(&["-c", "a.c", "-o", "a.out"])
121
///     .spawn()
122
///     .expect("failed to start subprocess")
123
///     .wait()
124
///     .expect("failed to wait for subprocess");
125
/// ```
126
0
pub fn split(s: &str) -> Result<Vec<String>, ParseError> {
127
    use State::*;
128
129
0
    let mut words = Vec::new();
130
0
    let mut word = String::new();
131
0
    let mut chars = s.chars();
132
0
    let mut state = Delimiter;
133
134
    loop {
135
0
        let c = chars.next();
136
0
        state = match state {
137
0
            Delimiter => match c {
138
0
                None => break,
139
0
                Some('\'') => SingleQuoted,
140
0
                Some('\"') => DoubleQuoted,
141
0
                Some('\\') => Backslash,
142
0
                Some('\t') | Some(' ') | Some('\n') => Delimiter,
143
0
                Some('#') => Comment,
144
0
                Some(c) => {
145
0
                    word.push(c);
146
0
                    Unquoted
147
                }
148
            },
149
0
            Backslash => match c {
150
                None => {
151
0
                    word.push('\\');
152
0
                    words.push(mem::replace(&mut word, String::new()));
153
0
                    break;
154
                }
155
0
                Some('\n') => Delimiter,
156
0
                Some(c) => {
157
0
                    word.push(c);
158
0
                    Unquoted
159
                }
160
            },
161
0
            Unquoted => match c {
162
                None => {
163
0
                    words.push(mem::replace(&mut word, String::new()));
164
0
                    break;
165
                }
166
0
                Some('\'') => SingleQuoted,
167
0
                Some('\"') => DoubleQuoted,
168
0
                Some('\\') => UnquotedBackslash,
169
                Some('\t') | Some(' ') | Some('\n') => {
170
0
                    words.push(mem::replace(&mut word, String::new()));
171
0
                    Delimiter
172
                }
173
0
                Some(c) => {
174
0
                    word.push(c);
175
0
                    Unquoted
176
                }
177
            },
178
0
            UnquotedBackslash => match c {
179
                None => {
180
0
                    word.push('\\');
181
0
                    words.push(mem::replace(&mut word, String::new()));
182
0
                    break;
183
                }
184
0
                Some('\n') => Unquoted,
185
0
                Some(c) => {
186
0
                    word.push(c);
187
0
                    Unquoted
188
                }
189
            },
190
0
            SingleQuoted => match c {
191
0
                None => return Err(ParseError),
192
0
                Some('\'') => Unquoted,
193
0
                Some(c) => {
194
0
                    word.push(c);
195
0
                    SingleQuoted
196
                }
197
            },
198
0
            DoubleQuoted => match c {
199
0
                None => return Err(ParseError),
200
0
                Some('\"') => Unquoted,
201
0
                Some('\\') => DoubleQuotedBackslash,
202
0
                Some(c) => {
203
0
                    word.push(c);
204
0
                    DoubleQuoted
205
                }
206
            },
207
0
            DoubleQuotedBackslash => match c {
208
0
                None => return Err(ParseError),
209
0
                Some('\n') => DoubleQuoted,
210
0
                Some(c @ '$') | Some(c @ '`') | Some(c @ '"') | Some(c @ '\\') => {
211
0
                    word.push(c);
212
0
                    DoubleQuoted
213
                }
214
0
                Some(c) => {
215
0
                    word.push('\\');
216
0
                    word.push(c);
217
0
                    DoubleQuoted
218
                }
219
            },
220
0
            Comment => match c {
221
0
                None => break,
222
0
                Some('\n') => Delimiter,
223
0
                Some(_) => Comment,
224
            },
225
        }
226
    }
227
228
0
    Ok(words)
229
0
}
230
231
/// Quoting strategy for a single argument, as chosen by `escape_style` and
/// carried out by `quote`.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum EscapeStyle {
    /// No escaping.
    None,
    /// Wrap in single quotes.
    SingleQuoted,
    /// Single quotes combined with backslash.
    Mixed,
}
239
240
/// Determines escaping style to use.
241
0
fn escape_style(s: &str) -> EscapeStyle {
242
0
    if s.is_empty() {
243
0
        return EscapeStyle::SingleQuoted;
244
0
    }
245
246
0
    let mut special = false;
247
0
    let mut newline = false;
248
0
    let mut single_quote = false;
249
250
0
    for c in s.chars() {
251
0
        match c {
252
0
            '\n' => {
253
0
                newline = true;
254
0
                special = true;
255
0
            }
256
0
            '\'' => {
257
0
                single_quote = true;
258
0
                special = true;
259
0
            }
260
            '|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' | '\\' | '"' | ' ' | '\t' | '*'
261
0
            | '?' | '[' | '#' | '~' | '=' | '%' => {
262
0
                special = true;
263
0
            }
264
0
            _ => continue,
265
        }
266
    }
267
268
0
    if !special {
269
0
        EscapeStyle::None
270
0
    } else if newline && !single_quote {
271
0
        EscapeStyle::SingleQuoted
272
    } else {
273
0
        EscapeStyle::Mixed
274
    }
275
0
}
276
277
/// Escapes special characters in a string, so that it will retain its literal
278
/// meaning when used as a part of command in Unix shell.
279
///
280
/// It tries to avoid introducing any unnecessary quotes or escape characters,
281
/// but specifics regarding quoting style are left unspecified.
282
0
pub fn quote(s: &str) -> Cow<str> {
283
    // We are going somewhat out of the way to provide
284
    // minimal amount of quoting in typical cases.
285
0
    match escape_style(s) {
286
0
        EscapeStyle::None => s.into(),
287
0
        EscapeStyle::SingleQuoted => format!("'{}'", s).into(),
288
        EscapeStyle::Mixed => {
289
0
            let mut quoted = String::new();
290
0
            quoted.push('\'');
291
0
            for c in s.chars() {
292
0
                if c == '\'' {
293
0
                    quoted.push_str("'\\''");
294
0
                } else {
295
0
                    quoted.push(c);
296
0
                }
297
            }
298
0
            quoted.push('\'');
299
0
            quoted.into()
300
        }
301
    }
302
0
}
303
304
/// Joins arguments into a single command line suitable for execution in Unix
305
/// shell.
306
///
307
/// Each argument is quoted using [`quote`] to preserve its literal meaning when
308
/// parsed by Unix shell.
309
///
310
/// Note: This function is essentially an inverse of [`split`].
311
///
312
/// # Examples
313
///
314
/// Logging executed commands in format that can be easily copied and pasted
315
/// into an actual shell:
316
///
317
/// ```rust,no_run
318
/// fn execute(args: &[&str]) {
319
///     use std::process::Command;
320
///     println!("Executing: {}", shell_words::join(args));
321
///     Command::new(&args[0])
322
///         .args(&args[1..])
323
///         .spawn()
324
///         .expect("failed to start subprocess")
325
///         .wait()
326
///         .expect("failed to wait for subprocess");
327
/// }
328
///
329
/// execute(&["python", "-c", "print('Hello world!')"]);
330
/// ```
331
///
332
/// [`quote`]: fn.quote.html
333
/// [`split`]: fn.split.html
334
0
pub fn join<I, S>(words: I) -> String
335
0
where
336
0
    I: IntoIterator<Item = S>,
337
0
    S: AsRef<str>,
338
{
339
0
    let mut line = words.into_iter().fold(String::new(), |mut line, word| {
340
0
        let quoted = quote(word.as_ref());
341
0
        line.push_str(quoted.as_ref());
342
0
        line.push(' ');
343
0
        line
344
0
    });
345
0
    line.pop();
346
0
    line
347
0
}
348
349
/// Unit tests for `split`, `quote` and `join`.
#[cfg(test)]
mod tests {
    use super::*;

    /// Asserts that every `(input, expected)` pair splits successfully into
    /// exactly the expected words, printing the offending input on failure.
    fn split_ok(cases: &[(&str, &[&str])]) {
        for &(input, expected) in cases {
            match split(input) {
                Err(actual) => {
                    panic!(
                        "After split({:?})\nexpected: Ok({:?})\n  actual: Err({:?})\n",
                        input, expected, actual
                    );
                }
                Ok(actual) => {
                    assert!(
                        expected == actual.as_slice(),
                        "After split({:?}).unwrap()\nexpected: {:?}\n  actual: {:?}\n",
                        input,
                        expected,
                        actual
                    );
                }
            }
        }
    }

    #[test]
    fn split_empty() {
        split_ok(&[("", &[])]);
    }

    #[test]
    fn split_initial_whitespace_is_removed() {
        split_ok(&[
            ("     a", &["a"]),
            ("\t\t\t\tbar", &["bar"]),
            ("\t \nc", &["c"]),
        ]);
    }

    #[test]
    fn split_trailing_whitespace_is_removed() {
        split_ok(&[
            ("a  ", &["a"]),
            ("b\t", &["b"]),
            ("c\t \n \n \n", &["c"]),
            ("d\n\n", &["d"]),
        ]);
    }

    #[test]
    fn split_carriage_return_is_not_special() {
        split_ok(&[("c\ra\r'\r'\r", &["c\ra\r\r\r"])]);
    }

    #[test]
    fn split_single_quotes() {
        split_ok(&[
            (r#"''"#, &[r#""#]),
            (r#"'a'"#, &[r#"a"#]),
            (r#"'\'"#, &[r#"\"#]),
            (r#"' \ '"#, &[r#" \ "#]),
            (r#"'#'"#, &[r#"#"#]),
        ]);
    }

    #[test]
    fn split_double_quotes() {
        split_ok(&[
            (r#""""#, &[""]),
            (r#""""""#, &[""]),
            (r#""a b c' d""#, &["a b c' d"]),
            (r#""\a""#, &["\\a"]),
            (r#""$""#, &["$"]),
            (r#""\$""#, &["$"]),
            (r#""`""#, &["`"]),
            (r#""\`""#, &["`"]),
            (r#""\"""#, &["\""]),
            (r#""\\""#, &["\\"]),
            ("\"\n\"", &["\n"]),
            ("\"\\\n\"", &[""]),
        ]);
    }

    #[test]
    fn split_unquoted() {
        split_ok(&[
            (r#"\|\&\;"#, &[r#"|&;"#]),
            (r#"\<\>"#, &[r#"<>"#]),
            (r#"\(\)"#, &[r#"()"#]),
            (r#"\$"#, &[r#"$"#]),
            (r#"\`"#, &[r#"`"#]),
            (r#"\""#, &[r#"""#]),
            (r#"\'"#, &[r#"'"#]),
            ("\\\n", &[]),
            (" \\\n \n", &[]),
            ("a\nb\nc", &["a", "b", "c"]),
            ("a\\\nb\\\nc", &["abc"]),
            ("foo bar baz", &["foo", "bar", "baz"]),
            (r#"\🦉"#, &[r"🦉"]),
        ]);
    }

    #[test]
    fn split_trailing_backslash() {
        split_ok(&[("\\", &["\\"]), (" \\", &["\\"]), ("a\\", &["a\\"])]);
    }

    #[test]
    fn split_errors() {
        // Unterminated single quote.
        assert_eq!(split("'abc"), Err(ParseError));
        // Unterminated double quote.
        assert_eq!(split("\""), Err(ParseError));
        // Backslash is literal inside single quotes, the quote stays open.
        assert_eq!(split("'\\"), Err(ParseError));
        // Input ends right after a backslash inside double quotes.
        assert_eq!(split("\"\\"), Err(ParseError));
    }

    #[test]
    fn split_comments() {
        split_ok(&[
            (r#" x # comment "#, &["x"]),
            (r#" w1#w2 "#, &["w1#w2"]),
            (r#"'not really a # comment'"#, &["not really a # comment"]),
            (" a # very long comment \n b # another comment", &["a", "b"]),
        ]);
    }

    #[test]
    fn test_quote() {
        assert_eq!(quote(""), "''");
        assert_eq!(quote("'"), "''\\'''");
        assert_eq!(quote("abc"), "abc");
        assert_eq!(quote("a \n  b"), "'a \n  b'");
        assert_eq!(quote("X'\nY"), "'X'\\''\nY'");
        assert_eq!(quote("~root"), "'~root'");
    }

    #[test]
    fn test_join() {
        assert_eq!(join(&["a", "b", "c"]), "a b c");
        assert_eq!(join(&[" ", "$", "\n"]), "' ' '$' '\n'");
    }

    #[test]
    fn join_followed_by_split_is_identity() {
        let cases: Vec<&[&str]> = vec![
            &["a"],
            &["python", "-c", "print('Hello world!')"],
            &["echo", " arg with spaces ", "arg \' with \" quotes"],
            &["even newlines are quoted correctly\n", "\n", "\n\n\t "],
            &["$", "`test`"],
            &["cat", "~user/log*"],
            &["test", "'a \"b", "\"X'"],
            &["empty", "", "", ""],
        ];
        for argv in cases {
            let args = join(argv);
            assert_eq!(split(&args).unwrap(), argv);
        }
    }
}