Coverage Report

Created: 2025-06-02 07:01

/rust/registry/src/index.crates.io-6f17d22bba15001f/regex-syntax-0.8.5/src/error.rs
Line
Count
Source (jump to first uncovered line)
1
use alloc::{
2
    format,
3
    string::{String, ToString},
4
    vec,
5
    vec::Vec,
6
};
7
8
use crate::{ast, hir};
9
10
/// This error type encompasses any error that can be returned by this crate.
11
///
12
/// This error type is marked as `non_exhaustive`. This means that adding a
13
/// new variant is not considered a breaking change.
14
#[non_exhaustive]
15
#[derive(Clone, Debug, Eq, PartialEq)]
16
pub enum Error {
17
    /// An error that occurred while translating concrete syntax into abstract
18
    /// syntax (AST).
19
    Parse(ast::Error),
20
    /// An error that occurred while translating abstract syntax into a high
21
    /// level intermediate representation (HIR).
22
    Translate(hir::Error),
23
}
24
25
impl From<ast::Error> for Error {
26
0
    fn from(err: ast::Error) -> Error {
27
0
        Error::Parse(err)
28
0
    }
29
}
30
31
impl From<hir::Error> for Error {
32
0
    fn from(err: hir::Error) -> Error {
33
0
        Error::Translate(err)
34
0
    }
35
}
36
37
#[cfg(feature = "std")]
38
impl std::error::Error for Error {}
39
40
impl core::fmt::Display for Error {
41
0
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
42
0
        match *self {
43
0
            Error::Parse(ref x) => x.fmt(f),
44
0
            Error::Translate(ref x) => x.fmt(f),
45
        }
46
0
    }
47
}
48
49
/// A helper type for formatting nice error messages.
50
///
51
/// This type is responsible for reporting regex parse errors in a nice human
52
/// readable format. Most of its complexity is from interspersing notational
53
/// markers pointing out the position where an error occurred.
54
#[derive(Debug)]
55
pub struct Formatter<'e, E> {
56
    /// The original regex pattern in which the error occurred.
57
    pattern: &'e str,
58
    /// The error kind. It must impl fmt::Display.
59
    err: &'e E,
60
    /// The primary span of the error.
61
    span: &'e ast::Span,
62
    /// An auxiliary and optional span, in case the error needs to point to
63
    /// two locations (e.g., when reporting a duplicate capture group name).
64
    aux_span: Option<&'e ast::Span>,
65
}
66
67
impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> {
68
0
    fn from(err: &'e ast::Error) -> Self {
69
0
        Formatter {
70
0
            pattern: err.pattern(),
71
0
            err: err.kind(),
72
0
            span: err.span(),
73
0
            aux_span: err.auxiliary_span(),
74
0
        }
75
0
    }
76
}
77
78
impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
79
0
    fn from(err: &'e hir::Error) -> Self {
80
0
        Formatter {
81
0
            pattern: err.pattern(),
82
0
            err: err.kind(),
83
0
            span: err.span(),
84
0
            aux_span: None,
85
0
        }
86
0
    }
87
}
88
89
impl<'e, E: core::fmt::Display> core::fmt::Display for Formatter<'e, E> {
90
0
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
91
0
        let spans = Spans::from_formatter(self);
92
0
        if self.pattern.contains('\n') {
93
0
            let divider = repeat_char('~', 79);
94
0
95
0
            writeln!(f, "regex parse error:")?;
96
0
            writeln!(f, "{}", divider)?;
97
0
            let notated = spans.notate();
98
0
            write!(f, "{}", notated)?;
99
0
            writeln!(f, "{}", divider)?;
100
            // If we have error spans that cover multiple lines, then we just
101
            // note the line numbers.
102
0
            if !spans.multi_line.is_empty() {
103
0
                let mut notes = vec![];
104
0
                for span in &spans.multi_line {
105
0
                    notes.push(format!(
106
0
                        "on line {} (column {}) through line {} (column {})",
107
0
                        span.start.line,
108
0
                        span.start.column,
109
0
                        span.end.line,
110
0
                        span.end.column - 1
111
0
                    ));
112
0
                }
113
0
                writeln!(f, "{}", notes.join("\n"))?;
114
0
            }
115
0
            write!(f, "error: {}", self.err)?;
116
        } else {
117
0
            writeln!(f, "regex parse error:")?;
118
0
            let notated = Spans::from_formatter(self).notate();
119
0
            write!(f, "{}", notated)?;
120
0
            write!(f, "error: {}", self.err)?;
121
        }
122
0
        Ok(())
123
0
    }
Unexecuted instantiation: <regex_syntax::error::Formatter<regex_syntax::ast::ErrorKind> as core::fmt::Display>::fmt
Unexecuted instantiation: <regex_syntax::error::Formatter<regex_syntax::hir::ErrorKind> as core::fmt::Display>::fmt
124
}
125
126
/// This type represents an arbitrary number of error spans in a way that makes
127
/// it convenient to notate the regex pattern. ("Notate" means "point out
128
/// exactly where the error occurred in the regex pattern.")
129
///
130
/// Technically, we can only ever have two spans given our current error
131
/// structure. However, after toiling with a specific algorithm for handling
132
/// two spans, it became obvious that an algorithm to handle an arbitrary
133
/// number of spans was actually much simpler.
134
struct Spans<'p> {
135
    /// The original regex pattern string.
136
    pattern: &'p str,
137
    /// The total width that should be used for line numbers. The width is
138
    /// used for left padding the line numbers for alignment.
139
    ///
140
    /// A value of `0` means line numbers should not be displayed. That is,
141
    /// the pattern is itself only one line.
142
    line_number_width: usize,
143
    /// All error spans that occur on a single line. This sequence always has
144
    /// length equivalent to the number of lines in `pattern`, where the index
145
    /// of the sequence represents a line number, starting at `0`. The spans
146
    /// in each line are sorted in ascending order.
147
    by_line: Vec<Vec<ast::Span>>,
148
    /// All error spans that occur over one or more lines. That is, the start
149
    /// and end position of the span have different line numbers. The spans are
150
    /// sorted in ascending order.
151
    multi_line: Vec<ast::Span>,
152
}
153
154
impl<'p> Spans<'p> {
155
    /// Build a sequence of spans from a formatter.
156
0
    fn from_formatter<'e, E: core::fmt::Display>(
157
0
        fmter: &'p Formatter<'e, E>,
158
0
    ) -> Spans<'p> {
159
0
        let mut line_count = fmter.pattern.lines().count();
160
0
        // If the pattern ends with a `\n` literal, then our line count is
161
0
        // off by one, since a span can occur immediately after the last `\n`,
162
0
        // which is consider to be an additional line.
163
0
        if fmter.pattern.ends_with('\n') {
164
0
            line_count += 1;
165
0
        }
166
0
        let line_number_width =
167
0
            if line_count <= 1 { 0 } else { line_count.to_string().len() };
168
0
        let mut spans = Spans {
169
0
            pattern: &fmter.pattern,
170
0
            line_number_width,
171
0
            by_line: vec![vec![]; line_count],
172
0
            multi_line: vec![],
173
0
        };
174
0
        spans.add(fmter.span.clone());
175
0
        if let Some(span) = fmter.aux_span {
176
0
            spans.add(span.clone());
177
0
        }
178
0
        spans
179
0
    }
Unexecuted instantiation: <regex_syntax::error::Spans>::from_formatter::<regex_syntax::ast::ErrorKind>
Unexecuted instantiation: <regex_syntax::error::Spans>::from_formatter::<regex_syntax::hir::ErrorKind>
180
181
    /// Add the given span to this sequence, putting it in the right place.
182
0
    fn add(&mut self, span: ast::Span) {
183
0
        // This is grossly inefficient since we sort after each add, but right
184
0
        // now, we only ever add two spans at most.
185
0
        if span.is_one_line() {
186
0
            let i = span.start.line - 1; // because lines are 1-indexed
187
0
            self.by_line[i].push(span);
188
0
            self.by_line[i].sort();
189
0
        } else {
190
0
            self.multi_line.push(span);
191
0
            self.multi_line.sort();
192
0
        }
193
0
    }
194
195
    /// Notate the pattern string with carents (`^`) pointing at each span
196
    /// location. This only applies to spans that occur within a single line.
197
0
    fn notate(&self) -> String {
198
0
        let mut notated = String::new();
199
0
        for (i, line) in self.pattern.lines().enumerate() {
200
0
            if self.line_number_width > 0 {
201
0
                notated.push_str(&self.left_pad_line_number(i + 1));
202
0
                notated.push_str(": ");
203
0
            } else {
204
0
                notated.push_str("    ");
205
0
            }
206
0
            notated.push_str(line);
207
0
            notated.push('\n');
208
0
            if let Some(notes) = self.notate_line(i) {
209
0
                notated.push_str(&notes);
210
0
                notated.push('\n');
211
0
            }
212
        }
213
0
        notated
214
0
    }
215
216
    /// Return notes for the line indexed at `i` (zero-based). If there are no
217
    /// spans for the given line, then `None` is returned. Otherwise, an
218
    /// appropriately space padded string with correctly positioned `^` is
219
    /// returned, accounting for line numbers.
220
0
    fn notate_line(&self, i: usize) -> Option<String> {
221
0
        let spans = &self.by_line[i];
222
0
        if spans.is_empty() {
223
0
            return None;
224
0
        }
225
0
        let mut notes = String::new();
226
0
        for _ in 0..self.line_number_padding() {
227
0
            notes.push(' ');
228
0
        }
229
0
        let mut pos = 0;
230
0
        for span in spans {
231
0
            for _ in pos..(span.start.column - 1) {
232
0
                notes.push(' ');
233
0
                pos += 1;
234
0
            }
235
0
            let note_len = span.end.column.saturating_sub(span.start.column);
236
0
            for _ in 0..core::cmp::max(1, note_len) {
237
0
                notes.push('^');
238
0
                pos += 1;
239
0
            }
240
        }
241
0
        Some(notes)
242
0
    }
243
244
    /// Left pad the given line number with spaces such that it is aligned with
245
    /// other line numbers.
246
0
    fn left_pad_line_number(&self, n: usize) -> String {
247
0
        let n = n.to_string();
248
0
        let pad = self.line_number_width.checked_sub(n.len()).unwrap();
249
0
        let mut result = repeat_char(' ', pad);
250
0
        result.push_str(&n);
251
0
        result
252
0
    }
253
254
    /// Return the line number padding beginning at the start of each line of
255
    /// the pattern.
256
    ///
257
    /// If the pattern is only one line, then this returns a fixed padding
258
    /// for visual indentation.
259
0
    fn line_number_padding(&self) -> usize {
260
0
        if self.line_number_width == 0 {
261
0
            4
262
        } else {
263
0
            2 + self.line_number_width
264
        }
265
0
    }
266
}
267
268
0
fn repeat_char(c: char, count: usize) -> String {
269
0
    core::iter::repeat(c).take(count).collect()
270
0
}
271
272
#[cfg(test)]
273
mod tests {
274
    use alloc::string::ToString;
275
276
    use crate::ast::parse::Parser;
277
278
    fn assert_panic_message(pattern: &str, expected_msg: &str) {
279
        let result = Parser::new().parse(pattern);
280
        match result {
281
            Ok(_) => {
282
                panic!("regex should not have parsed");
283
            }
284
            Err(err) => {
285
                assert_eq!(err.to_string(), expected_msg.trim());
286
            }
287
        }
288
    }
289
290
    // See: https://github.com/rust-lang/regex/issues/464
291
    #[test]
292
    fn regression_464() {
293
        let err = Parser::new().parse("a{\n").unwrap_err();
294
        // This test checks that the error formatter doesn't panic.
295
        assert!(!err.to_string().is_empty());
296
    }
297
298
    // See: https://github.com/rust-lang/regex/issues/545
299
    #[test]
300
    fn repetition_quantifier_expects_a_valid_decimal() {
301
        assert_panic_message(
302
            r"\\u{[^}]*}",
303
            r#"
304
regex parse error:
305
    \\u{[^}]*}
306
        ^
307
error: repetition quantifier expects a valid decimal
308
"#,
309
        );
310
    }
311
}