Coverage Report

Created: 2025-07-23 07:16

/src/regex/regex-syntax/src/error.rs
Line
Count
Source (jump to first uncovered line)
1
use alloc::{
2
    format,
3
    string::{String, ToString},
4
    vec,
5
    vec::Vec,
6
};
7
8
use crate::{ast, hir};
9
10
/// This error type encompasses any error that can be returned by this crate.
11
///
12
/// This error type is marked as `non_exhaustive`. This means that adding a
13
/// new variant is not considered a breaking change.
14
#[non_exhaustive]
15
#[derive(Clone, Debug, Eq, PartialEq)]
16
pub enum Error {
17
    /// An error that occurred while translating concrete syntax into abstract
18
    /// syntax (AST).
19
    Parse(ast::Error),
20
    /// An error that occurred while translating abstract syntax into a high
21
    /// level intermediate representation (HIR).
22
    Translate(hir::Error),
23
}
24
25
impl From<ast::Error> for Error {
26
6.12k
    fn from(err: ast::Error) -> Error {
27
6.12k
        Error::Parse(err)
28
6.12k
    }
29
}
30
31
impl From<hir::Error> for Error {
32
2.70k
    fn from(err: hir::Error) -> Error {
33
2.70k
        Error::Translate(err)
34
2.70k
    }
35
}
36
37
#[cfg(feature = "std")]
38
impl std::error::Error for Error {}
39
40
impl core::fmt::Display for Error {
41
8.82k
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
42
8.82k
        match *self {
43
6.12k
            Error::Parse(ref x) => x.fmt(f),
44
2.70k
            Error::Translate(ref x) => x.fmt(f),
45
        }
46
8.82k
    }
47
}
48
49
/// A helper type for formatting nice error messages.
50
///
51
/// This type is responsible for reporting regex parse errors in a nice human
52
/// readable format. Most of its complexity is from interspersing notational
53
/// markers pointing out the position where an error occurred.
54
#[derive(Debug)]
55
pub struct Formatter<'e, E> {
56
    /// The original regex pattern in which the error occurred.
57
    pattern: &'e str,
58
    /// The error kind. It must impl fmt::Display.
59
    err: &'e E,
60
    /// The primary span of the error.
61
    span: &'e ast::Span,
62
    /// An auxiliary and optional span, in case the error needs to point to
63
    /// two locations (e.g., when reporting a duplicate capture group name).
64
    aux_span: Option<&'e ast::Span>,
65
}
66
67
impl<'e> From<&'e ast::Error> for Formatter<'e, ast::ErrorKind> {
68
6.12k
    fn from(err: &'e ast::Error) -> Self {
69
6.12k
        Formatter {
70
6.12k
            pattern: err.pattern(),
71
6.12k
            err: err.kind(),
72
6.12k
            span: err.span(),
73
6.12k
            aux_span: err.auxiliary_span(),
74
6.12k
        }
75
6.12k
    }
76
}
77
78
impl<'e> From<&'e hir::Error> for Formatter<'e, hir::ErrorKind> {
79
2.70k
    fn from(err: &'e hir::Error) -> Self {
80
2.70k
        Formatter {
81
2.70k
            pattern: err.pattern(),
82
2.70k
            err: err.kind(),
83
2.70k
            span: err.span(),
84
2.70k
            aux_span: None,
85
2.70k
        }
86
2.70k
    }
87
}
88
89
impl<'e, E: core::fmt::Display> core::fmt::Display for Formatter<'e, E> {
90
8.82k
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
91
8.82k
        let spans = Spans::from_formatter(self);
92
8.82k
        if self.pattern.contains('\n') {
93
573
            let divider = repeat_char('~', 79);
94
573
95
573
            writeln!(f, "regex parse error:")?;
96
573
            writeln!(f, "{}", divider)?;
97
573
            let notated = spans.notate();
98
573
            write!(f, "{}", notated)?;
99
573
            writeln!(f, "{}", divider)?;
100
            // If we have error spans that cover multiple lines, then we just
101
            // note the line numbers.
102
573
            if !spans.multi_line.is_empty() {
103
49
                let mut notes = vec![];
104
98
                for span in &spans.multi_line {
105
49
                    notes.push(format!(
106
49
                        "on line {} (column {}) through line {} (column {})",
107
49
                        span.start.line,
108
49
                        span.start.column,
109
49
                        span.end.line,
110
49
                        span.end.column - 1
111
49
                    ));
112
49
                }
113
49
                writeln!(f, "{}", notes.join("\n"))?;
114
524
            }
115
573
            write!(f, "error: {}", self.err)?;
116
        } else {
117
8.25k
            writeln!(f, "regex parse error:")?;
118
8.25k
            let notated = Spans::from_formatter(self).notate();
119
8.25k
            write!(f, "{}", notated)?;
120
8.25k
            write!(f, "error: {}", self.err)?;
121
        }
122
8.82k
        Ok(())
123
8.82k
    }
<regex_syntax::error::Formatter<regex_syntax::ast::ErrorKind> as core::fmt::Display>::fmt
Line
Count
Source
90
6.12k
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
91
6.12k
        let spans = Spans::from_formatter(self);
92
6.12k
        if self.pattern.contains('\n') {
93
419
            let divider = repeat_char('~', 79);
94
419
95
419
            writeln!(f, "regex parse error:")?;
96
419
            writeln!(f, "{}", divider)?;
97
419
            let notated = spans.notate();
98
419
            write!(f, "{}", notated)?;
99
419
            writeln!(f, "{}", divider)?;
100
            // If we have error spans that cover multiple lines, then we just
101
            // note the line numbers.
102
419
            if !spans.multi_line.is_empty() {
103
44
                let mut notes = vec![];
104
88
                for span in &spans.multi_line {
105
44
                    notes.push(format!(
106
44
                        "on line {} (column {}) through line {} (column {})",
107
44
                        span.start.line,
108
44
                        span.start.column,
109
44
                        span.end.line,
110
44
                        span.end.column - 1
111
44
                    ));
112
44
                }
113
44
                writeln!(f, "{}", notes.join("\n"))?;
114
375
            }
115
419
            write!(f, "error: {}", self.err)?;
116
        } else {
117
5.70k
            writeln!(f, "regex parse error:")?;
118
5.70k
            let notated = Spans::from_formatter(self).notate();
119
5.70k
            write!(f, "{}", notated)?;
120
5.70k
            write!(f, "error: {}", self.err)?;
121
        }
122
6.12k
        Ok(())
123
6.12k
    }
<regex_syntax::error::Formatter<regex_syntax::hir::ErrorKind> as core::fmt::Display>::fmt
Line
Count
Source
90
2.70k
    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
91
2.70k
        let spans = Spans::from_formatter(self);
92
2.70k
        if self.pattern.contains('\n') {
93
154
            let divider = repeat_char('~', 79);
94
154
95
154
            writeln!(f, "regex parse error:")?;
96
154
            writeln!(f, "{}", divider)?;
97
154
            let notated = spans.notate();
98
154
            write!(f, "{}", notated)?;
99
154
            writeln!(f, "{}", divider)?;
100
            // If we have error spans that cover multiple lines, then we just
101
            // note the line numbers.
102
154
            if !spans.multi_line.is_empty() {
103
5
                let mut notes = vec![];
104
10
                for span in &spans.multi_line {
105
5
                    notes.push(format!(
106
5
                        "on line {} (column {}) through line {} (column {})",
107
5
                        span.start.line,
108
5
                        span.start.column,
109
5
                        span.end.line,
110
5
                        span.end.column - 1
111
5
                    ));
112
5
                }
113
5
                writeln!(f, "{}", notes.join("\n"))?;
114
149
            }
115
154
            write!(f, "error: {}", self.err)?;
116
        } else {
117
2.54k
            writeln!(f, "regex parse error:")?;
118
2.54k
            let notated = Spans::from_formatter(self).notate();
119
2.54k
            write!(f, "{}", notated)?;
120
2.54k
            write!(f, "error: {}", self.err)?;
121
        }
122
2.70k
        Ok(())
123
2.70k
    }
124
}
125
126
/// This type represents an arbitrary number of error spans in a way that makes
127
/// it convenient to notate the regex pattern. ("Notate" means "point out
128
/// exactly where the error occurred in the regex pattern.")
129
///
130
/// Technically, we can only ever have two spans given our current error
131
/// structure. However, after toiling with a specific algorithm for handling
132
/// two spans, it became obvious that an algorithm to handle an arbitrary
133
/// number of spans was actually much simpler.
134
struct Spans<'p> {
135
    /// The original regex pattern string.
136
    pattern: &'p str,
137
    /// The total width that should be used for line numbers. The width is
138
    /// used for left padding the line numbers for alignment.
139
    ///
140
    /// A value of `0` means line numbers should not be displayed. That is,
141
    /// the pattern is itself only one line.
142
    line_number_width: usize,
143
    /// All error spans that occur on a single line. This sequence always has
144
    /// length equivalent to the number of lines in `pattern`, where the index
145
    /// of the sequence represents a line number, starting at `0`. The spans
146
    /// in each line are sorted in ascending order.
147
    by_line: Vec<Vec<ast::Span>>,
148
    /// All error spans that occur over more than one line. That is, the start
149
    /// and end position of the span have different line numbers. The spans are
150
    /// sorted in ascending order.
151
    multi_line: Vec<ast::Span>,
152
}
153
154
impl<'p> Spans<'p> {
155
    /// Build a sequence of spans from a formatter.
156
17.0k
    fn from_formatter<'e, E: core::fmt::Display>(
157
17.0k
        fmter: &'p Formatter<'e, E>,
158
17.0k
    ) -> Spans<'p> {
159
17.0k
        let mut line_count = fmter.pattern.lines().count();
160
17.0k
        // If the pattern ends with a `\n` literal, then our line count is
161
17.0k
        // off by one, since a span can occur immediately after the last `\n`,
162
17.0k
        // which is considered to be an additional line.
163
17.0k
        if fmter.pattern.ends_with('\n') {
164
65
            line_count += 1;
165
17.0k
        }
166
17.0k
        let line_number_width =
167
17.0k
            if line_count <= 1 { 0 } else { line_count.to_string().len() };
168
17.0k
        let mut spans = Spans {
169
17.0k
            pattern: &fmter.pattern,
170
17.0k
            line_number_width,
171
17.0k
            by_line: vec![vec![]; line_count],
172
17.0k
            multi_line: vec![],
173
17.0k
        };
174
17.0k
        spans.add(fmter.span.clone());
175
17.0k
        if let Some(span) = fmter.aux_span {
176
982
            spans.add(span.clone());
177
16.0k
        }
178
17.0k
        spans
179
17.0k
    }
<regex_syntax::error::Spans>::from_formatter::<regex_syntax::ast::ErrorKind>
Line
Count
Source
156
11.8k
    fn from_formatter<'e, E: core::fmt::Display>(
157
11.8k
        fmter: &'p Formatter<'e, E>,
158
11.8k
    ) -> Spans<'p> {
159
11.8k
        let mut line_count = fmter.pattern.lines().count();
160
11.8k
        // If the pattern ends with a `\n` literal, then our line count is
161
11.8k
        // off by one, since a span can occur immediately after the last `\n`,
162
11.8k
        // which is considered to be an additional line.
163
11.8k
        if fmter.pattern.ends_with('\n') {
164
52
            line_count += 1;
165
11.7k
        }
166
11.8k
        let line_number_width =
167
11.8k
            if line_count <= 1 { 0 } else { line_count.to_string().len() };
168
11.8k
        let mut spans = Spans {
169
11.8k
            pattern: &fmter.pattern,
170
11.8k
            line_number_width,
171
11.8k
            by_line: vec![vec![]; line_count],
172
11.8k
            multi_line: vec![],
173
11.8k
        };
174
11.8k
        spans.add(fmter.span.clone());
175
11.8k
        if let Some(span) = fmter.aux_span {
176
982
            spans.add(span.clone());
177
10.8k
        }
178
11.8k
        spans
179
11.8k
    }
<regex_syntax::error::Spans>::from_formatter::<regex_syntax::hir::ErrorKind>
Line
Count
Source
156
5.25k
    fn from_formatter<'e, E: core::fmt::Display>(
157
5.25k
        fmter: &'p Formatter<'e, E>,
158
5.25k
    ) -> Spans<'p> {
159
5.25k
        let mut line_count = fmter.pattern.lines().count();
160
5.25k
        // If the pattern ends with a `\n` literal, then our line count is
161
5.25k
        // off by one, since a span can occur immediately after the last `\n`,
162
5.25k
        // which is considered to be an additional line.
163
5.25k
        if fmter.pattern.ends_with('\n') {
164
13
            line_count += 1;
165
5.23k
        }
166
5.25k
        let line_number_width =
167
5.25k
            if line_count <= 1 { 0 } else { line_count.to_string().len() };
168
5.25k
        let mut spans = Spans {
169
5.25k
            pattern: &fmter.pattern,
170
5.25k
            line_number_width,
171
5.25k
            by_line: vec![vec![]; line_count],
172
5.25k
            multi_line: vec![],
173
5.25k
        };
174
5.25k
        spans.add(fmter.span.clone());
175
5.25k
        if let Some(span) = fmter.aux_span {
176
0
            spans.add(span.clone());
177
5.25k
        }
178
5.25k
        spans
179
5.25k
    }
180
181
    /// Add the given span to this sequence, putting it in the right place.
182
18.0k
    fn add(&mut self, span: ast::Span) {
183
18.0k
        // This is grossly inefficient since we sort after each add, but right
184
18.0k
        // now, we only ever add two spans at most.
185
18.0k
        if span.is_one_line() {
186
18.0k
            let i = span.start.line - 1; // because lines are 1-indexed
187
18.0k
            self.by_line[i].push(span);
188
18.0k
            self.by_line[i].sort();
189
18.0k
        } else {
190
49
            self.multi_line.push(span);
191
49
            self.multi_line.sort();
192
49
        }
193
18.0k
    }
194
195
    /// Notate the pattern string with carets (`^`) pointing at each span
196
    /// location. This only applies to spans that occur within a single line.
197
8.82k
    fn notate(&self) -> String {
198
8.82k
        let mut notated = String::new();
199
33.8k
        for (i, line) in self.pattern.lines().enumerate() {
200
33.8k
            if self.line_number_width > 0 {
201
25.5k
                notated.push_str(&self.left_pad_line_number(i + 1));
202
25.5k
                notated.push_str(": ");
203
25.5k
            } else {
204
8.25k
                notated.push_str("    ");
205
8.25k
            }
206
33.8k
            notated.push_str(line);
207
33.8k
            notated.push('\n');
208
33.8k
            if let Some(notes) = self.notate_line(i) {
209
8.79k
                notated.push_str(&notes);
210
8.79k
                notated.push('\n');
211
25.0k
            }
212
        }
213
8.82k
        notated
214
8.82k
    }
215
216
    /// Return notes for the line indexed at `i` (zero-based). If there are no
217
    /// spans for the given line, then `None` is returned. Otherwise, an
218
    /// appropriately space padded string with correctly positioned `^` is
219
    /// returned, accounting for line numbers.
220
33.8k
    fn notate_line(&self, i: usize) -> Option<String> {
221
33.8k
        let spans = &self.by_line[i];
222
33.8k
        if spans.is_empty() {
223
25.0k
            return None;
224
8.79k
        }
225
8.79k
        let mut notes = String::new();
226
34.8k
        for _ in 0..self.line_number_padding() {
227
34.8k
            notes.push(' ');
228
34.8k
        }
229
8.79k
        let mut pos = 0;
230
18.0k
        for span in spans {
231
2.77M
            for _ in pos..(span.start.column - 1) {
232
2.77M
                notes.push(' ');
233
2.77M
                pos += 1;
234
2.77M
            }
235
9.28k
            let note_len = span.end.column.saturating_sub(span.start.column);
236
180k
            for _ in 0..core::cmp::max(1, note_len) {
237
180k
                notes.push('^');
238
180k
                pos += 1;
239
180k
            }
240
        }
241
8.79k
        Some(notes)
242
33.8k
    }
243
244
    /// Left pad the given line number with spaces such that it is aligned with
245
    /// other line numbers.
246
25.5k
    fn left_pad_line_number(&self, n: usize) -> String {
247
25.5k
        let n = n.to_string();
248
25.5k
        let pad = self.line_number_width.checked_sub(n.len()).unwrap();
249
25.5k
        let mut result = repeat_char(' ', pad);
250
25.5k
        result.push_str(&n);
251
25.5k
        result
252
25.5k
    }
253
254
    /// Return the line number padding beginning at the start of each line of
255
    /// the pattern.
256
    ///
257
    /// If the pattern is only one line, then this returns a fixed padding
258
    /// for visual indentation.
259
8.79k
    fn line_number_padding(&self) -> usize {
260
8.79k
        if self.line_number_width == 0 {
261
8.25k
            4
262
        } else {
263
545
            2 + self.line_number_width
264
        }
265
8.79k
    }
266
}
267
268
26.1k
fn repeat_char(c: char, count: usize) -> String {
269
26.1k
    core::iter::repeat(c).take(count).collect()
270
26.1k
}
271
272
#[cfg(test)]
273
mod tests {
274
    use alloc::string::ToString;
275
276
    use crate::ast::parse::Parser;
277
278
    fn assert_panic_message(pattern: &str, expected_msg: &str) {
279
        let result = Parser::new().parse(pattern);
280
        match result {
281
            Ok(_) => {
282
                panic!("regex should not have parsed");
283
            }
284
            Err(err) => {
285
                assert_eq!(err.to_string(), expected_msg.trim());
286
            }
287
        }
288
    }
289
290
    // See: https://github.com/rust-lang/regex/issues/464
291
    #[test]
292
    fn regression_464() {
293
        let err = Parser::new().parse("a{\n").unwrap_err();
294
        // This test checks that the error formatter doesn't panic.
295
        assert!(!err.to_string().is_empty());
296
    }
297
298
    // See: https://github.com/rust-lang/regex/issues/545
299
    #[test]
300
    fn repetition_quantifier_expects_a_valid_decimal() {
301
        assert_panic_message(
302
            r"\\u{[^}]*}",
303
            r#"
304
regex parse error:
305
    \\u{[^}]*}
306
        ^
307
error: repetition quantifier expects a valid decimal
308
"#,
309
        );
310
    }
311
}