Coverage Report

Created: 2025-09-27 06:48

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gitoxide/gix-config/src/parse/events.rs
Line
Count
Source
1
use smallvec::SmallVec;
2
3
use crate::{
4
    parse,
5
    parse::{Event, Section},
6
};
7
8
/// A type to store, without allocation, all events that typically precede the first section.
9
pub type FrontMatterEvents<'a> = SmallVec<[Event<'a>; 8]>;
10
11
/// A zero-copy `git-config` file parser.
12
///
13
/// This parser exposes low-level syntactic events from a `git-config` file.
14
/// Generally speaking, you'll want to use [`File`] as it wraps
15
/// around the parser to provide a higher-level abstraction to a `git-config`
16
/// file, including querying, modifying, and updating values.
17
///
18
/// This parser guarantees that the events emitted are sufficient to
19
/// reconstruct a `git-config` file identical to the source `git-config`
20
/// when writing it.
21
///
22
/// # Differences from a `.ini` parser
23
///
24
/// While the `git-config` format closely resembles the [`.ini` file format],
25
/// there are subtle differences that make them incompatible. For one, the file
26
/// format is not well defined, and there exists no formal specification to
27
/// adhere to.
28
///
29
/// For concrete examples, some notable differences are:
30
/// - `git-config` sections permit subsections via either a quoted string
31
///   (`[some-section "subsection"]`) or via the deprecated dot notation
32
///   (`[some-section.subsection]`). Successfully parsing these section names is not
33
///   well defined in typical `.ini` parsers. This parser will handle these cases
34
///   perfectly.
35
/// - Comment markers are not strictly defined either. This parser will always
36
///   and only handle a semicolon or octothorpe (also known as a hash or number
37
///   sign).
38
/// - Global properties may be allowed in `.ini` parsers, but are strictly
39
///   disallowed by this parser.
40
/// - Only `\t`, `\n`, `\b` and `\\` are valid escape characters.
41
/// - Quoted and semi-quoted values will be parsed (but quotes will be included
42
///   in event outputs). An example of a semi-quoted value is `5"hello world"`,
43
///   which should be interpreted as `5hello world` after
44
///   [normalization][crate::value::normalize()].
45
/// - Line continuations via a `\` character are supported (inside or outside of quotes).
46
/// - Whitespace handling similarly follows the `git-config` specification as
47
///   closely as possible, where excess whitespace after a non-quoted value is
48
///   trimmed, and line continuations onto a new line with excess spaces are kept.
49
/// - Only equal signs (optionally padded by spaces) are valid name/value
50
///   delimiters.
51
///
52
/// Note that things such as case-sensitivity or duplicate sections are
53
/// _not_ handled. This parser is a low level _syntactic_ interpreter
54
/// and higher level wrappers around this parser, which may
55
/// or may not be zero-copy, should handle _semantic_ values. This also means
56
/// that string-like values are not interpreted. For example, `hello"world"`
57
/// would be read at a high level as `helloworld` but this parser will return
58
/// the former instead, with the extra quotes. This is because it is not the
59
/// responsibility of the parser to interpret these values, and doing so would
60
/// necessarily require a copy, which this parser avoids.
61
///
62
/// # Trait Implementations
63
///
64
/// - This struct does _not_ implement [`FromStr`] due to lifetime
65
///   constraints imposed by the required `from_str` method. Instead, it provides
66
///   [`TryFrom<&'_ str>`](std::convert::TryFrom).
67
///
68
/// # Idioms
69
///
70
/// If you do want to use this parser, there are some idioms that may help you
71
/// with interpreting sequences of events.
72
///
73
/// ## `Value` events do not immediately follow `Key` events
74
///
75
/// Consider the following `git-config` example:
76
///
77
/// ```text
78
/// [core]
79
///   autocrlf = input
80
/// ```
81
///
82
/// Because this parser guarantees perfect reconstruction, there are many
83
/// non-significant events that occur in addition to the ones you may expect:
84
///
85
/// ```
86
/// # use gix_config::parse::{Event, Events, section};
87
/// # use std::borrow::Cow;
88
/// # use std::convert::TryFrom;
89
/// # let section_header = section::Header::new("core", None).unwrap();
90
/// # let section_data = "[core]\n  autocrlf = input";
91
/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![
92
/// Event::SectionHeader(section_header),
93
/// Event::Newline(Cow::Borrowed("\n".into())),
94
/// Event::Whitespace(Cow::Borrowed("  ".into())),
95
/// Event::SectionValueName(section::ValueName::try_from("autocrlf")?),
96
/// Event::Whitespace(Cow::Borrowed(" ".into())),
97
/// Event::KeyValueSeparator,
98
/// Event::Whitespace(Cow::Borrowed(" ".into())),
99
/// Event::Value(Cow::Borrowed("input".into())),
100
/// # ]);
101
/// # Ok::<_, Box<dyn std::error::Error>>(())
102
/// ```
103
///
104
/// Note the two whitespace events between the key and value pair! Those two
105
/// events actually refer to the whitespace between the name and value and the
106
/// equal sign. So if the config instead had `autocrlf=input`, those whitespace
107
/// events would no longer be present.
108
///
109
/// ## `KeyValueSeparator` event is not guaranteed to be emitted
110
///
111
/// Consider the following `git-config` example:
112
///
113
/// ```text
114
/// [core]
115
///   autocrlf
116
/// ```
117
///
118
/// This is a valid config with an `autocrlf` key having an implicit `true`
119
/// value. This means that there is not a `=` separating the key and value,
120
/// which means that the corresponding event won't appear either:
121
///
122
/// ```
123
/// # use gix_config::parse::{Event, Events, section};
124
/// # use std::borrow::Cow;
125
/// # use std::convert::TryFrom;
126
/// # let section_header = section::Header::new("core", None).unwrap();
127
/// # let section_data = "[core]\n  autocrlf";
128
/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![
129
/// Event::SectionHeader(section_header),
130
/// Event::Newline(Cow::Borrowed("\n".into())),
131
/// Event::Whitespace(Cow::Borrowed("  ".into())),
132
/// Event::SectionValueName(section::ValueName::try_from("autocrlf")?),
133
/// Event::Value(Cow::Borrowed("".into())),
134
/// # ]);
135
/// # Ok::<_, Box<dyn std::error::Error>>(())
136
/// ```
137
///
138
/// ## Quoted values are not unquoted
139
///
140
/// Consider the following `git-config` example:
141
///
142
/// ```text
143
/// [core]
144
/// autocrlf=true""
145
/// filemode=fa"lse"
146
/// ```
147
///
148
/// Both these events, when fully processed, should normally be `true` and
149
/// `false`. However, because this parser is zero-copy, we cannot process
150
/// partially quoted values, such as the `false` example. As a result, to
151
/// maintain consistency, the parser will just take all values as literals. The
152
/// relevant event stream is thus emitted as:
153
///
154
/// ```
155
/// # use gix_config::parse::{Event, Events, section};
156
/// # use std::borrow::Cow;
157
/// # use std::convert::TryFrom;
158
/// # let section_header = section::Header::new("core", None).unwrap();
159
/// # let section_data = "[core]\nautocrlf=true\"\"\nfilemode=fa\"lse\"";
160
/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![
161
/// Event::SectionHeader(section_header),
162
/// Event::Newline(Cow::Borrowed("\n".into())),
163
/// Event::SectionValueName(section::ValueName::try_from("autocrlf")?),
164
/// Event::KeyValueSeparator,
165
/// Event::Value(Cow::Borrowed(r#"true"""#.into())),
166
/// Event::Newline(Cow::Borrowed("\n".into())),
167
/// Event::SectionValueName(section::ValueName::try_from("filemode")?),
168
/// Event::KeyValueSeparator,
169
/// Event::Value(Cow::Borrowed(r#"fa"lse""#.into())),
170
/// # ]);
171
/// # Ok::<_, Box<dyn std::error::Error>>(())
172
/// ```
173
///
174
/// ## Whitespace after line continuations is part of the value
175
///
176
/// Consider the following `git-config` example:
177
///
178
/// ```text
179
/// [some-section]
180
/// file=a\
181
///     c
182
/// ```
183
///
184
/// Because of how `git-config` treats continuations, the whitespace preceding `c`
185
/// is in fact part of the value of `file`. The fully interpreted key/value
186
/// pair is actually `file=a    c`. As a result, the parser will provide this
187
/// split value accordingly:
188
///
189
/// ```
190
/// # use gix_config::parse::{Event, Events, section};
191
/// # use std::borrow::Cow;
192
/// # use std::convert::TryFrom;
193
/// # let section_header = section::Header::new("some-section", None).unwrap();
194
/// # let section_data = "[some-section]\nfile=a\\\n    c";
195
/// # assert_eq!(Events::from_str(section_data).unwrap().into_vec(), vec![
196
/// Event::SectionHeader(section_header),
197
/// Event::Newline(Cow::Borrowed("\n".into())),
198
/// Event::SectionValueName(section::ValueName::try_from("file")?),
199
/// Event::KeyValueSeparator,
200
/// Event::ValueNotDone(Cow::Borrowed("a".into())),
201
/// Event::Newline(Cow::Borrowed("\n".into())),
202
/// Event::ValueDone(Cow::Borrowed("    c".into())),
203
/// # ]);
204
/// # Ok::<_, Box<dyn std::error::Error>>(())
205
/// ```
206
///
207
/// [`File`]: crate::File
208
/// [`.ini` file format]: https://en.wikipedia.org/wiki/INI_file
209
/// [`git`'s documentation]: https://git-scm.com/docs/git-config#_configuration_file
210
/// [`FromStr`]: std::str::FromStr
211
/// [`From<&'_ str>`]: std::convert::From
212
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Hash, Debug, Default)]
213
pub struct Events<'a> {
214
    /// Events seen before the first section.
215
    pub frontmatter: FrontMatterEvents<'a>,
216
    /// All parsed sections.
217
    pub sections: Vec<Section<'a>>,
218
}
219
220
impl Events<'static> {
221
    /// Parses the provided bytes, returning an [`Events`] that contains allocated
222
    /// and owned events. This is similar to [`Events::from_bytes()`], but performance
223
    /// is degraded as it requires allocation for every event.
224
    ///
225
    /// Use `filter` to only include those events for which it returns true.
226
0
    pub fn from_bytes_owned<'a>(
227
0
        input: &'a [u8],
228
0
        filter: Option<fn(&Event<'a>) -> bool>,
229
0
    ) -> Result<Events<'static>, parse::Error> {
230
0
        from_bytes(input, &|e| e.to_owned(), filter)
231
0
    }
232
}
233
234
impl<'a> Events<'a> {
235
    /// Attempt to zero-copy parse the provided bytes. On success, returns a
236
    /// [`Events`] that provides methods to access leading comments and sections
237
    /// of a `git-config` file and can be converted into an iterator of [`Event`]
238
    /// for higher level processing.
239
    ///
240
    /// Use `filter` to only include those events for which it returns true.
241
17.4k
    pub fn from_bytes(input: &'a [u8], filter: Option<fn(&Event<'a>) -> bool>) -> Result<Events<'a>, parse::Error> {
242
17.4k
        from_bytes(input, &std::convert::identity, filter)
243
17.4k
    }
244
245
    /// Attempt to zero-copy parse the provided `input` string.
246
    ///
247
    /// Prefer the [`from_bytes()`][Self::from_bytes()] method if UTF8 encoding
248
    /// isn't guaranteed.
249
    #[allow(clippy::should_implement_trait)]
250
6.71k
    pub fn from_str(input: &'a str) -> Result<Events<'a>, parse::Error> {
251
6.71k
        Self::from_bytes(input.as_bytes(), None)
252
6.71k
    }
253
254
    /// Consumes the parser to produce an iterator of all contained events.
255
    #[must_use = "iterators are lazy and do nothing unless consumed"]
256
    #[allow(clippy::should_implement_trait)]
257
0
    pub fn into_iter(self) -> impl std::iter::FusedIterator<Item = parse::Event<'a>> {
258
0
        self.frontmatter.into_iter().chain(
259
0
            self.sections
260
0
                .into_iter()
261
0
                .flat_map(|section| std::iter::once(parse::Event::SectionHeader(section.header)).chain(section.events)),
262
        )
263
0
    }
264
265
    /// Place all contained events into a single `Vec`.
266
0
    pub fn into_vec(self) -> Vec<parse::Event<'a>> {
267
0
        self.into_iter().collect()
268
0
    }
269
}
270
271
impl<'a> TryFrom<&'a str> for Events<'a> {
272
    type Error = parse::Error;
273
274
3.35k
    fn try_from(value: &'a str) -> Result<Self, Self::Error> {
275
3.35k
        Self::from_str(value)
276
3.35k
    }
277
}
278
279
impl<'a> TryFrom<&'a [u8]> for Events<'a> {
280
    type Error = parse::Error;
281
282
0
    fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> {
283
0
        Events::from_bytes(value, None)
284
0
    }
285
}
286
287
17.4k
fn from_bytes<'a, 'b>(
288
17.4k
    input: &'a [u8],
289
17.4k
    convert: &dyn Fn(Event<'a>) -> Event<'b>,
290
17.4k
    filter: Option<fn(&Event<'a>) -> bool>,
291
17.4k
) -> Result<Events<'b>, parse::Error> {
292
17.4k
    let mut header = None;
293
17.4k
    let mut events = Vec::with_capacity(256);
294
17.4k
    let mut frontmatter = FrontMatterEvents::default();
295
17.4k
    let mut sections = Vec::new();
296
128M
    parse::from_bytes(input, &mut |e: Event<'_>| match e {
297
14.8M
        Event::SectionHeader(next_header) => {
298
14.8M
            match header.take() {
299
11.3k
                None => {
300
11.3k
                    frontmatter = std::mem::take(&mut events).into_iter().collect();
301
11.3k
                }
302
14.8M
                Some(prev_header) => {
303
14.8M
                    sections.push(parse::Section {
304
14.8M
                        header: prev_header,
305
14.8M
                        events: std::mem::take(&mut events),
306
14.8M
                    });
307
14.8M
                }
308
            }
309
14.8M
            header = match convert(Event::SectionHeader(next_header)) {
310
14.8M
                Event::SectionHeader(h) => h,
311
0
                _ => unreachable!("BUG: convert must not change the event type, just the lifetime"),
312
            }
313
14.8M
            .into();
314
        }
315
113M
        event => {
316
113M
            if filter.map_or(true, |f| f(&event)) {
317
113M
                events.push(convert(event));
318
113M
            }
319
        }
320
128M
    })?;
321
322
15.4k
    match header {
323
5.07k
        None => {
324
5.07k
            frontmatter = events.into_iter().collect();
325
5.07k
        }
326
10.4k
        Some(prev_header) => {
327
10.4k
            sections.push(parse::Section {
328
10.4k
                header: prev_header,
329
10.4k
                events: std::mem::take(&mut events),
330
10.4k
            });
331
10.4k
        }
332
    }
333
15.4k
    Ok(Events { frontmatter, sections })
334
17.4k
}