/rust/registry/src/index.crates.io-1949cf8c6b5b557f/regex-1.5.6/src/expand.rs
Line | Count | Source |
1 | | use std::str; |
2 | | |
3 | | use crate::find_byte::find_byte; |
4 | | |
5 | | use crate::re_bytes; |
6 | | use crate::re_unicode; |
7 | | |
8 | 0 | pub fn expand_str( |
9 | 0 | caps: &re_unicode::Captures<'_>, |
10 | 0 | mut replacement: &str, |
11 | 0 | dst: &mut String, |
12 | 0 | ) { |
13 | 0 | while !replacement.is_empty() { |
14 | 0 | match find_byte(b'$', replacement.as_bytes()) { |
15 | 0 | None => break, |
16 | 0 | Some(i) => { |
17 | 0 | dst.push_str(&replacement[..i]); |
18 | 0 | replacement = &replacement[i..]; |
19 | 0 | } |
20 | | } |
21 | 0 | if replacement.as_bytes().get(1).map_or(false, |&b| b == b'$') {Unexecuted instantiation: regex::expand::expand_str::{closure#0}Unexecuted instantiation: regex::expand::expand_str::{closure#0} |
22 | 0 | dst.push_str("$"); |
23 | 0 | replacement = &replacement[2..]; |
24 | 0 | continue; |
25 | 0 | } |
26 | 0 | debug_assert!(!replacement.is_empty()); |
27 | 0 | let cap_ref = match find_cap_ref(replacement.as_bytes()) { |
28 | 0 | Some(cap_ref) => cap_ref, |
29 | | None => { |
30 | 0 | dst.push_str("$"); |
31 | 0 | replacement = &replacement[1..]; |
32 | 0 | continue; |
33 | | } |
34 | | }; |
35 | 0 | replacement = &replacement[cap_ref.end..]; |
36 | 0 | match cap_ref.cap { |
37 | 0 | Ref::Number(i) => { |
38 | 0 | dst.push_str(caps.get(i).map(|m| m.as_str()).unwrap_or(""));Unexecuted instantiation: regex::expand::expand_str::{closure#1}Unexecuted instantiation: regex::expand::expand_str::{closure#1} |
39 | | } |
40 | 0 | Ref::Named(name) => { |
41 | 0 | dst.push_str( |
42 | 0 | caps.name(name).map(|m| m.as_str()).unwrap_or(""),Unexecuted instantiation: regex::expand::expand_str::{closure#2}Unexecuted instantiation: regex::expand::expand_str::{closure#2} |
43 | | ); |
44 | | } |
45 | | } |
46 | | } |
47 | 0 | dst.push_str(replacement); |
48 | 0 | } Unexecuted instantiation: regex::expand::expand_str Unexecuted instantiation: regex::expand::expand_str |
49 | | |
50 | 0 | pub fn expand_bytes( |
51 | 0 | caps: &re_bytes::Captures<'_>, |
52 | 0 | mut replacement: &[u8], |
53 | 0 | dst: &mut Vec<u8>, |
54 | 0 | ) { |
55 | 0 | while !replacement.is_empty() { |
56 | 0 | match find_byte(b'$', replacement) { |
57 | 0 | None => break, |
58 | 0 | Some(i) => { |
59 | 0 | dst.extend(&replacement[..i]); |
60 | 0 | replacement = &replacement[i..]; |
61 | 0 | } |
62 | | } |
63 | 0 | if replacement.get(1).map_or(false, |&b| b == b'$') {Unexecuted instantiation: regex::expand::expand_bytes::{closure#0}Unexecuted instantiation: regex::expand::expand_bytes::{closure#0} |
64 | 0 | dst.push(b'$'); |
65 | 0 | replacement = &replacement[2..]; |
66 | 0 | continue; |
67 | 0 | } |
68 | 0 | debug_assert!(!replacement.is_empty()); |
69 | 0 | let cap_ref = match find_cap_ref(replacement) { |
70 | 0 | Some(cap_ref) => cap_ref, |
71 | | None => { |
72 | 0 | dst.push(b'$'); |
73 | 0 | replacement = &replacement[1..]; |
74 | 0 | continue; |
75 | | } |
76 | | }; |
77 | 0 | replacement = &replacement[cap_ref.end..]; |
78 | 0 | match cap_ref.cap { |
79 | 0 | Ref::Number(i) => { |
80 | 0 | dst.extend(caps.get(i).map(|m| m.as_bytes()).unwrap_or(b"")); Unexecuted instantiation: regex::expand::expand_bytes::{closure#1}Unexecuted instantiation: regex::expand::expand_bytes::{closure#1} |
81 | | } |
82 | 0 | Ref::Named(name) => { |
83 | 0 | dst.extend( |
84 | 0 | caps.name(name).map(|m| m.as_bytes()).unwrap_or(b""), Unexecuted instantiation: regex::expand::expand_bytes::{closure#2}Unexecuted instantiation: regex::expand::expand_bytes::{closure#2} |
85 | | ); |
86 | | } |
87 | | } |
88 | | } |
89 | 0 | dst.extend(replacement); |
90 | 0 | } Unexecuted instantiation: regex::expand::expand_bytes Unexecuted instantiation: regex::expand::expand_bytes |
91 | | |
92 | | /// `CaptureRef` represents a reference to a capture group inside some text. |
93 | | /// The reference is either a capture group name or a number. |
94 | | /// |
95 | | /// It is also tagged with the position in the text following the |
96 | | /// capture reference. |
97 | | #[derive(Clone, Copy, Debug, Eq, PartialEq)] |
98 | | struct CaptureRef<'a> { |
99 | | cap: Ref<'a>, |
100 | | end: usize, |
101 | | } |
102 | | |
/// Identifies a capture group mentioned in replacement text.
///
/// For example `$2` is a `Number`, while `$foo` and `${foo}` are `Named`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Ref<'a> {
    /// The group is referred to by name; the name borrows from the
    /// replacement text.
    Named(&'a str),
    /// The group is referred to by its index.
    Number(usize),
}
111 | | |
112 | | impl<'a> From<&'a str> for Ref<'a> { |
113 | 0 | fn from(x: &'a str) -> Ref<'a> { |
114 | 0 | Ref::Named(x) |
115 | 0 | } Unexecuted instantiation: <regex::expand::Ref as core::convert::From<&str>>::from Unexecuted instantiation: <regex::expand::Ref as core::convert::From<&str>>::from |
116 | | } |
117 | | |
118 | | impl From<usize> for Ref<'static> { |
119 | 0 | fn from(x: usize) -> Ref<'static> { |
120 | 0 | Ref::Number(x) |
121 | 0 | } Unexecuted instantiation: <regex::expand::Ref as core::convert::From<usize>>::from Unexecuted instantiation: <regex::expand::Ref as core::convert::From<usize>>::from |
122 | | } |
123 | | |
124 | | /// Parses a possible reference to a capture group name in the given text, |
125 | | /// starting at the beginning of `replacement`. |
126 | | /// |
127 | | /// If no such valid reference could be found, None is returned. |
128 | 0 | fn find_cap_ref(replacement: &[u8]) -> Option<CaptureRef<'_>> { |
129 | 0 | let mut i = 0; |
130 | 0 | let rep: &[u8] = replacement.as_ref(); |
131 | 0 | if rep.len() <= 1 || rep[0] != b'$' { |
132 | 0 | return None; |
133 | 0 | } |
134 | 0 | i += 1; |
135 | 0 | if rep[i] == b'{' { |
136 | 0 | return find_cap_ref_braced(rep, i + 1); |
137 | 0 | } |
138 | 0 | let mut cap_end = i; |
139 | 0 | while rep.get(cap_end).map_or(false, is_valid_cap_letter) { |
140 | 0 | cap_end += 1; |
141 | 0 | } |
142 | 0 | if cap_end == i { |
143 | 0 | return None; |
144 | 0 | } |
145 | | // We just verified that the range 0..cap_end is valid ASCII, so it must |
146 | | // therefore be valid UTF-8. If we really cared, we could avoid this UTF-8 |
147 | | // check via an unchecked conversion or by parsing the number straight from |
148 | | // &[u8]. |
149 | 0 | let cap = |
150 | 0 | str::from_utf8(&rep[i..cap_end]).expect("valid UTF-8 capture name"); |
151 | | Some(CaptureRef { |
152 | 0 | cap: match cap.parse::<u32>() { |
153 | 0 | Ok(i) => Ref::Number(i as usize), |
154 | 0 | Err(_) => Ref::Named(cap), |
155 | | }, |
156 | 0 | end: cap_end, |
157 | | }) |
158 | 0 | } Unexecuted instantiation: regex::expand::find_cap_ref Unexecuted instantiation: regex::expand::find_cap_ref |
159 | | |
160 | 0 | fn find_cap_ref_braced(rep: &[u8], mut i: usize) -> Option<CaptureRef<'_>> { |
161 | 0 | let start = i; |
162 | 0 | while rep.get(i).map_or(false, |&b| b != b'}') {Unexecuted instantiation: regex::expand::find_cap_ref_braced::{closure#0}Unexecuted instantiation: regex::expand::find_cap_ref_braced::{closure#0} |
163 | 0 | i += 1; |
164 | 0 | } |
165 | 0 | if !rep.get(i).map_or(false, |&b| b == b'}') {Unexecuted instantiation: regex::expand::find_cap_ref_braced::{closure#1}Unexecuted instantiation: regex::expand::find_cap_ref_braced::{closure#1} |
166 | 0 | return None; |
167 | 0 | } |
168 | | // When looking at braced names, we don't put any restrictions on the name, |
169 | | // so it's possible it could be invalid UTF-8. But a capture group name |
170 | | // can never be invalid UTF-8, so if we have invalid UTF-8, then we can |
171 | | // safely return None. |
172 | 0 | let cap = match str::from_utf8(&rep[start..i]) { |
173 | 0 | Err(_) => return None, |
174 | 0 | Ok(cap) => cap, |
175 | | }; |
176 | | Some(CaptureRef { |
177 | 0 | cap: match cap.parse::<u32>() { |
178 | 0 | Ok(i) => Ref::Number(i as usize), |
179 | 0 | Err(_) => Ref::Named(cap), |
180 | | }, |
181 | 0 | end: i + 1, |
182 | | }) |
183 | 0 | } Unexecuted instantiation: regex::expand::find_cap_ref_braced Unexecuted instantiation: regex::expand::find_cap_ref_braced |
184 | | |
/// Reports whether `b` may appear in an unbraced capture name: an ASCII
/// letter, an ASCII digit, or an underscore.
fn is_valid_cap_letter(b: &u8) -> bool {
    b.is_ascii_alphanumeric() || *b == b'_'
}
192 | | |
#[cfg(test)]
mod tests {
    use super::{find_cap_ref, CaptureRef};

    // Generates one `#[test]` function per invocation.
    //
    // * `find!(name, text)` asserts that `find_cap_ref` finds no reference
    //   at the start of `text`.
    // * `find!(name, text, capref)` asserts that `find_cap_ref` returns
    //   exactly `capref` for `text`.
    macro_rules! find {
        ($name:ident, $text:expr) => {
            #[test]
            fn $name() {
                assert_eq!(None, find_cap_ref($text.as_bytes()));
            }
        };
        ($name:ident, $text:expr, $capref:expr) => {
            #[test]
            fn $name() {
                assert_eq!(Some($capref), find_cap_ref($text.as_bytes()));
            }
        };
    }

    // Shorthand for building the expected `CaptureRef`: the first argument
    // is a group name (`&str`) or index (`usize`), converted via `into()`;
    // the second is the expected `end` offset.
    macro_rules! c {
        ($name_or_number:expr, $pos:expr) => {
            CaptureRef { cap: $name_or_number.into(), end: $pos }
        };
    }

    find!(find_cap_ref1, "$foo", c!("foo", 4));
    find!(find_cap_ref2, "${foo}", c!("foo", 6));
    find!(find_cap_ref3, "$0", c!(0, 2));
    find!(find_cap_ref4, "$5", c!(5, 2));
    find!(find_cap_ref5, "$10", c!(10, 3));
    // See https://github.com/rust-lang/regex/pull/585
    // for more on characters following numbers
    find!(find_cap_ref6, "$42a", c!("42a", 4));
    find!(find_cap_ref7, "${42}a", c!(42, 5));
    // Unterminated braces are not references.
    find!(find_cap_ref8, "${42");
    find!(find_cap_ref9, "${42 ");
    // The reference must start at offset 0.
    find!(find_cap_ref10, " $0 ");
    find!(find_cap_ref11, "$");
    find!(find_cap_ref12, " ");
    find!(find_cap_ref13, "");
    // `-` ends an unbraced name, whereas `_` is part of it.
    find!(find_cap_ref14, "$1-$2", c!(1, 2));
    find!(find_cap_ref15, "$1_$2", c!("1_", 3));
    find!(find_cap_ref16, "$x-$y", c!("x", 2));
    find!(find_cap_ref17, "$x_$y", c!("x_", 3));
    // Braced names may contain bytes that unbraced names cannot.
    find!(find_cap_ref18, "${#}", c!("#", 4));
    find!(find_cap_ref19, "${Z[}", c!("Z[", 5));
}