/rust/registry/src/index.crates.io-6f17d22bba15001f/regex-syntax-0.6.29/src/hir/interval.rs
Line | Count | Source (jump to first uncovered line) |
1 | | use std::char; |
2 | | use std::cmp; |
3 | | use std::fmt::Debug; |
4 | | use std::slice; |
5 | | use std::u8; |
6 | | |
7 | | use crate::unicode; |
8 | | |
9 | | // This module contains an *internal* implementation of interval sets. |
10 | | // |
11 | | // The primary invariant that interval sets guards is canonical ordering. That |
12 | | // is, every interval set contains an ordered sequence of intervals where |
13 | | // no two intervals are overlapping or adjacent. While this invariant is |
14 | | // occasionally broken within the implementation, it should be impossible for |
15 | | // callers to observe it. |
16 | | // |
17 | | // Since case folding (as implemented below) breaks that invariant, we roll |
18 | | // that into this API even though it is a little out of place in an otherwise |
19 | | // generic interval set. (Hence the reason why the `unicode` module is imported |
20 | | // here.) |
21 | | // |
22 | | // Some of the implementation complexity here is a result of me wanting to |
23 | | // preserve the sequential representation without using additional memory. |
24 | | // In many cases, we do use linear extra memory, but it is at most 2x and it |
25 | | // is amortized. If we relaxed the memory requirements, this implementation |
26 | | // could become much simpler. The extra memory is honestly probably OK, but |
27 | | // character classes (especially of the Unicode variety) can become quite |
28 | | // large, and it would be nice to keep regex compilation snappy even in debug |
29 | | // builds. (In the past, I have been careless with this area of code and it has |
30 | | // caused slow regex compilations in debug mode, so this isn't entirely |
31 | | // unwarranted.) |
32 | | // |
33 | | // Tests on this are relegated to the public API of HIR in src/hir.rs. |
34 | | |
35 | | #[derive(Clone, Debug, Eq, PartialEq)] |
36 | | pub struct IntervalSet<I> { |
37 | | ranges: Vec<I>, |
38 | | } |
39 | | |
40 | | impl<I: Interval> IntervalSet<I> { |
41 | | /// Create a new set from a sequence of intervals. Each interval is |
42 | | /// specified as a pair of bounds, where both bounds are inclusive. |
43 | | /// |
44 | | /// The given ranges do not need to be in any specific order, and ranges |
45 | | /// may overlap. |
46 | 0 | pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> { |
47 | 0 | let mut set = IntervalSet { ranges: intervals.into_iter().collect() }; |
48 | 0 | set.canonicalize(); |
49 | 0 | set |
50 | 0 | } Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::new::<alloc::vec::Vec<regex_syntax::hir::ClassBytesRange>> Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::new::<core::iter::adapters::map::Map<core::slice::iter::Iter<(char, char)>, <regex_syntax::hir::translate::TranslatorI>::hir_ascii_byte_class::{closure#0}>> Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::new::<alloc::vec::Vec<regex_syntax::hir::ClassUnicodeRange>> Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::new::<core::iter::adapters::map::Map<core::slice::iter::Iter<(char, char)>, <regex_syntax::hir::translate::TranslatorI>::hir_ascii_unicode_class::{closure#0}>> |
51 | | |
52 | | /// Add a new interval to this set. |
53 | 0 | pub fn push(&mut self, interval: I) { |
54 | 0 | // TODO: This could be faster. e.g., Push the interval such that |
55 | 0 | // it preserves canonicalization. |
56 | 0 | self.ranges.push(interval); |
57 | 0 | self.canonicalize(); |
58 | 0 | } Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::push Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::push |
59 | | |
60 | | /// Return an iterator over all intervals in this set. |
61 | | /// |
62 | | /// The iterator yields intervals in ascending order. |
63 | 0 | pub fn iter(&self) -> IntervalSetIter<'_, I> { |
64 | 0 | IntervalSetIter(self.ranges.iter()) |
65 | 0 | } Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::iter Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::iter |
66 | | |
67 | | /// Return an immutable slice of intervals in this set. |
68 | | /// |
69 | | /// The sequence returned is in canonical ordering. |
70 | 0 | pub fn intervals(&self) -> &[I] { |
71 | 0 | &self.ranges |
72 | 0 | } Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::intervals Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::intervals |
73 | | |
74 | | /// Expand this interval set such that it contains all case folded |
75 | | /// characters. For example, if this class consists of the range `a-z`, |
76 | | /// then applying case folding will result in the class containing both the |
77 | | /// ranges `a-z` and `A-Z`. |
78 | | /// |
79 | | /// This returns an error if the necessary case mapping data is not |
80 | | /// available. |
81 | 0 | pub fn case_fold_simple(&mut self) -> Result<(), unicode::CaseFoldError> { |
82 | 0 | let len = self.ranges.len(); |
83 | 0 | for i in 0..len { |
84 | 0 | let range = self.ranges[i]; |
85 | 0 | if let Err(err) = range.case_fold_simple(&mut self.ranges) { |
86 | 0 | self.canonicalize(); |
87 | 0 | return Err(err); |
88 | 0 | } |
89 | | } |
90 | 0 | self.canonicalize(); |
91 | 0 | Ok(()) |
92 | 0 | } Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::case_fold_simple Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::case_fold_simple |
93 | | |
94 | | /// Union this set with the given set, in place. |
95 | 0 | pub fn union(&mut self, other: &IntervalSet<I>) { |
96 | 0 | // This could almost certainly be done more efficiently. |
97 | 0 | self.ranges.extend(&other.ranges); |
98 | 0 | self.canonicalize(); |
99 | 0 | } Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::union Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::union |
100 | | |
101 | | /// Intersect this set with the given set, in place. |
102 | 0 | pub fn intersect(&mut self, other: &IntervalSet<I>) { |
103 | 0 | if self.ranges.is_empty() { |
104 | 0 | return; |
105 | 0 | } |
106 | 0 | if other.ranges.is_empty() { |
107 | 0 | self.ranges.clear(); |
108 | 0 | return; |
109 | 0 | } |
110 | 0 |
|
111 | 0 | // There should be a way to do this in-place with constant memory, |
112 | 0 | // but I couldn't figure out a simple way to do it. So just append |
113 | 0 | // the intersection to the end of this range, and then drain it before |
114 | 0 | // we're done. |
115 | 0 | let drain_end = self.ranges.len(); |
116 | 0 |
|
117 | 0 | let mut ita = 0..drain_end; |
118 | 0 | let mut itb = 0..other.ranges.len(); |
119 | 0 | let mut a = ita.next().unwrap(); |
120 | 0 | let mut b = itb.next().unwrap(); |
121 | | loop { |
122 | 0 | if let Some(ab) = self.ranges[a].intersect(&other.ranges[b]) { |
123 | 0 | self.ranges.push(ab); |
124 | 0 | } |
125 | 0 | let (it, aorb) = |
126 | 0 | if self.ranges[a].upper() < other.ranges[b].upper() { |
127 | 0 | (&mut ita, &mut a) |
128 | | } else { |
129 | 0 | (&mut itb, &mut b) |
130 | | }; |
131 | 0 | match it.next() { |
132 | 0 | Some(v) => *aorb = v, |
133 | 0 | None => break, |
134 | 0 | } |
135 | 0 | } |
136 | 0 | self.ranges.drain(..drain_end); |
137 | 0 | } Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::intersect Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::intersect |
138 | | |
139 | | /// Subtract the given set from this set, in place. |
140 | 0 | pub fn difference(&mut self, other: &IntervalSet<I>) { |
141 | 0 | if self.ranges.is_empty() || other.ranges.is_empty() { |
142 | 0 | return; |
143 | 0 | } |
144 | 0 |
|
145 | 0 | // This algorithm is (to me) surprisingly complex. A search of the |
146 | 0 | // interwebs indicate that this is a potentially interesting problem. |
147 | 0 | // Folks seem to suggest interval or segment trees, but I'd like to |
148 | 0 | // avoid the overhead (both runtime and conceptual) of that. |
149 | 0 | // |
150 | 0 | // The following is basically my Shitty First Draft. Therefore, in |
151 | 0 | // order to grok it, you probably need to read each line carefully. |
152 | 0 | // Simplifications are most welcome! |
153 | 0 | // |
154 | 0 | // Remember, we can assume the canonical format invariant here, which |
155 | 0 | // says that all ranges are sorted, not overlapping and not adjacent in |
156 | 0 | // each class. |
157 | 0 | let drain_end = self.ranges.len(); |
158 | 0 | let (mut a, mut b) = (0, 0); |
159 | 0 | 'LOOP: while a < drain_end && b < other.ranges.len() { |
160 | | // Basically, the easy cases are when neither range overlaps with |
161 | | // each other. If the `b` range is less than our current `a` |
162 | | // range, then we can skip it and move on. |
163 | 0 | if other.ranges[b].upper() < self.ranges[a].lower() { |
164 | 0 | b += 1; |
165 | 0 | continue; |
166 | 0 | } |
167 | 0 | // ... similarly for the `a` range. If it's less than the smallest |
168 | 0 | // `b` range, then we can add it as-is. |
169 | 0 | if self.ranges[a].upper() < other.ranges[b].lower() { |
170 | 0 | let range = self.ranges[a]; |
171 | 0 | self.ranges.push(range); |
172 | 0 | a += 1; |
173 | 0 | continue; |
174 | 0 | } |
175 | 0 | // Otherwise, we have overlapping ranges. |
176 | 0 | assert!(!self.ranges[a].is_intersection_empty(&other.ranges[b])); |
177 | | |
178 | | // This part is tricky and was non-obvious to me without looking |
179 | | // at explicit examples (see the tests). The trickiness stems from |
180 | | // two things: 1) subtracting a range from another range could |
181 | | // yield two ranges and 2) after subtracting a range, it's possible |
182 | | // that future ranges can have an impact. The loop below advances |
183 | | // the `b` ranges until they can't possible impact the current |
184 | | // range. |
185 | | // |
186 | | // For example, if our `a` range is `a-t` and our next three `b` |
187 | | // ranges are `a-c`, `g-i`, `r-t` and `x-z`, then we need to apply |
188 | | // subtraction three times before moving on to the next `a` range. |
189 | 0 | let mut range = self.ranges[a]; |
190 | 0 | while b < other.ranges.len() |
191 | 0 | && !range.is_intersection_empty(&other.ranges[b]) |
192 | | { |
193 | 0 | let old_range = range; |
194 | 0 | range = match range.difference(&other.ranges[b]) { |
195 | | (None, None) => { |
196 | | // We lost the entire range, so move on to the next |
197 | | // without adding this one. |
198 | 0 | a += 1; |
199 | 0 | continue 'LOOP; |
200 | | } |
201 | 0 | (Some(range1), None) | (None, Some(range1)) => range1, |
202 | 0 | (Some(range1), Some(range2)) => { |
203 | 0 | self.ranges.push(range1); |
204 | 0 | range2 |
205 | | } |
206 | | }; |
207 | | // It's possible that the `b` range has more to contribute |
208 | | // here. In particular, if it is greater than the original |
209 | | // range, then it might impact the next `a` range *and* it |
210 | | // has impacted the current `a` range as much as possible, |
211 | | // so we can quit. We don't bump `b` so that the next `a` |
212 | | // range can apply it. |
213 | 0 | if other.ranges[b].upper() > old_range.upper() { |
214 | 0 | break; |
215 | 0 | } |
216 | 0 | // Otherwise, the next `b` range might apply to the current |
217 | 0 | // `a` range. |
218 | 0 | b += 1; |
219 | | } |
220 | 0 | self.ranges.push(range); |
221 | 0 | a += 1; |
222 | | } |
223 | 0 | while a < drain_end { |
224 | 0 | let range = self.ranges[a]; |
225 | 0 | self.ranges.push(range); |
226 | 0 | a += 1; |
227 | 0 | } |
228 | 0 | self.ranges.drain(..drain_end); |
229 | 0 | } Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::difference Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::difference |
230 | | |
231 | | /// Compute the symmetric difference of the two sets, in place. |
232 | | /// |
233 | | /// This computes the symmetric difference of two interval sets. This |
234 | | /// removes all elements in this set that are also in the given set, |
235 | | /// but also adds all elements from the given set that aren't in this |
236 | | /// set. That is, the set will contain all elements in either set, |
237 | | /// but will not contain any elements that are in both sets. |
238 | 0 | pub fn symmetric_difference(&mut self, other: &IntervalSet<I>) { |
239 | 0 | // TODO(burntsushi): Fix this so that it amortizes allocation. |
240 | 0 | let mut intersection = self.clone(); |
241 | 0 | intersection.intersect(other); |
242 | 0 | self.union(other); |
243 | 0 | self.difference(&intersection); |
244 | 0 | } Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::symmetric_difference Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::symmetric_difference |
245 | | |
246 | | /// Negate this interval set. |
247 | | /// |
248 | | /// For all `x` where `x` is any element, if `x` was in this set, then it |
249 | | /// will not be in this set after negation. |
250 | 0 | pub fn negate(&mut self) { |
251 | 0 | if self.ranges.is_empty() { |
252 | 0 | let (min, max) = (I::Bound::min_value(), I::Bound::max_value()); |
253 | 0 | self.ranges.push(I::create(min, max)); |
254 | 0 | return; |
255 | 0 | } |
256 | 0 |
|
257 | 0 | // There should be a way to do this in-place with constant memory, |
258 | 0 | // but I couldn't figure out a simple way to do it. So just append |
259 | 0 | // the negation to the end of this range, and then drain it before |
260 | 0 | // we're done. |
261 | 0 | let drain_end = self.ranges.len(); |
262 | 0 |
|
263 | 0 | // We do checked arithmetic below because of the canonical ordering |
264 | 0 | // invariant. |
265 | 0 | if self.ranges[0].lower() > I::Bound::min_value() { |
266 | 0 | let upper = self.ranges[0].lower().decrement(); |
267 | 0 | self.ranges.push(I::create(I::Bound::min_value(), upper)); |
268 | 0 | } |
269 | 0 | for i in 1..drain_end { |
270 | 0 | let lower = self.ranges[i - 1].upper().increment(); |
271 | 0 | let upper = self.ranges[i].lower().decrement(); |
272 | 0 | self.ranges.push(I::create(lower, upper)); |
273 | 0 | } |
274 | 0 | if self.ranges[drain_end - 1].upper() < I::Bound::max_value() { |
275 | 0 | let lower = self.ranges[drain_end - 1].upper().increment(); |
276 | 0 | self.ranges.push(I::create(lower, I::Bound::max_value())); |
277 | 0 | } |
278 | 0 | self.ranges.drain(..drain_end); |
279 | 0 | } Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::negate Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::negate |
280 | | |
281 | | /// Converts this set into a canonical ordering. |
282 | 0 | fn canonicalize(&mut self) { |
283 | 0 | if self.is_canonical() { |
284 | 0 | return; |
285 | 0 | } |
286 | 0 | self.ranges.sort(); |
287 | 0 | assert!(!self.ranges.is_empty()); |
288 | | |
289 | | // Is there a way to do this in-place with constant memory? I couldn't |
290 | | // figure out a way to do it. So just append the canonicalization to |
291 | | // the end of this range, and then drain it before we're done. |
292 | 0 | let drain_end = self.ranges.len(); |
293 | 0 | for oldi in 0..drain_end { |
294 | | // If we've added at least one new range, then check if we can |
295 | | // merge this range in the previously added range. |
296 | 0 | if self.ranges.len() > drain_end { |
297 | 0 | let (last, rest) = self.ranges.split_last_mut().unwrap(); |
298 | 0 | if let Some(union) = last.union(&rest[oldi]) { |
299 | 0 | *last = union; |
300 | 0 | continue; |
301 | 0 | } |
302 | 0 | } |
303 | 0 | let range = self.ranges[oldi]; |
304 | 0 | self.ranges.push(range); |
305 | | } |
306 | 0 | self.ranges.drain(..drain_end); |
307 | 0 | } Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::canonicalize Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::canonicalize |
308 | | |
309 | | /// Returns true if and only if this class is in a canonical ordering. |
310 | 0 | fn is_canonical(&self) -> bool { |
311 | 0 | for pair in self.ranges.windows(2) { |
312 | 0 | if pair[0] >= pair[1] { |
313 | 0 | return false; |
314 | 0 | } |
315 | 0 | if pair[0].is_contiguous(&pair[1]) { |
316 | 0 | return false; |
317 | 0 | } |
318 | | } |
319 | 0 | true |
320 | 0 | } Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::is_canonical Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::is_canonical |
321 | | } |
322 | | |
323 | | /// An iterator over intervals. |
324 | | #[derive(Debug)] |
325 | | pub struct IntervalSetIter<'a, I>(slice::Iter<'a, I>); |
326 | | |
327 | | impl<'a, I> Iterator for IntervalSetIter<'a, I> { |
328 | | type Item = &'a I; |
329 | | |
330 | 0 | fn next(&mut self) -> Option<&'a I> { |
331 | 0 | self.0.next() |
332 | 0 | } Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSetIter<regex_syntax::hir::ClassBytesRange> as core::iter::traits::iterator::Iterator>::next Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSetIter<regex_syntax::hir::ClassUnicodeRange> as core::iter::traits::iterator::Iterator>::next |
333 | | } |
334 | | |
335 | | pub trait Interval: |
336 | | Clone + Copy + Debug + Default + Eq + PartialEq + PartialOrd + Ord |
337 | | { |
338 | | type Bound: Bound; |
339 | | |
340 | | fn lower(&self) -> Self::Bound; |
341 | | fn upper(&self) -> Self::Bound; |
342 | | fn set_lower(&mut self, bound: Self::Bound); |
343 | | fn set_upper(&mut self, bound: Self::Bound); |
344 | | fn case_fold_simple( |
345 | | &self, |
346 | | intervals: &mut Vec<Self>, |
347 | | ) -> Result<(), unicode::CaseFoldError>; |
348 | | |
349 | | /// Create a new interval. |
350 | 0 | fn create(lower: Self::Bound, upper: Self::Bound) -> Self { |
351 | 0 | let mut int = Self::default(); |
352 | 0 | if lower <= upper { |
353 | 0 | int.set_lower(lower); |
354 | 0 | int.set_upper(upper); |
355 | 0 | } else { |
356 | 0 | int.set_lower(upper); |
357 | 0 | int.set_upper(lower); |
358 | 0 | } |
359 | 0 | int |
360 | 0 | } Unexecuted instantiation: <regex_syntax::hir::ClassBytesRange as regex_syntax::hir::interval::Interval>::create Unexecuted instantiation: <regex_syntax::hir::ClassUnicodeRange as regex_syntax::hir::interval::Interval>::create |
361 | | |
362 | | /// Union the given overlapping range into this range. |
363 | | /// |
364 | | /// If the two ranges aren't contiguous, then this returns `None`. |
365 | 0 | fn union(&self, other: &Self) -> Option<Self> { |
366 | 0 | if !self.is_contiguous(other) { |
367 | 0 | return None; |
368 | 0 | } |
369 | 0 | let lower = cmp::min(self.lower(), other.lower()); |
370 | 0 | let upper = cmp::max(self.upper(), other.upper()); |
371 | 0 | Some(Self::create(lower, upper)) |
372 | 0 | } Unexecuted instantiation: <regex_syntax::hir::ClassBytesRange as regex_syntax::hir::interval::Interval>::union Unexecuted instantiation: <regex_syntax::hir::ClassUnicodeRange as regex_syntax::hir::interval::Interval>::union |
373 | | |
374 | | /// Intersect this range with the given range and return the result. |
375 | | /// |
376 | | /// If the intersection is empty, then this returns `None`. |
377 | 0 | fn intersect(&self, other: &Self) -> Option<Self> { |
378 | 0 | let lower = cmp::max(self.lower(), other.lower()); |
379 | 0 | let upper = cmp::min(self.upper(), other.upper()); |
380 | 0 | if lower <= upper { |
381 | 0 | Some(Self::create(lower, upper)) |
382 | | } else { |
383 | 0 | None |
384 | | } |
385 | 0 | } Unexecuted instantiation: <regex_syntax::hir::ClassBytesRange as regex_syntax::hir::interval::Interval>::intersect Unexecuted instantiation: <regex_syntax::hir::ClassUnicodeRange as regex_syntax::hir::interval::Interval>::intersect |
386 | | |
387 | | /// Subtract the given range from this range and return the resulting |
388 | | /// ranges. |
389 | | /// |
390 | | /// If subtraction would result in an empty range, then no ranges are |
391 | | /// returned. |
392 | 0 | fn difference(&self, other: &Self) -> (Option<Self>, Option<Self>) { |
393 | 0 | if self.is_subset(other) { |
394 | 0 | return (None, None); |
395 | 0 | } |
396 | 0 | if self.is_intersection_empty(other) { |
397 | 0 | return (Some(self.clone()), None); |
398 | 0 | } |
399 | 0 | let add_lower = other.lower() > self.lower(); |
400 | 0 | let add_upper = other.upper() < self.upper(); |
401 | 0 | // We know this because !self.is_subset(other) and the ranges have |
402 | 0 | // a non-empty intersection. |
403 | 0 | assert!(add_lower || add_upper); |
404 | 0 | let mut ret = (None, None); |
405 | 0 | if add_lower { |
406 | 0 | let upper = other.lower().decrement(); |
407 | 0 | ret.0 = Some(Self::create(self.lower(), upper)); |
408 | 0 | } |
409 | 0 | if add_upper { |
410 | 0 | let lower = other.upper().increment(); |
411 | 0 | let range = Self::create(lower, self.upper()); |
412 | 0 | if ret.0.is_none() { |
413 | 0 | ret.0 = Some(range); |
414 | 0 | } else { |
415 | 0 | ret.1 = Some(range); |
416 | 0 | } |
417 | 0 | } |
418 | 0 | ret |
419 | 0 | } Unexecuted instantiation: <regex_syntax::hir::ClassBytesRange as regex_syntax::hir::interval::Interval>::difference Unexecuted instantiation: <regex_syntax::hir::ClassUnicodeRange as regex_syntax::hir::interval::Interval>::difference |
420 | | |
421 | | /// Compute the symmetric difference the given range from this range. This |
422 | | /// returns the union of the two ranges minus its intersection. |
423 | 0 | fn symmetric_difference( |
424 | 0 | &self, |
425 | 0 | other: &Self, |
426 | 0 | ) -> (Option<Self>, Option<Self>) { |
427 | 0 | let union = match self.union(other) { |
428 | 0 | None => return (Some(self.clone()), Some(other.clone())), |
429 | 0 | Some(union) => union, |
430 | | }; |
431 | 0 | let intersection = match self.intersect(other) { |
432 | 0 | None => return (Some(self.clone()), Some(other.clone())), |
433 | 0 | Some(intersection) => intersection, |
434 | 0 | }; |
435 | 0 | union.difference(&intersection) |
436 | 0 | } |
437 | | |
438 | | /// Returns true if and only if the two ranges are contiguous. Two ranges |
439 | | /// are contiguous if and only if the ranges are either overlapping or |
440 | | /// adjacent. |
441 | 0 | fn is_contiguous(&self, other: &Self) -> bool { |
442 | 0 | let lower1 = self.lower().as_u32(); |
443 | 0 | let upper1 = self.upper().as_u32(); |
444 | 0 | let lower2 = other.lower().as_u32(); |
445 | 0 | let upper2 = other.upper().as_u32(); |
446 | 0 | cmp::max(lower1, lower2) <= cmp::min(upper1, upper2).saturating_add(1) |
447 | 0 | } Unexecuted instantiation: <regex_syntax::hir::ClassBytesRange as regex_syntax::hir::interval::Interval>::is_contiguous Unexecuted instantiation: <regex_syntax::hir::ClassUnicodeRange as regex_syntax::hir::interval::Interval>::is_contiguous |
448 | | |
449 | | /// Returns true if and only if the intersection of this range and the |
450 | | /// other range is empty. |
451 | 0 | fn is_intersection_empty(&self, other: &Self) -> bool { |
452 | 0 | let (lower1, upper1) = (self.lower(), self.upper()); |
453 | 0 | let (lower2, upper2) = (other.lower(), other.upper()); |
454 | 0 | cmp::max(lower1, lower2) > cmp::min(upper1, upper2) |
455 | 0 | } Unexecuted instantiation: <regex_syntax::hir::ClassBytesRange as regex_syntax::hir::interval::Interval>::is_intersection_empty Unexecuted instantiation: <regex_syntax::hir::ClassUnicodeRange as regex_syntax::hir::interval::Interval>::is_intersection_empty |
456 | | |
457 | | /// Returns true if and only if this range is a subset of the other range. |
458 | 0 | fn is_subset(&self, other: &Self) -> bool { |
459 | 0 | let (lower1, upper1) = (self.lower(), self.upper()); |
460 | 0 | let (lower2, upper2) = (other.lower(), other.upper()); |
461 | 0 | (lower2 <= lower1 && lower1 <= upper2) |
462 | 0 | && (lower2 <= upper1 && upper1 <= upper2) |
463 | 0 | } Unexecuted instantiation: <regex_syntax::hir::ClassBytesRange as regex_syntax::hir::interval::Interval>::is_subset Unexecuted instantiation: <regex_syntax::hir::ClassUnicodeRange as regex_syntax::hir::interval::Interval>::is_subset |
464 | | } |
465 | | |
466 | | pub trait Bound: |
467 | | Copy + Clone + Debug + Eq + PartialEq + PartialOrd + Ord |
468 | | { |
469 | | fn min_value() -> Self; |
470 | | fn max_value() -> Self; |
471 | | fn as_u32(self) -> u32; |
472 | | fn increment(self) -> Self; |
473 | | fn decrement(self) -> Self; |
474 | | } |
475 | | |
476 | | impl Bound for u8 { |
477 | 0 | fn min_value() -> Self { |
478 | 0 | u8::MIN |
479 | 0 | } |
480 | 0 | fn max_value() -> Self { |
481 | 0 | u8::MAX |
482 | 0 | } |
483 | 0 | fn as_u32(self) -> u32 { |
484 | 0 | self as u32 |
485 | 0 | } |
486 | 0 | fn increment(self) -> Self { |
487 | 0 | self.checked_add(1).unwrap() |
488 | 0 | } |
489 | 0 | fn decrement(self) -> Self { |
490 | 0 | self.checked_sub(1).unwrap() |
491 | 0 | } |
492 | | } |
493 | | |
494 | | impl Bound for char { |
495 | 0 | fn min_value() -> Self { |
496 | 0 | '\x00' |
497 | 0 | } |
498 | 0 | fn max_value() -> Self { |
499 | 0 | '\u{10FFFF}' |
500 | 0 | } |
501 | 0 | fn as_u32(self) -> u32 { |
502 | 0 | self as u32 |
503 | 0 | } |
504 | | |
505 | 0 | fn increment(self) -> Self { |
506 | 0 | match self { |
507 | 0 | '\u{D7FF}' => '\u{E000}', |
508 | 0 | c => char::from_u32((c as u32).checked_add(1).unwrap()).unwrap(), |
509 | | } |
510 | 0 | } |
511 | | |
512 | 0 | fn decrement(self) -> Self { |
513 | 0 | match self { |
514 | 0 | '\u{E000}' => '\u{D7FF}', |
515 | 0 | c => char::from_u32((c as u32).checked_sub(1).unwrap()).unwrap(), |
516 | | } |
517 | 0 | } |
518 | | } |
519 | | |
520 | | // Tests for interval sets are written in src/hir.rs against the public API. |