/src/regex/regex-syntax/src/hir/interval.rs
Line | Count | Source (jump to first uncovered line) |
1 | | use core::{char, cmp, fmt::Debug, slice}; |
2 | | |
3 | | use alloc::vec::Vec; |
4 | | |
5 | | use crate::unicode; |
6 | | |
7 | | // This module contains an *internal* implementation of interval sets. |
8 | | // |
9 | | // The primary invariant that interval sets guard is canonical ordering. That |
10 | | // is, every interval set contains an ordered sequence of intervals where |
11 | | // no two intervals are overlapping or adjacent. While this invariant is |
12 | | // occasionally broken within the implementation, it should be impossible for |
13 | | // callers to observe it. |
14 | | // |
15 | | // Since case folding (as implemented below) breaks that invariant, we roll |
16 | | // that into this API even though it is a little out of place in an otherwise |
17 | | // generic interval set. (Hence the reason why the `unicode` module is imported |
18 | | // here.) |
19 | | // |
20 | | // Some of the implementation complexity here is a result of me wanting to |
21 | | // preserve the sequential representation without using additional memory. |
22 | | // In many cases, we do use linear extra memory, but it is at most 2x and it |
23 | | // is amortized. If we relaxed the memory requirements, this implementation |
24 | | // could become much simpler. The extra memory is honestly probably OK, but |
25 | | // character classes (especially of the Unicode variety) can become quite |
26 | | // large, and it would be nice to keep regex compilation snappy even in debug |
27 | | // builds. (In the past, I have been careless with this area of code and it has |
28 | | // caused slow regex compilations in debug mode, so this isn't entirely |
29 | | // unwarranted.) |
30 | | // |
31 | | // Tests on this are relegated to the public API of HIR in src/hir.rs. |
32 | | |
/// A set of intervals of type `I`, stored as a sequence of ranges.
///
/// The `ranges` field holds the intervals themselves, while `folded` is a
/// conservative "already case folded" flag used only to skip redundant work.
#[derive(Clone, Debug)]
pub struct IntervalSet<I> {
    /// A sorted set of non-overlapping ranges.
    ranges: Vec<I>,
    /// While not required at all for correctness, we keep track of whether an
    /// interval set has been case folded or not. This helps us avoid doing
    /// redundant work if, for example, a set has already been case folded.
    /// And note that whether a set is folded or not is preserved through
    /// all of the pairwise set operations. That is, if both interval sets
    /// have been case folded, then any of difference, union, intersection or
    /// symmetric difference all produce a case folded set.
    ///
    /// Note that when this is true, it *must* be the case that the set is case
    /// folded. But when it's false, the set *may* be case folded. In other
    /// words, we only set this to true when we know it to be the case, but
    /// we're okay with it being false if it would otherwise be costly to
    /// determine whether it should be true. This means code cannot assume that
    /// a false value necessarily indicates that the set is not case folded.
    ///
    /// Bottom line: this is a performance optimization.
    folded: bool,
}
55 | | |
// Marker impl only: the comparison itself is provided by the hand-written
// `PartialEq` impl for this type, which compares the stored ranges.
impl<I: Interval> Eq for IntervalSet<I> {}
57 | | |
58 | | // We implement PartialEq manually so that we don't consider the set's internal |
59 | | // 'folded' property to be part of its identity. The 'folded' property is |
60 | | // strictly an optimization. |
61 | | impl<I: Interval> PartialEq for IntervalSet<I> { |
62 | 19.5k | fn eq(&self, other: &IntervalSet<I>) -> bool { |
63 | 19.5k | self.ranges.eq(&other.ranges) |
64 | 19.5k | } <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange> as core::cmp::PartialEq>::eq Line | Count | Source | 62 | 3.85k | fn eq(&self, other: &IntervalSet<I>) -> bool { | 63 | 3.85k | self.ranges.eq(&other.ranges) | 64 | 3.85k | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange> as core::cmp::PartialEq>::eq Line | Count | Source | 62 | 15.7k | fn eq(&self, other: &IntervalSet<I>) -> bool { | 63 | 15.7k | self.ranges.eq(&other.ranges) | 64 | 15.7k | } |
|
65 | | } |
66 | | |
67 | | impl<I: Interval> IntervalSet<I> { |
68 | | /// Create a new set from a sequence of intervals. Each interval is |
69 | | /// specified as a pair of bounds, where both bounds are inclusive. |
70 | | /// |
71 | | /// The given ranges do not need to be in any specific order, and ranges |
72 | | /// may overlap. |
73 | 1.51M | pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> { |
74 | 1.51M | let ranges: Vec<I> = intervals.into_iter().collect(); |
75 | 1.51M | // An empty set is case folded. |
76 | 1.51M | let folded = ranges.is_empty(); |
77 | 1.51M | let mut set = IntervalSet { ranges, folded }; |
78 | 1.51M | set.canonicalize(); |
79 | 1.51M | set |
80 | 1.51M | } <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::new::<[regex_syntax::hir::ClassBytesRange; 1]> Line | Count | Source | 73 | 6.94k | pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> { | 74 | 6.94k | let ranges: Vec<I> = intervals.into_iter().collect(); | 75 | 6.94k | // An empty set is case folded. | 76 | 6.94k | let folded = ranges.is_empty(); | 77 | 6.94k | let mut set = IntervalSet { ranges, folded }; | 78 | 6.94k | set.canonicalize(); | 79 | 6.94k | set | 80 | 6.94k | } |
Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::new::<[regex_syntax::hir::ClassBytesRange; 2]> <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::new::<[regex_syntax::hir::ClassBytesRange; 3]> Line | Count | Source | 73 | 824 | pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> { | 74 | 824 | let ranges: Vec<I> = intervals.into_iter().collect(); | 75 | 824 | // An empty set is case folded. | 76 | 824 | let folded = ranges.is_empty(); | 77 | 824 | let mut set = IntervalSet { ranges, folded }; | 78 | 824 | set.canonicalize(); | 79 | 824 | set | 80 | 824 | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::new::<alloc::vec::Vec<regex_syntax::hir::ClassBytesRange>> Line | Count | Source | 73 | 364k | pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> { | 74 | 364k | let ranges: Vec<I> = intervals.into_iter().collect(); | 75 | 364k | // An empty set is case folded. | 76 | 364k | let folded = ranges.is_empty(); | 77 | 364k | let mut set = IntervalSet { ranges, folded }; | 78 | 364k | set.canonicalize(); | 79 | 364k | set | 80 | 364k | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::new::<core::iter::adapters::map::Map<core::iter::adapters::copied::Copied<core::slice::iter::Iter<(u8, u8)>>, <regex_syntax::hir::translate::TranslatorI>::hir_ascii_byte_class::{closure#0}>> Line | Count | Source | 73 | 26.3k | pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> { | 74 | 26.3k | let ranges: Vec<I> = intervals.into_iter().collect(); | 75 | 26.3k | // An empty set is case folded. | 76 | 26.3k | let folded = ranges.is_empty(); | 77 | 26.3k | let mut set = IntervalSet { ranges, folded }; | 78 | 26.3k | set.canonicalize(); | 79 | 26.3k | set | 80 | 26.3k | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::new::<core::iter::adapters::map::Map<core::slice::iter::Iter<regex_syntax::hir::ClassUnicodeRange>, <regex_syntax::hir::ClassUnicode>::to_byte_class::{closure#0}>> Line | Count | Source | 73 | 4.10k | pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> { | 74 | 4.10k | let ranges: Vec<I> = intervals.into_iter().collect(); | 75 | 4.10k | // An empty set is case folded. | 76 | 4.10k | let folded = ranges.is_empty(); | 77 | 4.10k | let mut set = IntervalSet { ranges, folded }; | 78 | 4.10k | set.canonicalize(); | 79 | 4.10k | set | 80 | 4.10k | } |
Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::new::<core::iter::adapters::map::Map<alloc::vec::into_iter::IntoIter<u8>, <regex_syntax::hir::Hir>::alternation::{closure#1}>> <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::new::<[regex_syntax::hir::ClassUnicodeRange; 1]> Line | Count | Source | 73 | 44.6k | pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> { | 74 | 44.6k | let ranges: Vec<I> = intervals.into_iter().collect(); | 75 | 44.6k | // An empty set is case folded. | 76 | 44.6k | let folded = ranges.is_empty(); | 77 | 44.6k | let mut set = IntervalSet { ranges, folded }; | 78 | 44.6k | set.canonicalize(); | 79 | 44.6k | set | 80 | 44.6k | } |
Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::new::<[regex_syntax::hir::ClassUnicodeRange; 2]> <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::new::<[regex_syntax::hir::ClassUnicodeRange; 3]> Line | Count | Source | 73 | 3.28k | pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> { | 74 | 3.28k | let ranges: Vec<I> = intervals.into_iter().collect(); | 75 | 3.28k | // An empty set is case folded. | 76 | 3.28k | let folded = ranges.is_empty(); | 77 | 3.28k | let mut set = IntervalSet { ranges, folded }; | 78 | 3.28k | set.canonicalize(); | 79 | 3.28k | set | 80 | 3.28k | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::new::<alloc::vec::Vec<regex_syntax::hir::ClassUnicodeRange>> Line | Count | Source | 73 | 864k | pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> { | 74 | 864k | let ranges: Vec<I> = intervals.into_iter().collect(); | 75 | 864k | // An empty set is case folded. | 76 | 864k | let folded = ranges.is_empty(); | 77 | 864k | let mut set = IntervalSet { ranges, folded }; | 78 | 864k | set.canonicalize(); | 79 | 864k | set | 80 | 864k | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::new::<core::iter::adapters::map::Map<core::iter::adapters::map::Map<core::iter::adapters::copied::Copied<core::slice::iter::Iter<(u8, u8)>>, regex_syntax::hir::translate::ascii_class_as_chars::{closure#0}>, <regex_syntax::hir::translate::TranslatorI>::hir_ascii_unicode_class::{closure#0}>> Line | Count | Source | 73 | 55.1k | pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> { | 74 | 55.1k | let ranges: Vec<I> = intervals.into_iter().collect(); | 75 | 55.1k | // An empty set is case folded. | 76 | 55.1k | let folded = ranges.is_empty(); | 77 | 55.1k | let mut set = IntervalSet { ranges, folded }; | 78 | 55.1k | set.canonicalize(); | 79 | 55.1k | set | 80 | 55.1k | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::new::<core::iter::adapters::map::Map<core::slice::iter::Iter<regex_syntax::hir::ClassBytesRange>, <regex_syntax::hir::ClassBytes>::to_unicode_class::{closure#0}>> Line | Count | Source | 73 | 7.70k | pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> { | 74 | 7.70k | let ranges: Vec<I> = intervals.into_iter().collect(); | 75 | 7.70k | // An empty set is case folded. | 76 | 7.70k | let folded = ranges.is_empty(); | 77 | 7.70k | let mut set = IntervalSet { ranges, folded }; | 78 | 7.70k | set.canonicalize(); | 79 | 7.70k | set | 80 | 7.70k | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::new::<core::iter::adapters::map::Map<alloc::vec::into_iter::IntoIter<char>, <regex_syntax::hir::Hir>::alternation::{closure#0}>> Line | Count | Source | 73 | 1.27k | pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> { | 74 | 1.27k | let ranges: Vec<I> = intervals.into_iter().collect(); | 75 | 1.27k | // An empty set is case folded. | 76 | 1.27k | let folded = ranges.is_empty(); | 77 | 1.27k | let mut set = IntervalSet { ranges, folded }; | 78 | 1.27k | set.canonicalize(); | 79 | 1.27k | set | 80 | 1.27k | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::new::<[regex_syntax::hir::ClassBytesRange; 1]> Line | Count | Source | 73 | 135k | pub fn new<T: IntoIterator<Item = I>>(intervals: T) -> IntervalSet<I> { | 74 | 135k | let ranges: Vec<I> = intervals.into_iter().collect(); | 75 | 135k | // An empty set is case folded. | 76 | 135k | let folded = ranges.is_empty(); | 77 | 135k | let mut set = IntervalSet { ranges, folded }; | 78 | 135k | set.canonicalize(); | 79 | 135k | set | 80 | 135k | } |
Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::new::<[regex_syntax::hir::ClassBytesRange; 2]> Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::new::<[regex_syntax::hir::ClassBytesRange; 3]> Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::new::<[regex_syntax::hir::ClassUnicodeRange; 1]> Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::new::<[regex_syntax::hir::ClassUnicodeRange; 2]> Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::new::<[regex_syntax::hir::ClassUnicodeRange; 3]> |
81 | | |
82 | | /// Add a new interval to this set. |
83 | 1.04M | pub fn push(&mut self, interval: I) { |
84 | 1.04M | // TODO: This could be faster. e.g., Push the interval such that |
85 | 1.04M | // it preserves canonicalization. |
86 | 1.04M | self.ranges.push(interval); |
87 | 1.04M | self.canonicalize(); |
88 | 1.04M | // We don't know whether the new interval added here is considered |
89 | 1.04M | // case folded, so we conservatively assume that the entire set is |
90 | 1.04M | // no longer case folded if it was previously. |
91 | 1.04M | self.folded = false; |
92 | 1.04M | } <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::push Line | Count | Source | 83 | 572k | pub fn push(&mut self, interval: I) { | 84 | 572k | // TODO: This could be faster. e.g., Push the interval such that | 85 | 572k | // it preserves canonicalization. | 86 | 572k | self.ranges.push(interval); | 87 | 572k | self.canonicalize(); | 88 | 572k | // We don't know whether the new interval added here is considered | 89 | 572k | // case folded, so we conservatively assume that the entire set is | 90 | 572k | // no longer case folded if it was previously. | 91 | 572k | self.folded = false; | 92 | 572k | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::push Line | Count | Source | 83 | 467k | pub fn push(&mut self, interval: I) { | 84 | 467k | // TODO: This could be faster. e.g., Push the interval such that | 85 | 467k | // it preserves canonicalization. | 86 | 467k | self.ranges.push(interval); | 87 | 467k | self.canonicalize(); | 88 | 467k | // We don't know whether the new interval added here is considered | 89 | 467k | // case folded, so we conservatively assume that the entire set is | 90 | 467k | // no longer case folded if it was previously. | 91 | 467k | self.folded = false; | 92 | 467k | } |
|
93 | | |
94 | | /// Return an iterator over all intervals in this set. |
95 | | /// |
96 | | /// The iterator yields intervals in ascending order. |
97 | 9.47M | pub fn iter(&self) -> IntervalSetIter<'_, I> { |
98 | 9.47M | IntervalSetIter(self.ranges.iter()) |
99 | 9.47M | } <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::iter Line | Count | Source | 97 | 3.71M | pub fn iter(&self) -> IntervalSetIter<'_, I> { | 98 | 3.71M | IntervalSetIter(self.ranges.iter()) | 99 | 3.71M | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::iter Line | Count | Source | 97 | 5.75M | pub fn iter(&self) -> IntervalSetIter<'_, I> { | 98 | 5.75M | IntervalSetIter(self.ranges.iter()) | 99 | 5.75M | } |
|
100 | | |
101 | | /// Return an immutable slice of intervals in this set. |
102 | | /// |
103 | | /// The sequence returned is in canonical ordering. |
104 | 15.6M | pub fn intervals(&self) -> &[I] { |
105 | 15.6M | &self.ranges |
106 | 15.6M | } <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::intervals Line | Count | Source | 104 | 5.00M | pub fn intervals(&self) -> &[I] { | 105 | 5.00M | &self.ranges | 106 | 5.00M | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::intervals Line | Count | Source | 104 | 10.5M | pub fn intervals(&self) -> &[I] { | 105 | 10.5M | &self.ranges | 106 | 10.5M | } |
|
107 | | |
108 | | /// Expand this interval set such that it contains all case folded |
109 | | /// characters. For example, if this class consists of the range `a-z`, |
110 | | /// then applying case folding will result in the class containing both the |
111 | | /// ranges `a-z` and `A-Z`. |
112 | | /// |
113 | | /// This returns an error if the necessary case mapping data is not |
114 | | /// available. |
115 | 596k | pub fn case_fold_simple(&mut self) -> Result<(), unicode::CaseFoldError> { |
116 | 596k | if self.folded { |
117 | 99.2k | return Ok(()); |
118 | 497k | } |
119 | 497k | let len = self.ranges.len(); |
120 | 5.08M | for i in 0..len { |
121 | 5.08M | let range = self.ranges[i]; |
122 | 5.08M | if let Err(err) = range.case_fold_simple(&mut self.ranges) { |
123 | 0 | self.canonicalize(); |
124 | 0 | return Err(err); |
125 | 5.08M | } |
126 | | } |
127 | 497k | self.canonicalize(); |
128 | 497k | self.folded = true; |
129 | 497k | Ok(()) |
130 | 596k | } <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::case_fold_simple Line | Count | Source | 115 | 177k | pub fn case_fold_simple(&mut self) -> Result<(), unicode::CaseFoldError> { | 116 | 177k | if self.folded { | 117 | 63.4k | return Ok(()); | 118 | 114k | } | 119 | 114k | let len = self.ranges.len(); | 120 | 866k | for i in 0..len { | 121 | 866k | let range = self.ranges[i]; | 122 | 866k | if let Err(err) = range.case_fold_simple(&mut self.ranges) { | 123 | 0 | self.canonicalize(); | 124 | 0 | return Err(err); | 125 | 866k | } | 126 | | } | 127 | 114k | self.canonicalize(); | 128 | 114k | self.folded = true; | 129 | 114k | Ok(()) | 130 | 177k | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::case_fold_simple Line | Count | Source | 115 | 419k | pub fn case_fold_simple(&mut self) -> Result<(), unicode::CaseFoldError> { | 116 | 419k | if self.folded { | 117 | 35.7k | return Ok(()); | 118 | 383k | } | 119 | 383k | let len = self.ranges.len(); | 120 | 4.22M | for i in 0..len { | 121 | 4.22M | let range = self.ranges[i]; | 122 | 4.22M | if let Err(err) = range.case_fold_simple(&mut self.ranges) { | 123 | 0 | self.canonicalize(); | 124 | 0 | return Err(err); | 125 | 4.22M | } | 126 | | } | 127 | 383k | self.canonicalize(); | 128 | 383k | self.folded = true; | 129 | 383k | Ok(()) | 130 | 419k | } |
|
131 | | |
132 | | /// Union this set with the given set, in place. |
133 | 531k | pub fn union(&mut self, other: &IntervalSet<I>) { |
134 | 531k | if other.ranges.is_empty() || self.ranges == other.ranges { |
135 | 126k | return; |
136 | 404k | } |
137 | 404k | // This could almost certainly be done more efficiently. |
138 | 404k | self.ranges.extend(&other.ranges); |
139 | 404k | self.canonicalize(); |
140 | 404k | self.folded = self.folded && other.folded; |
141 | 531k | } <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::union Line | Count | Source | 133 | 206k | pub fn union(&mut self, other: &IntervalSet<I>) { | 134 | 206k | if other.ranges.is_empty() || self.ranges == other.ranges { | 135 | 45.0k | return; | 136 | 161k | } | 137 | 161k | // This could almost certainly be done more efficiently. | 138 | 161k | self.ranges.extend(&other.ranges); | 139 | 161k | self.canonicalize(); | 140 | 161k | self.folded = self.folded && other.folded; | 141 | 206k | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::union Line | Count | Source | 133 | 324k | pub fn union(&mut self, other: &IntervalSet<I>) { | 134 | 324k | if other.ranges.is_empty() || self.ranges == other.ranges { | 135 | 81.6k | return; | 136 | 243k | } | 137 | 243k | // This could almost certainly be done more efficiently. | 138 | 243k | self.ranges.extend(&other.ranges); | 139 | 243k | self.canonicalize(); | 140 | 243k | self.folded = self.folded && other.folded; | 141 | 324k | } |
|
142 | | |
143 | | /// Intersect this set with the given set, in place. |
144 | 97.5k | pub fn intersect(&mut self, other: &IntervalSet<I>) { |
145 | 97.5k | if self.ranges.is_empty() { |
146 | 40.0k | return; |
147 | 57.5k | } |
148 | 57.5k | if other.ranges.is_empty() { |
149 | 15.4k | self.ranges.clear(); |
150 | 15.4k | // An empty set is case folded. |
151 | 15.4k | self.folded = true; |
152 | 15.4k | return; |
153 | 42.1k | } |
154 | 42.1k | |
155 | 42.1k | // There should be a way to do this in-place with constant memory, |
156 | 42.1k | // but I couldn't figure out a simple way to do it. So just append |
157 | 42.1k | // the intersection to the end of this range, and then drain it before |
158 | 42.1k | // we're done. |
159 | 42.1k | let drain_end = self.ranges.len(); |
160 | 42.1k | |
161 | 42.1k | let mut ita = 0..drain_end; |
162 | 42.1k | let mut itb = 0..other.ranges.len(); |
163 | 42.1k | let mut a = ita.next().unwrap(); |
164 | 42.1k | let mut b = itb.next().unwrap(); |
165 | | loop { |
166 | 5.19M | if let Some(ab) = self.ranges[a].intersect(&other.ranges[b]) { |
167 | 2.49M | self.ranges.push(ab); |
168 | 2.69M | } |
169 | 5.19M | let (it, aorb) = |
170 | 5.19M | if self.ranges[a].upper() < other.ranges[b].upper() { |
171 | 2.73M | (&mut ita, &mut a) |
172 | | } else { |
173 | 2.45M | (&mut itb, &mut b) |
174 | | }; |
175 | 5.19M | match it.next() { |
176 | 5.15M | Some(v) => *aorb = v, |
177 | 42.1k | None => break, |
178 | 42.1k | } |
179 | 42.1k | } |
180 | 42.1k | self.ranges.drain(..drain_end); |
181 | 42.1k | self.folded = self.folded && other.folded; |
182 | 97.5k | } <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::intersect Line | Count | Source | 144 | 49.7k | pub fn intersect(&mut self, other: &IntervalSet<I>) { | 145 | 49.7k | if self.ranges.is_empty() { | 146 | 15.8k | return; | 147 | 33.8k | } | 148 | 33.8k | if other.ranges.is_empty() { | 149 | 7.90k | self.ranges.clear(); | 150 | 7.90k | // An empty set is case folded. | 151 | 7.90k | self.folded = true; | 152 | 7.90k | return; | 153 | 25.9k | } | 154 | 25.9k | | 155 | 25.9k | // There should be a way to do this in-place with constant memory, | 156 | 25.9k | // but I couldn't figure out a simple way to do it. So just append | 157 | 25.9k | // the intersection to the end of this range, and then drain it before | 158 | 25.9k | // we're done. | 159 | 25.9k | let drain_end = self.ranges.len(); | 160 | 25.9k | | 161 | 25.9k | let mut ita = 0..drain_end; | 162 | 25.9k | let mut itb = 0..other.ranges.len(); | 163 | 25.9k | let mut a = ita.next().unwrap(); | 164 | 25.9k | let mut b = itb.next().unwrap(); | 165 | | loop { | 166 | 316k | if let Some(ab) = self.ranges[a].intersect(&other.ranges[b]) { | 167 | 119k | self.ranges.push(ab); | 168 | 197k | } | 169 | 316k | let (it, aorb) = | 170 | 316k | if self.ranges[a].upper() < other.ranges[b].upper() { | 171 | 194k | (&mut ita, &mut a) | 172 | | } else { | 173 | 122k | (&mut itb, &mut b) | 174 | | }; | 175 | 316k | match it.next() { | 176 | 290k | Some(v) => *aorb = v, | 177 | 25.9k | None => break, | 178 | 25.9k | } | 179 | 25.9k | } | 180 | 25.9k | self.ranges.drain(..drain_end); | 181 | 25.9k | self.folded = self.folded && other.folded; | 182 | 49.7k | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::intersect Line | Count | Source | 144 | 47.8k | pub fn intersect(&mut self, other: &IntervalSet<I>) { | 145 | 47.8k | if self.ranges.is_empty() { | 146 | 24.1k | return; | 147 | 23.6k | } | 148 | 23.6k | if other.ranges.is_empty() { | 149 | 7.54k | self.ranges.clear(); | 150 | 7.54k | // An empty set is case folded. | 151 | 7.54k | self.folded = true; | 152 | 7.54k | return; | 153 | 16.1k | } | 154 | 16.1k | | 155 | 16.1k | // There should be a way to do this in-place with constant memory, | 156 | 16.1k | // but I couldn't figure out a simple way to do it. So just append | 157 | 16.1k | // the intersection to the end of this range, and then drain it before | 158 | 16.1k | // we're done. | 159 | 16.1k | let drain_end = self.ranges.len(); | 160 | 16.1k | | 161 | 16.1k | let mut ita = 0..drain_end; | 162 | 16.1k | let mut itb = 0..other.ranges.len(); | 163 | 16.1k | let mut a = ita.next().unwrap(); | 164 | 16.1k | let mut b = itb.next().unwrap(); | 165 | | loop { | 166 | 4.87M | if let Some(ab) = self.ranges[a].intersect(&other.ranges[b]) { | 167 | 2.37M | self.ranges.push(ab); | 168 | 2.49M | } | 169 | 4.87M | let (it, aorb) = | 170 | 4.87M | if self.ranges[a].upper() < other.ranges[b].upper() { | 171 | 2.54M | (&mut ita, &mut a) | 172 | | } else { | 173 | 2.33M | (&mut itb, &mut b) | 174 | | }; | 175 | 4.87M | match it.next() { | 176 | 4.85M | Some(v) => *aorb = v, | 177 | 16.1k | None => break, | 178 | 16.1k | } | 179 | 16.1k | } | 180 | 16.1k | self.ranges.drain(..drain_end); | 181 | 16.1k | self.folded = self.folded && other.folded; | 182 | 47.8k | } |
|
183 | | |
/// Subtract the given set from this set, in place.
///
/// If either set has no ranges, this set is left untouched. Otherwise the
/// surviving pieces are appended after the existing ranges and the original
/// ranges are drained away at the end.
pub fn difference(&mut self, other: &IntervalSet<I>) {
    if self.ranges.is_empty() || other.ranges.is_empty() {
        return;
    }

    // This algorithm is (to me) surprisingly complex. A search of the
    // interwebs indicates that this is a potentially interesting problem.
    // Folks seem to suggest interval or segment trees, but I'd like to
    // avoid the overhead (both runtime and conceptual) of that.
    //
    // The following is basically a first draft. Therefore, in order to
    // grok it, you probably need to read each line carefully.
    // Simplifications are most welcome!
    //
    // Remember, we can assume the canonical format invariant here, which
    // says that all ranges are sorted, not overlapping and not adjacent in
    // each class.
    //
    // `drain_end` marks the end of the original ranges; everything pushed
    // during the loops below lands after it.
    let drain_end = self.ranges.len();
    // `a` indexes the original ranges of `self`, `b` indexes `other`.
    let (mut a, mut b) = (0, 0);
    'LOOP: while a < drain_end && b < other.ranges.len() {
        // Basically, the easy cases are when neither range overlaps with
        // each other. If the `b` range is less than our current `a`
        // range, then we can skip it and move on.
        if other.ranges[b].upper() < self.ranges[a].lower() {
            b += 1;
            continue;
        }
        // ... similarly for the `a` range. If it's less than the smallest
        // `b` range, then we can add it as-is.
        if self.ranges[a].upper() < other.ranges[b].lower() {
            let range = self.ranges[a];
            self.ranges.push(range);
            a += 1;
            continue;
        }
        // Otherwise, we have overlapping ranges.
        assert!(!self.ranges[a].is_intersection_empty(&other.ranges[b]));

        // This part is tricky and was non-obvious to me without looking
        // at explicit examples (see the tests). The trickiness stems from
        // two things: 1) subtracting a range from another range could
        // yield two ranges and 2) after subtracting a range, it's possible
        // that future ranges can have an impact. The loop below advances
        // the `b` ranges until they can't possibly impact the current
        // range.
        //
        // For example, if our `a` range is `a-t` and our next four `b`
        // ranges are `a-c`, `g-i`, `r-t` and `x-z`, then we need to apply
        // subtraction three times before moving on to the next `a` range.
        let mut range = self.ranges[a];
        while b < other.ranges.len()
            && !range.is_intersection_empty(&other.ranges[b])
        {
            let old_range = range;
            range = match range.difference(&other.ranges[b]) {
                (None, None) => {
                    // We lost the entire range, so move on to the next
                    // without adding this one.
                    a += 1;
                    continue 'LOOP;
                }
                // Exactly one piece survived; keep subtracting from it.
                (Some(range1), None) | (None, Some(range1)) => range1,
                // Two pieces survived: the first is emitted right away and
                // the second stays the working range.
                (Some(range1), Some(range2)) => {
                    self.ranges.push(range1);
                    range2
                }
            };
            // It's possible that the `b` range has more to contribute
            // here. In particular, if it is greater than the original
            // range, then it might impact the next `a` range *and* it
            // has impacted the current `a` range as much as possible,
            // so we can quit. We don't bump `b` so that the next `a`
            // range can apply it.
            if other.ranges[b].upper() > old_range.upper() {
                break;
            }
            // Otherwise, the next `b` range might apply to the current
            // `a` range.
            b += 1;
        }
        // Whatever is left of the current `a` range survives.
        self.ranges.push(range);
        a += 1;
    }
    // `other` is exhausted, so every remaining original range is kept as-is.
    while a < drain_end {
        let range = self.ranges[a];
        self.ranges.push(range);
        a += 1;
    }
    // Drop the original ranges, leaving only the results appended above.
    self.ranges.drain(..drain_end);
    // The result is only known to be folded if both inputs were.
    self.folded = self.folded && other.folded;
}
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::difference Line | Count | Source | 185 | 59.9k | pub fn difference(&mut self, other: &IntervalSet<I>) { | 186 | 59.9k | if self.ranges.is_empty() || other.ranges.is_empty() { | 187 | 37.7k | return; | 188 | 22.1k | } | 189 | 22.1k | | 190 | 22.1k | // This algorithm is (to me) surprisingly complex. A search of the | 191 | 22.1k | // interwebs indicate that this is a potentially interesting problem. | 192 | 22.1k | // Folks seem to suggest interval or segment trees, but I'd like to | 193 | 22.1k | // avoid the overhead (both runtime and conceptual) of that. | 194 | 22.1k | // | 195 | 22.1k | // The following is basically my Shitty First Draft. Therefore, in | 196 | 22.1k | // order to grok it, you probably need to read each line carefully. | 197 | 22.1k | // Simplifications are most welcome! | 198 | 22.1k | // | 199 | 22.1k | // Remember, we can assume the canonical format invariant here, which | 200 | 22.1k | // says that all ranges are sorted, not overlapping and not adjacent in | 201 | 22.1k | // each class. | 202 | 22.1k | let drain_end = self.ranges.len(); | 203 | 22.1k | let (mut a, mut b) = (0, 0); | 204 | 2.79M | 'LOOP: while a < drain_end && b < other.ranges.len() { | 205 | | // Basically, the easy cases are when neither range overlaps with | 206 | | // each other. If the `b` range is less than our current `a` | 207 | | // range, then we can skip it and move on. | 208 | 2.77M | if other.ranges[b].upper() < self.ranges[a].lower() { | 209 | 1.05M | b += 1; | 210 | 1.05M | continue; | 211 | 1.71M | } | 212 | 1.71M | // ... similarly for the `a` range. If it's less than the smallest | 213 | 1.71M | // `b` range, then we can add it as-is. 
| 214 | 1.71M | if self.ranges[a].upper() < other.ranges[b].lower() { | 215 | 819k | let range = self.ranges[a]; | 216 | 819k | self.ranges.push(range); | 217 | 819k | a += 1; | 218 | 819k | continue; | 219 | 897k | } | 220 | 897k | // Otherwise, we have overlapping ranges. | 221 | 897k | assert!(!self.ranges[a].is_intersection_empty(&other.ranges[b])); | 222 | | | 223 | | // This part is tricky and was non-obvious to me without looking | 224 | | // at explicit examples (see the tests). The trickiness stems from | 225 | | // two things: 1) subtracting a range from another range could | 226 | | // yield two ranges and 2) after subtracting a range, it's possible | 227 | | // that future ranges can have an impact. The loop below advances | 228 | | // the `b` ranges until they can't possible impact the current | 229 | | // range. | 230 | | // | 231 | | // For example, if our `a` range is `a-t` and our next three `b` | 232 | | // ranges are `a-c`, `g-i`, `r-t` and `x-z`, then we need to apply | 233 | | // subtraction three times before moving on to the next `a` range. | 234 | 897k | let mut range = self.ranges[a]; | 235 | 2.33M | while b < other.ranges.len() | 236 | 2.32M | && !range.is_intersection_empty(&other.ranges[b]) | 237 | | { | 238 | 2.27M | let old_range = range; | 239 | 2.27M | range = match range.difference(&other.ranges[b]) { | 240 | | (None, None) => { | 241 | | // We lost the entire range, so move on to the next | 242 | | // without adding this one. | 243 | 832k | a += 1; | 244 | 832k | continue 'LOOP; | 245 | | } | 246 | 63.3k | (Some(range1), None) | (None, Some(range1)) => range1, | 247 | 1.38M | (Some(range1), Some(range2)) => { | 248 | 1.38M | self.ranges.push(range1); | 249 | 1.38M | range2 | 250 | | } | 251 | | }; | 252 | | // It's possible that the `b` range has more to contribute | 253 | | // here. 
In particular, if it is greater than the original | 254 | | // range, then it might impact the next `a` range *and* it | 255 | | // has impacted the current `a` range as much as possible, | 256 | | // so we can quit. We don't bump `b` so that the next `a` | 257 | | // range can apply it. | 258 | 1.44M | if other.ranges[b].upper() > old_range.upper() { | 259 | 2.91k | break; | 260 | 1.44M | } | 261 | 1.44M | // Otherwise, the next `b` range might apply to the current | 262 | 1.44M | // `a` range. | 263 | 1.44M | b += 1; | 264 | | } | 265 | 64.7k | self.ranges.push(range); | 266 | 64.7k | a += 1; | 267 | | } | 268 | 1.22M | while a < drain_end { | 269 | 1.20M | let range = self.ranges[a]; | 270 | 1.20M | self.ranges.push(range); | 271 | 1.20M | a += 1; | 272 | 1.20M | } | 273 | 22.1k | self.ranges.drain(..drain_end); | 274 | 22.1k | self.folded = self.folded && other.folded; | 275 | 59.9k | } |
|
276 | | |
277 | | /// Compute the symmetric difference of the two sets, in place. |
278 | | /// |
279 | | /// This computes the symmetric difference of two interval sets. This |
280 | | /// removes all elements in this set that are also in the given set, |
281 | | /// but also adds all elements from the given set that aren't in this |
282 | | /// set. That is, the set will contain all elements in either set, |
283 | | /// but will not contain any elements that are in both sets. |
284 | 77.9k | pub fn symmetric_difference(&mut self, other: &IntervalSet<I>) { |
285 | 77.9k | // TODO(burntsushi): Fix this so that it amortizes allocation. |
286 | 77.9k | let mut intersection = self.clone(); |
287 | 77.9k | intersection.intersect(other); |
288 | 77.9k | self.union(other); |
289 | 77.9k | self.difference(&intersection); |
290 | 77.9k | } <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::symmetric_difference Line | Count | Source | 284 | 43.5k | pub fn symmetric_difference(&mut self, other: &IntervalSet<I>) { | 285 | 43.5k | // TODO(burntsushi): Fix this so that it amortizes allocation. | 286 | 43.5k | let mut intersection = self.clone(); | 287 | 43.5k | intersection.intersect(other); | 288 | 43.5k | self.union(other); | 289 | 43.5k | self.difference(&intersection); | 290 | 43.5k | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::symmetric_difference Line | Count | Source | 284 | 34.3k | pub fn symmetric_difference(&mut self, other: &IntervalSet<I>) { | 285 | 34.3k | // TODO(burntsushi): Fix this so that it amortizes allocation. | 286 | 34.3k | let mut intersection = self.clone(); | 287 | 34.3k | intersection.intersect(other); | 288 | 34.3k | self.union(other); | 289 | 34.3k | self.difference(&intersection); | 290 | 34.3k | } |
|
291 | | |
292 | | /// Negate this interval set. |
293 | | /// |
294 | | /// For all `x` where `x` is any element, if `x` was in this set, then it |
295 | | /// will not be in this set after negation. |
296 | 243k | pub fn negate(&mut self) { |
297 | 243k | if self.ranges.is_empty() { |
298 | 5.67k | let (min, max) = (I::Bound::min_value(), I::Bound::max_value()); |
299 | 5.67k | self.ranges.push(I::create(min, max)); |
300 | 5.67k | // The set containing everything must be case folded. |
301 | 5.67k | self.folded = true; |
302 | 5.67k | return; |
303 | 238k | } |
304 | 238k | |
305 | 238k | // There should be a way to do this in-place with constant memory, |
306 | 238k | // but I couldn't figure out a simple way to do it. So just append |
307 | 238k | // the negation to the end of this range, and then drain it before |
308 | 238k | // we're done. |
309 | 238k | let drain_end = self.ranges.len(); |
310 | 238k | |
311 | 238k | // We do checked arithmetic below because of the canonical ordering |
312 | 238k | // invariant. |
313 | 238k | if self.ranges[0].lower() > I::Bound::min_value() { |
314 | 205k | let upper = self.ranges[0].lower().decrement(); |
315 | 205k | self.ranges.push(I::create(I::Bound::min_value(), upper)); |
316 | 205k | } |
317 | 18.0M | for i in 1..drain_end { |
318 | 18.0M | let lower = self.ranges[i - 1].upper().increment(); |
319 | 18.0M | let upper = self.ranges[i].lower().decrement(); |
320 | 18.0M | self.ranges.push(I::create(lower, upper)); |
321 | 18.0M | } |
322 | 238k | if self.ranges[drain_end - 1].upper() < I::Bound::max_value() { |
323 | 207k | let lower = self.ranges[drain_end - 1].upper().increment(); |
324 | 207k | self.ranges.push(I::create(lower, I::Bound::max_value())); |
325 | 207k | } |
326 | 238k | self.ranges.drain(..drain_end); |
327 | | // We don't need to update whether this set is folded or not, because |
328 | | // it is conservatively preserved through negation. Namely, if a set |
329 | | // is not folded, then it is possible that its negation is folded, for |
330 | | // example, [^☃]. But we're fine with assuming that the set is not |
331 | | // folded in that case. (`folded` permits false negatives but not false |
332 | | // positives.) |
333 | | // |
334 | | // But what about when a set is folded, is its negation also |
335 | | // necessarily folded? Yes. Because if a set is folded, then for every |
336 | | // character in the set, it necessarily included its equivalence class |
337 | | // of case folded characters. Negating it in turn means that all |
338 | | // equivalence classes in the set are negated, and any equivalence |
339 | | // class that was previously not in the set is now entirely in the set. |
340 | 243k | } <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::negate Line | Count | Source | 296 | 24.2k | pub fn negate(&mut self) { | 297 | 24.2k | if self.ranges.is_empty() { | 298 | 911 | let (min, max) = (I::Bound::min_value(), I::Bound::max_value()); | 299 | 911 | self.ranges.push(I::create(min, max)); | 300 | 911 | // The set containing everything must case folded. | 301 | 911 | self.folded = true; | 302 | 911 | return; | 303 | 23.3k | } | 304 | 23.3k | | 305 | 23.3k | // There should be a way to do this in-place with constant memory, | 306 | 23.3k | // but I couldn't figure out a simple way to do it. So just append | 307 | 23.3k | // the negation to the end of this range, and then drain it before | 308 | 23.3k | // we're done. | 309 | 23.3k | let drain_end = self.ranges.len(); | 310 | 23.3k | | 311 | 23.3k | // We do checked arithmetic below because of the canonical ordering | 312 | 23.3k | // invariant. | 313 | 23.3k | if self.ranges[0].lower() > I::Bound::min_value() { | 314 | 21.0k | let upper = self.ranges[0].lower().decrement(); | 315 | 21.0k | self.ranges.push(I::create(I::Bound::min_value(), upper)); | 316 | 21.0k | } | 317 | 31.1k | for i in 1..drain_end { | 318 | 31.1k | let lower = self.ranges[i - 1].upper().increment(); | 319 | 31.1k | let upper = self.ranges[i].lower().decrement(); | 320 | 31.1k | self.ranges.push(I::create(lower, upper)); | 321 | 31.1k | } | 322 | 23.3k | if self.ranges[drain_end - 1].upper() < I::Bound::max_value() { | 323 | 21.4k | let lower = self.ranges[drain_end - 1].upper().increment(); | 324 | 21.4k | self.ranges.push(I::create(lower, I::Bound::max_value())); | 325 | 21.4k | } | 326 | 23.3k | self.ranges.drain(..drain_end); | 327 | | // We don't need to update whether this set is folded or not, because | 328 | | // it is conservatively preserved through negation. Namely, if a set | 329 | | // is not folded, then it is possible that its negation is folded, for | 330 | | // example, [^☃]. 
But we're fine with assuming that the set is not | 331 | | // folded in that case. (`folded` permits false negatives but not false | 332 | | // positives.) | 333 | | // | 334 | | // But what about when a set is folded, is its negation also | 335 | | // necessarily folded? Yes. Because if a set is folded, then for every | 336 | | // character in the set, it necessarily included its equivalence class | 337 | | // of case folded characters. Negating it in turn means that all | 338 | | // equivalence classes in the set are negated, and any equivalence | 339 | | // class that was previously not in the set is now entirely in the set. | 340 | 24.2k | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::negate Line | Count | Source | 296 | 219k | pub fn negate(&mut self) { | 297 | 219k | if self.ranges.is_empty() { | 298 | 4.76k | let (min, max) = (I::Bound::min_value(), I::Bound::max_value()); | 299 | 4.76k | self.ranges.push(I::create(min, max)); | 300 | 4.76k | // The set containing everything must case folded. | 301 | 4.76k | self.folded = true; | 302 | 4.76k | return; | 303 | 214k | } | 304 | 214k | | 305 | 214k | // There should be a way to do this in-place with constant memory, | 306 | 214k | // but I couldn't figure out a simple way to do it. So just append | 307 | 214k | // the negation to the end of this range, and then drain it before | 308 | 214k | // we're done. | 309 | 214k | let drain_end = self.ranges.len(); | 310 | 214k | | 311 | 214k | // We do checked arithmetic below because of the canonical ordering | 312 | 214k | // invariant. | 313 | 214k | if self.ranges[0].lower() > I::Bound::min_value() { | 314 | 184k | let upper = self.ranges[0].lower().decrement(); | 315 | 184k | self.ranges.push(I::create(I::Bound::min_value(), upper)); | 316 | 184k | } | 317 | 18.0M | for i in 1..drain_end { | 318 | 18.0M | let lower = self.ranges[i - 1].upper().increment(); | 319 | 18.0M | let upper = self.ranges[i].lower().decrement(); | 320 | 18.0M | self.ranges.push(I::create(lower, upper)); | 321 | 18.0M | } | 322 | 214k | if self.ranges[drain_end - 1].upper() < I::Bound::max_value() { | 323 | 185k | let lower = self.ranges[drain_end - 1].upper().increment(); | 324 | 185k | self.ranges.push(I::create(lower, I::Bound::max_value())); | 325 | 185k | } | 326 | 214k | self.ranges.drain(..drain_end); | 327 | | // We don't need to update whether this set is folded or not, because | 328 | | // it is conservatively preserved through negation. Namely, if a set | 329 | | // is not folded, then it is possible that its negation is folded, for | 330 | | // example, [^☃]. 
But we're fine with assuming that the set is not | 331 | | // folded in that case. (`folded` permits false negatives but not false | 332 | | // positives.) | 333 | | // | 334 | | // But what about when a set is folded, is its negation also | 335 | | // necessarily folded? Yes. Because if a set is folded, then for every | 336 | | // character in the set, it necessarily included its equivalence class | 337 | | // of case folded characters. Negating it in turn means that all | 338 | | // equivalence classes in the set are negated, and any equivalence | 339 | | // class that was previously not in the set is now entirely in the set. | 340 | 219k | } |
|
341 | | |
342 | | /// Converts this set into a canonical ordering. |
343 | 3.45M | fn canonicalize(&mut self) { |
344 | 3.45M | if self.is_canonical() { |
345 | 2.12M | return; |
346 | 1.33M | } |
347 | 1.33M | self.ranges.sort(); |
348 | 1.33M | assert!(!self.ranges.is_empty()); |
349 | | |
350 | | // Is there a way to do this in-place with constant memory? I couldn't |
351 | | // figure out a way to do it. So just append the canonicalization to |
352 | | // the end of this range, and then drain it before we're done. |
353 | 1.33M | let drain_end = self.ranges.len(); |
354 | 82.6M | for oldi in 0..drain_end { |
355 | | // If we've added at least one new range, then check if we can |
356 | | // merge this range into the previously added range. |
357 | 82.6M | if self.ranges.len() > drain_end { |
358 | 81.3M | let (last, rest) = self.ranges.split_last_mut().unwrap(); |
359 | 81.3M | if let Some(union) = last.union(&rest[oldi]) { |
360 | 32.2M | *last = union; |
361 | 32.2M | continue; |
362 | 49.0M | } |
363 | 1.33M | } |
364 | 50.3M | let range = self.ranges[oldi]; |
365 | 50.3M | self.ranges.push(range); |
366 | | } |
367 | 1.33M | self.ranges.drain(..drain_end); |
368 | 3.45M | } <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::canonicalize Line | Count | Source | 343 | 1.25M | fn canonicalize(&mut self) { | 344 | 1.25M | if self.is_canonical() { | 345 | 649k | return; | 346 | 602k | } | 347 | 602k | self.ranges.sort(); | 348 | 602k | assert!(!self.ranges.is_empty()); | 349 | | | 350 | | // Is there a way to do this in-place with constant memory? I couldn't | 351 | | // figure out a way to do it. So just append the canonicalization to | 352 | | // the end of this range, and then drain it before we're done. | 353 | 602k | let drain_end = self.ranges.len(); | 354 | 8.62M | for oldi in 0..drain_end { | 355 | | // If we've added at least one new range, then check if we can | 356 | | // merge this range in the previously added range. | 357 | 8.62M | if self.ranges.len() > drain_end { | 358 | 8.02M | let (last, rest) = self.ranges.split_last_mut().unwrap(); | 359 | 8.02M | if let Some(union) = last.union(&rest[oldi]) { | 360 | 1.00M | *last = union; | 361 | 1.00M | continue; | 362 | 7.01M | } | 363 | 602k | } | 364 | 7.61M | let range = self.ranges[oldi]; | 365 | 7.61M | self.ranges.push(range); | 366 | | } | 367 | 602k | self.ranges.drain(..drain_end); | 368 | 1.25M | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::canonicalize Line | Count | Source | 343 | 2.07M | fn canonicalize(&mut self) { | 344 | 2.07M | if self.is_canonical() { | 345 | 1.33M | return; | 346 | 733k | } | 347 | 733k | self.ranges.sort(); | 348 | 733k | assert!(!self.ranges.is_empty()); | 349 | | | 350 | | // Is there a way to do this in-place with constant memory? I couldn't | 351 | | // figure out a way to do it. So just append the canonicalization to | 352 | | // the end of this range, and then drain it before we're done. | 353 | 733k | let drain_end = self.ranges.len(); | 354 | 74.0M | for oldi in 0..drain_end { | 355 | | // If we've added at least one new range, then check if we can | 356 | | // merge this range in the previously added range. | 357 | 74.0M | if self.ranges.len() > drain_end { | 358 | 73.2M | let (last, rest) = self.ranges.split_last_mut().unwrap(); | 359 | 73.2M | if let Some(union) = last.union(&rest[oldi]) { | 360 | 31.2M | *last = union; | 361 | 31.2M | continue; | 362 | 41.9M | } | 363 | 733k | } | 364 | 42.7M | let range = self.ranges[oldi]; | 365 | 42.7M | self.ranges.push(range); | 366 | | } | 367 | 733k | self.ranges.drain(..drain_end); | 368 | 2.07M | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::canonicalize Line | Count | Source | 343 | 135k | fn canonicalize(&mut self) { | 344 | 135k | if self.is_canonical() { | 345 | 135k | return; | 346 | 0 | } | 347 | 0 | self.ranges.sort(); | 348 | 0 | assert!(!self.ranges.is_empty()); | 349 | | | 350 | | // Is there a way to do this in-place with constant memory? I couldn't | 351 | | // figure out a way to do it. So just append the canonicalization to | 352 | | // the end of this range, and then drain it before we're done. | 353 | 0 | let drain_end = self.ranges.len(); | 354 | 0 | for oldi in 0..drain_end { | 355 | | // If we've added at least one new range, then check if we can | 356 | | // merge this range in the previously added range. | 357 | 0 | if self.ranges.len() > drain_end { | 358 | 0 | let (last, rest) = self.ranges.split_last_mut().unwrap(); | 359 | 0 | if let Some(union) = last.union(&rest[oldi]) { | 360 | 0 | *last = union; | 361 | 0 | continue; | 362 | 0 | } | 363 | 0 | } | 364 | 0 | let range = self.ranges[oldi]; | 365 | 0 | self.ranges.push(range); | 366 | | } | 367 | 0 | self.ranges.drain(..drain_end); | 368 | 135k | } |
Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::canonicalize |
369 | | |
370 | | /// Returns true if and only if this class is in a canonical ordering. |
371 | 3.45M | fn is_canonical(&self) -> bool { |
372 | 87.7M | for pair in self.ranges.windows(2) { |
373 | 87.7M | if pair[0] >= pair[1] { |
374 | 1.28M | return false; |
375 | 86.4M | } |
376 | 86.4M | if pair[0].is_contiguous(&pair[1]) { |
377 | 53.6k | return false; |
378 | 86.4M | } |
379 | | } |
380 | 2.12M | true |
381 | 3.45M | } <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::is_canonical Line | Count | Source | 371 | 1.25M | fn is_canonical(&self) -> bool { | 372 | 7.97M | for pair in self.ranges.windows(2) { | 373 | 7.97M | if pair[0] >= pair[1] { | 374 | 579k | return false; | 375 | 7.39M | } | 376 | 7.39M | if pair[0].is_contiguous(&pair[1]) { | 377 | 22.7k | return false; | 378 | 7.37M | } | 379 | | } | 380 | 649k | true | 381 | 1.25M | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::is_canonical Line | Count | Source | 371 | 2.07M | fn is_canonical(&self) -> bool { | 372 | 79.7M | for pair in self.ranges.windows(2) { | 373 | 79.7M | if pair[0] >= pair[1] { | 374 | 702k | return false; | 375 | 79.0M | } | 376 | 79.0M | if pair[0].is_contiguous(&pair[1]) { | 377 | 30.8k | return false; | 378 | 79.0M | } | 379 | | } | 380 | 1.33M | true | 381 | 2.07M | } |
<regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassBytesRange>>::is_canonical Line | Count | Source | 371 | 135k | fn is_canonical(&self) -> bool { | 372 | 135k | for pair in self.ranges.windows(2) { | 373 | 0 | if pair[0] >= pair[1] { | 374 | 0 | return false; | 375 | 0 | } | 376 | 0 | if pair[0].is_contiguous(&pair[1]) { | 377 | 0 | return false; | 378 | 0 | } | 379 | | } | 380 | 135k | true | 381 | 135k | } |
Unexecuted instantiation: <regex_syntax::hir::interval::IntervalSet<regex_syntax::hir::ClassUnicodeRange>>::is_canonical |
382 | | } |
383 | | |
384 | | /// An iterator over intervals. |
385 | | #[derive(Debug)] |
386 | | pub struct IntervalSetIter<'a, I>(slice::Iter<'a, I>); |
387 | | |
388 | | impl<'a, I> Iterator for IntervalSetIter<'a, I> { |
389 | | type Item = &'a I; |
390 | | |
391 | 61.2M | fn next(&mut self) -> Option<&'a I> { |
392 | 61.2M | self.0.next() |
393 | 61.2M | } <regex_syntax::hir::interval::IntervalSetIter<regex_syntax::hir::ClassBytesRange> as core::iter::traits::iterator::Iterator>::next Line | Count | Source | 391 | 9.71M | fn next(&mut self) -> Option<&'a I> { | 392 | 9.71M | self.0.next() | 393 | 9.71M | } |
<regex_syntax::hir::interval::IntervalSetIter<regex_syntax::hir::ClassUnicodeRange> as core::iter::traits::iterator::Iterator>::next Line | Count | Source | 391 | 51.5M | fn next(&mut self) -> Option<&'a I> { | 392 | 51.5M | self.0.next() | 393 | 51.5M | } |
|
394 | | } |
395 | | |
396 | | pub trait Interval: |
397 | | Clone + Copy + Debug + Default + Eq + PartialEq + PartialOrd + Ord |
398 | | { |
399 | | type Bound: Bound; |
400 | | |
401 | | fn lower(&self) -> Self::Bound; |
402 | | fn upper(&self) -> Self::Bound; |
403 | | fn set_lower(&mut self, bound: Self::Bound); |
404 | | fn set_upper(&mut self, bound: Self::Bound); |
405 | | fn case_fold_simple( |
406 | | &self, |
407 | | intervals: &mut Vec<Self>, |
408 | | ) -> Result<(), unicode::CaseFoldError>; |
409 | | |
410 | | /// Create a new interval. |
411 | 102M | fn create(lower: Self::Bound, upper: Self::Bound) -> Self { |
412 | 102M | let mut int = Self::default(); |
413 | 102M | if lower <= upper { |
414 | 102M | int.set_lower(lower); |
415 | 102M | int.set_upper(upper); |
416 | 102M | } else { |
417 | 32 | int.set_lower(upper); |
418 | 32 | int.set_upper(lower); |
419 | 32 | } |
420 | 102M | int |
421 | 102M | } <regex_syntax::hir::ClassBytesRange as regex_syntax::hir::interval::Interval>::create Line | Count | Source | 411 | 4.53M | fn create(lower: Self::Bound, upper: Self::Bound) -> Self { | 412 | 4.53M | let mut int = Self::default(); | 413 | 4.53M | if lower <= upper { | 414 | 4.53M | int.set_lower(lower); | 415 | 4.53M | int.set_upper(upper); | 416 | 4.53M | } else { | 417 | 0 | int.set_lower(upper); | 418 | 0 | int.set_upper(lower); | 419 | 0 | } | 420 | 4.53M | int | 421 | 4.53M | } |
<regex_syntax::hir::ClassUnicodeRange as regex_syntax::hir::interval::Interval>::create Line | Count | Source | 411 | 97.8M | fn create(lower: Self::Bound, upper: Self::Bound) -> Self { | 412 | 97.8M | let mut int = Self::default(); | 413 | 97.8M | if lower <= upper { | 414 | 97.8M | int.set_lower(lower); | 415 | 97.8M | int.set_upper(upper); | 416 | 97.8M | } else { | 417 | 32 | int.set_lower(upper); | 418 | 32 | int.set_upper(lower); | 419 | 32 | } | 420 | 97.8M | int | 421 | 97.8M | } |
Unexecuted instantiation: <regex_syntax::hir::ClassBytesRange as regex_syntax::hir::interval::Interval>::create Unexecuted instantiation: <regex_syntax::hir::ClassUnicodeRange as regex_syntax::hir::interval::Interval>::create |
422 | | |
423 | | /// Union the given overlapping or adjacent range into this range. |
424 | | /// |
425 | | /// If the two ranges aren't contiguous, then this returns `None`. |
426 | 81.3M | fn union(&self, other: &Self) -> Option<Self> { |
427 | 81.3M | if !self.is_contiguous(other) { |
428 | 49.0M | return None; |
429 | 32.2M | } |
430 | 32.2M | let lower = cmp::min(self.lower(), other.lower()); |
431 | 32.2M | let upper = cmp::max(self.upper(), other.upper()); |
432 | 32.2M | Some(Self::create(lower, upper)) |
433 | 81.3M | } <regex_syntax::hir::ClassBytesRange as regex_syntax::hir::interval::Interval>::union Line | Count | Source | 426 | 8.02M | fn union(&self, other: &Self) -> Option<Self> { | 427 | 8.02M | if !self.is_contiguous(other) { | 428 | 7.01M | return None; | 429 | 1.00M | } | 430 | 1.00M | let lower = cmp::min(self.lower(), other.lower()); | 431 | 1.00M | let upper = cmp::max(self.upper(), other.upper()); | 432 | 1.00M | Some(Self::create(lower, upper)) | 433 | 8.02M | } |
<regex_syntax::hir::ClassUnicodeRange as regex_syntax::hir::interval::Interval>::union Line | Count | Source | 426 | 73.2M | fn union(&self, other: &Self) -> Option<Self> { | 427 | 73.2M | if !self.is_contiguous(other) { | 428 | 41.9M | return None; | 429 | 31.2M | } | 430 | 31.2M | let lower = cmp::min(self.lower(), other.lower()); | 431 | 31.2M | let upper = cmp::max(self.upper(), other.upper()); | 432 | 31.2M | Some(Self::create(lower, upper)) | 433 | 73.2M | } |
Unexecuted instantiation: <regex_syntax::hir::ClassBytesRange as regex_syntax::hir::interval::Interval>::union Unexecuted instantiation: <regex_syntax::hir::ClassUnicodeRange as regex_syntax::hir::interval::Interval>::union |
434 | | |
435 | | /// Intersect this range with the given range and return the result. |
436 | | /// |
437 | | /// If the intersection is empty, then this returns `None`. |
438 | 5.19M | fn intersect(&self, other: &Self) -> Option<Self> { |
439 | 5.19M | let lower = cmp::max(self.lower(), other.lower()); |
440 | 5.19M | let upper = cmp::min(self.upper(), other.upper()); |
441 | 5.19M | if lower <= upper { |
442 | 2.49M | Some(Self::create(lower, upper)) |
443 | | } else { |
444 | 2.69M | None |
445 | | } |
446 | 5.19M | } <regex_syntax::hir::ClassBytesRange as regex_syntax::hir::interval::Interval>::intersect Line | Count | Source | 438 | 316k | fn intersect(&self, other: &Self) -> Option<Self> { | 439 | 316k | let lower = cmp::max(self.lower(), other.lower()); | 440 | 316k | let upper = cmp::min(self.upper(), other.upper()); | 441 | 316k | if lower <= upper { | 442 | 119k | Some(Self::create(lower, upper)) | 443 | | } else { | 444 | 197k | None | 445 | | } | 446 | 316k | } |
<regex_syntax::hir::ClassUnicodeRange as regex_syntax::hir::interval::Interval>::intersect Line | Count | Source | 438 | 4.87M | fn intersect(&self, other: &Self) -> Option<Self> { | 439 | 4.87M | let lower = cmp::max(self.lower(), other.lower()); | 440 | 4.87M | let upper = cmp::min(self.upper(), other.upper()); | 441 | 4.87M | if lower <= upper { | 442 | 2.37M | Some(Self::create(lower, upper)) | 443 | | } else { | 444 | 2.49M | None | 445 | | } | 446 | 4.87M | } |
|
447 | | |
448 | | /// Subtract the given range from this range and return the resulting |
449 | | /// ranges. |
450 | | /// |
451 | | /// If subtraction would result in an empty range, then no ranges are |
452 | | /// returned. |
453 | 2.40M | fn difference(&self, other: &Self) -> (Option<Self>, Option<Self>) { |
454 | 2.40M | if self.is_subset(other) { |
455 | 862k | return (None, None); |
456 | 1.54M | } |
457 | 1.54M | if self.is_intersection_empty(other) { |
458 | 0 | return (Some(self.clone()), None); |
459 | 1.54M | } |
460 | 1.54M | let add_lower = other.lower() > self.lower(); |
461 | 1.54M | let add_upper = other.upper() < self.upper(); |
462 | 1.54M | // We know this because !self.is_subset(other) and the ranges have |
463 | 1.54M | // a non-empty intersection. |
464 | 1.54M | assert!(add_lower || add_upper); |
465 | 1.54M | let mut ret = (None, None); |
466 | 1.54M | if add_lower { |
467 | 1.50M | let upper = other.lower().decrement(); |
468 | 1.50M | ret.0 = Some(Self::create(self.lower(), upper)); |
469 | 1.50M | } |
470 | 1.54M | if add_upper { |
471 | 1.50M | let lower = other.upper().increment(); |
472 | 1.50M | let range = Self::create(lower, self.upper()); |
473 | 1.50M | if ret.0.is_none() { |
474 | 44.9k | ret.0 = Some(range); |
475 | 1.45M | } else { |
476 | 1.45M | ret.1 = Some(range); |
477 | 1.45M | } |
478 | 43.3k | } |
479 | 1.54M | ret |
480 | 2.40M | } <regex_syntax::hir::ClassBytesRange as regex_syntax::hir::interval::Interval>::difference Line | Count | Source | 453 | 130k | fn difference(&self, other: &Self) -> (Option<Self>, Option<Self>) { | 454 | 130k | if self.is_subset(other) { | 455 | 29.8k | return (None, None); | 456 | 101k | } | 457 | 101k | if self.is_intersection_empty(other) { | 458 | 0 | return (Some(self.clone()), None); | 459 | 101k | } | 460 | 101k | let add_lower = other.lower() > self.lower(); | 461 | 101k | let add_upper = other.upper() < self.upper(); | 462 | 101k | // We know this because !self.is_subset(other) and the ranges have | 463 | 101k | // a non-empty intersection. | 464 | 101k | assert!(add_lower || add_upper); | 465 | 101k | let mut ret = (None, None); | 466 | 101k | if add_lower { | 467 | 87.8k | let upper = other.lower().decrement(); | 468 | 87.8k | ret.0 = Some(Self::create(self.lower(), upper)); | 469 | 87.8k | } | 470 | 101k | if add_upper { | 471 | 89.5k | let lower = other.upper().increment(); | 472 | 89.5k | let range = Self::create(lower, self.upper()); | 473 | 89.5k | if ret.0.is_none() { | 474 | 13.3k | ret.0 = Some(range); | 475 | 76.1k | } else { | 476 | 76.1k | ret.1 = Some(range); | 477 | 76.1k | } | 478 | 11.6k | } | 479 | 101k | ret | 480 | 130k | } |
<regex_syntax::hir::ClassUnicodeRange as regex_syntax::hir::interval::Interval>::difference Line | Count | Source | 453 | 2.27M | fn difference(&self, other: &Self) -> (Option<Self>, Option<Self>) { | 454 | 2.27M | if self.is_subset(other) { | 455 | 832k | return (None, None); | 456 | 1.44M | } | 457 | 1.44M | if self.is_intersection_empty(other) { | 458 | 0 | return (Some(self.clone()), None); | 459 | 1.44M | } | 460 | 1.44M | let add_lower = other.lower() > self.lower(); | 461 | 1.44M | let add_upper = other.upper() < self.upper(); | 462 | 1.44M | // We know this because !self.is_subset(other) and the ranges have | 463 | 1.44M | // a non-empty intersection. | 464 | 1.44M | assert!(add_lower || add_upper); | 465 | 1.44M | let mut ret = (None, None); | 466 | 1.44M | if add_lower { | 467 | 1.41M | let upper = other.lower().decrement(); | 468 | 1.41M | ret.0 = Some(Self::create(self.lower(), upper)); | 469 | 1.41M | } | 470 | 1.44M | if add_upper { | 471 | 1.41M | let lower = other.upper().increment(); | 472 | 1.41M | let range = Self::create(lower, self.upper()); | 473 | 1.41M | if ret.0.is_none() { | 474 | 31.6k | ret.0 = Some(range); | 475 | 1.38M | } else { | 476 | 1.38M | ret.1 = Some(range); | 477 | 1.38M | } | 478 | 31.7k | } | 479 | 1.44M | ret | 480 | 2.27M | } |
|
481 | | |
482 | | /// Returns true if and only if the two ranges are contiguous. Two ranges |
483 | | /// are contiguous if and only if the ranges are either overlapping or |
484 | | /// adjacent. |
485 | 167M | fn is_contiguous(&self, other: &Self) -> bool { |
486 | 167M | let lower1 = self.lower().as_u32(); |
487 | 167M | let upper1 = self.upper().as_u32(); |
488 | 167M | let lower2 = other.lower().as_u32(); |
489 | 167M | let upper2 = other.upper().as_u32(); |
490 | 167M | cmp::max(lower1, lower2) <= cmp::min(upper1, upper2).saturating_add(1) |
491 | 167M | } <regex_syntax::hir::ClassBytesRange as regex_syntax::hir::interval::Interval>::is_contiguous Line | Count | Source | 485 | 15.4M | fn is_contiguous(&self, other: &Self) -> bool { | 486 | 15.4M | let lower1 = self.lower().as_u32(); | 487 | 15.4M | let upper1 = self.upper().as_u32(); | 488 | 15.4M | let lower2 = other.lower().as_u32(); | 489 | 15.4M | let upper2 = other.upper().as_u32(); | 490 | 15.4M | cmp::max(lower1, lower2) <= cmp::min(upper1, upper2).saturating_add(1) | 491 | 15.4M | } |
<regex_syntax::hir::ClassUnicodeRange as regex_syntax::hir::interval::Interval>::is_contiguous Line | Count | Source | 485 | 152M | fn is_contiguous(&self, other: &Self) -> bool { | 486 | 152M | let lower1 = self.lower().as_u32(); | 487 | 152M | let upper1 = self.upper().as_u32(); | 488 | 152M | let lower2 = other.lower().as_u32(); | 489 | 152M | let upper2 = other.upper().as_u32(); | 490 | 152M | cmp::max(lower1, lower2) <= cmp::min(upper1, upper2).saturating_add(1) | 491 | 152M | } |
Unexecuted instantiation: <regex_syntax::hir::ClassBytesRange as regex_syntax::hir::interval::Interval>::is_contiguous Unexecuted instantiation: <regex_syntax::hir::ClassUnicodeRange as regex_syntax::hir::interval::Interval>::is_contiguous |
492 | | |
493 | | /// Returns true if and only if the intersection of this range and the |
494 | | /// other range is empty. |
495 | 6.72M | fn is_intersection_empty(&self, other: &Self) -> bool { |
496 | 6.72M | let (lower1, upper1) = (self.lower(), self.upper()); |
497 | 6.72M | let (lower2, upper2) = (other.lower(), other.upper()); |
498 | 6.72M | cmp::max(lower1, lower2) > cmp::min(upper1, upper2) |
499 | 6.72M | } <regex_syntax::hir::ClassBytesRange as regex_syntax::hir::interval::Interval>::is_intersection_empty Line | Count | Source | 495 | 2.05M | fn is_intersection_empty(&self, other: &Self) -> bool { | 496 | 2.05M | let (lower1, upper1) = (self.lower(), self.upper()); | 497 | 2.05M | let (lower2, upper2) = (other.lower(), other.upper()); | 498 | 2.05M | cmp::max(lower1, lower2) > cmp::min(upper1, upper2) | 499 | 2.05M | } |
<regex_syntax::hir::ClassUnicodeRange as regex_syntax::hir::interval::Interval>::is_intersection_empty Line | Count | Source | 495 | 4.67M | fn is_intersection_empty(&self, other: &Self) -> bool { | 496 | 4.67M | let (lower1, upper1) = (self.lower(), self.upper()); | 497 | 4.67M | let (lower2, upper2) = (other.lower(), other.upper()); | 498 | 4.67M | cmp::max(lower1, lower2) > cmp::min(upper1, upper2) | 499 | 4.67M | } |
|
500 | | |
501 | | /// Returns true if and only if this range is a subset of the other range. |
502 | 2.40M | fn is_subset(&self, other: &Self) -> bool { |
503 | 2.40M | let (lower1, upper1) = (self.lower(), self.upper()); |
504 | 2.40M | let (lower2, upper2) = (other.lower(), other.upper()); |
505 | 2.40M | (lower2 <= lower1 && lower1 <= upper2) |
506 | 907k | && (lower2 <= upper1 && upper1 <= upper2) |
507 | 2.40M | } <regex_syntax::hir::ClassBytesRange as regex_syntax::hir::interval::Interval>::is_subset Line | Count | Source | 502 | 130k | fn is_subset(&self, other: &Self) -> bool { | 503 | 130k | let (lower1, upper1) = (self.lower(), self.upper()); | 504 | 130k | let (lower2, upper2) = (other.lower(), other.upper()); | 505 | 130k | (lower2 <= lower1 && lower1 <= upper2) | 506 | 43.1k | && (lower2 <= upper1 && upper1 <= upper2) | 507 | 130k | } |
<regex_syntax::hir::ClassUnicodeRange as regex_syntax::hir::interval::Interval>::is_subset Line | Count | Source | 502 | 2.27M | fn is_subset(&self, other: &Self) -> bool { | 503 | 2.27M | let (lower1, upper1) = (self.lower(), self.upper()); | 504 | 2.27M | let (lower2, upper2) = (other.lower(), other.upper()); | 505 | 2.27M | (lower2 <= lower1 && lower1 <= upper2) | 506 | 864k | && (lower2 <= upper1 && upper1 <= upper2) | 507 | 2.27M | } |
|
508 | | } |
509 | | |
/// An endpoint type for the intervals in this module.
///
/// A bound is a small, copyable, totally ordered value that can be widened
/// to a `u32` and stepped to its neighbouring value in either direction.
/// This file implements it for `u8` and for `char`.
pub trait Bound:
    Copy + Clone + Debug + PartialEq + Eq + PartialOrd + Ord
{
    /// Returns the smallest value of this bound type.
    fn min_value() -> Self;

    /// Returns the largest value of this bound type.
    fn max_value() -> Self;

    /// Returns this bound widened to a `u32`.
    fn as_u32(self) -> u32;

    /// Returns the value immediately following this one.
    ///
    /// The `u8` and `char` implementations in this file panic when no such
    /// value exists.
    fn increment(self) -> Self;

    /// Returns the value immediately preceding this one.
    ///
    /// The `u8` and `char` implementations in this file panic when no such
    /// value exists.
    fn decrement(self) -> Self;
}
519 | | |
520 | | impl Bound for u8 { |
521 | 45.2k | fn min_value() -> Self { |
522 | 45.2k | u8::MIN |
523 | 45.2k | } |
524 | 45.7k | fn max_value() -> Self { |
525 | 45.7k | u8::MAX |
526 | 45.7k | } |
527 | 61.6M | fn as_u32(self) -> u32 { |
528 | 61.6M | u32::from(self) |
529 | 61.6M | } |
530 | 142k | fn increment(self) -> Self { |
531 | 142k | self.checked_add(1).unwrap() |
532 | 142k | } |
533 | 140k | fn decrement(self) -> Self { |
534 | 140k | self.checked_sub(1).unwrap() |
535 | 140k | } |
536 | | } |
537 | | |
538 | | impl Bound for char { |
539 | 403k | fn min_value() -> Self { |
540 | 403k | '\x00' |
541 | 403k | } |
542 | 405k | fn max_value() -> Self { |
543 | 405k | '\u{10FFFF}' |
544 | 405k | } |
545 | 609M | fn as_u32(self) -> u32 { |
546 | 609M | u32::from(self) |
547 | 609M | } |
548 | | |
549 | 19.6M | fn increment(self) -> Self { |
550 | 19.6M | match self { |
551 | 1.07k | '\u{D7FF}' => '\u{E000}', |
552 | 19.6M | c => char::from_u32(u32::from(c).checked_add(1).unwrap()).unwrap(), |
553 | | } |
554 | 19.6M | } |
555 | | |
556 | 19.6M | fn decrement(self) -> Self { |
557 | 19.6M | match self { |
558 | 4.36k | '\u{E000}' => '\u{D7FF}', |
559 | 19.6M | c => char::from_u32(u32::from(c).checked_sub(1).unwrap()).unwrap(), |
560 | | } |
561 | 19.6M | } |
562 | | } |
563 | | |
564 | | // Tests for interval sets are written in src/hir.rs against the public API. |