/rust/registry/src/index.crates.io-6f17d22bba15001f/similar-2.1.0/src/utils.rs
Line | Count | Source (jump to first uncovered line) |
1 | | //! Utilities for common diff related operations. |
2 | | //! |
3 | | //! This module provides specialized utilities and simplified diff operations |
4 | | //! for common operations. It's useful when you want to work with text diffs |
5 | | //! and you're interested in getting vectors of these changes directly. |
6 | | //! |
7 | | //! # Slice Remapping |
8 | | //! |
9 | | //! When working with [`TextDiff`] it's common that one takes advantage of the |
10 | | //! built-in tokenization of the differ. This for instance lets you do |
11 | | //! grapheme level diffs. This is implemented by the differ generating rather |
12 | | //! small slices of strings and running a diff algorithm over them. |
13 | | //! |
14 | | //! The downside of this is that all the [`DiffOp`] objects produced by the |
15 | | //! diffing algorithm encode operations on these rather small slices. For |
16 | | //! a lot of use cases this is not what one wants which can make this very |
17 | | //! inconvenient. This module provides a [`TextDiffRemapper`] which lets you |
18 | | //! map from the ranges that the [`TextDiff`] returns to the original input |
19 | | //! strings. For more information see [`TextDiffRemapper`]. |
20 | | //! |
21 | | //! # Simple Diff Functions |
22 | | //! |
23 | | //! This module provides a range of common text diff functions that will |
24 | | //! produce vectors of `(change_tag, value)` tuples. They will automatically |
25 | | //! optimize towards returning the most useful slice that one would expect for |
26 | | //! the type of diff performed. |
27 | | |
28 | | use std::hash::Hash; |
29 | | use std::ops::{Index, Range}; |
30 | | |
31 | | use crate::{ |
32 | | capture_diff_slices, Algorithm, ChangeTag, DiffOp, DiffableStr, DiffableStrRef, TextDiff, |
33 | | }; |
34 | | |
/// Maps ranges of token indexes back to slices of the value the tokens
/// were taken from.
struct SliceRemapper<'x, T: ?Sized> {
    /// The original, untokenized value that slices are cut from.
    source: &'x T,
    /// For every token, the range it covers inside `source`.
    indexes: Vec<Range<usize>>,
}
39 | | |
40 | | impl<'x, 'slices, T: DiffableStr + ?Sized> SliceRemapper<'x, T> { |
41 | 0 | fn new(source: &'x T, slices: &[&'x T]) -> SliceRemapper<'x, T> { |
42 | 0 | let indexes = slices |
43 | 0 | .iter() |
44 | 0 | .scan(0, |state, item| { |
45 | 0 | let start = *state; |
46 | 0 | let end = start + item.len(); |
47 | 0 | *state = end; |
48 | 0 | Some(start..end) |
49 | 0 | }) |
50 | 0 | .collect(); |
51 | 0 | SliceRemapper { source, indexes } |
52 | 0 | } |
53 | | |
54 | 0 | fn slice(&self, range: Range<usize>) -> Option<&'x T> { |
55 | 0 | let start = self.indexes.get(range.start)?.start; |
56 | 0 | let end = self.indexes.get(range.end - 1)?.end; |
57 | 0 | Some(self.source.slice(start..end)) |
58 | 0 | } |
59 | | } |
60 | | |
61 | | impl<'x, T: DiffableStr + ?Sized> Index<Range<usize>> for SliceRemapper<'x, T> { |
62 | | type Output = T; |
63 | | |
64 | 0 | fn index(&self, range: Range<usize>) -> &Self::Output { |
65 | 0 | self.slice(range).expect("out of bounds") |
66 | 0 | } |
67 | | } |
68 | | |
69 | | /// A remapper that can remap diff ops to the original slices. |
70 | | /// |
71 | | /// The idea here is that when a [`TextDiff`](crate::TextDiff) is created from |
72 | | /// two strings and the internal tokenization is used, this remapper can take |
73 | | /// a range in the tokenized sequences and remap it to the original string. |
74 | | /// This is particularly useful when you want to do things like character or |
75 | | /// grapheme level diffs but you want to not have to iterate over small sequences |
76 | | /// but large consequitive ones from the source. |
77 | | /// |
78 | | /// ```rust |
79 | | /// use similar::{ChangeTag, TextDiff}; |
80 | | /// use similar::utils::TextDiffRemapper; |
81 | | /// |
82 | | /// let old = "yo! foo bar baz"; |
83 | | /// let new = "yo! foo bor baz"; |
84 | | /// let diff = TextDiff::from_words(old, new); |
85 | | /// let remapper = TextDiffRemapper::from_text_diff(&diff, old, new); |
86 | | /// let changes: Vec<_> = diff.ops() |
87 | | /// .iter() |
88 | | /// .flat_map(move |x| remapper.iter_slices(x)) |
89 | | /// .collect(); |
90 | | /// |
91 | | /// assert_eq!(changes, vec![ |
92 | | /// (ChangeTag::Equal, "yo! foo "), |
93 | | /// (ChangeTag::Delete, "bar"), |
94 | | /// (ChangeTag::Insert, "bor"), |
95 | | /// (ChangeTag::Equal, " baz") |
96 | | /// ]); |
97 | | pub struct TextDiffRemapper<'x, T: ?Sized> { |
98 | | old: SliceRemapper<'x, T>, |
99 | | new: SliceRemapper<'x, T>, |
100 | | } |
101 | | |
102 | | impl<'x, 'slices, T: DiffableStr + ?Sized> TextDiffRemapper<'x, T> { |
103 | | /// Creates a new remapper from strings and slices. |
104 | 0 | pub fn new( |
105 | 0 | old_slices: &[&'x T], |
106 | 0 | new_slices: &[&'x T], |
107 | 0 | old: &'x T, |
108 | 0 | new: &'x T, |
109 | 0 | ) -> TextDiffRemapper<'x, T> { |
110 | 0 | TextDiffRemapper { |
111 | 0 | old: SliceRemapper::new(old, old_slices), |
112 | 0 | new: SliceRemapper::new(new, new_slices), |
113 | 0 | } |
114 | 0 | } |
115 | | |
116 | | /// Creates a new remapper from a text diff and the original strings. |
117 | 0 | pub fn from_text_diff<'old, 'new, 'bufs>( |
118 | 0 | diff: &TextDiff<'old, 'new, 'bufs, T>, |
119 | 0 | old: &'x T, |
120 | 0 | new: &'x T, |
121 | 0 | ) -> TextDiffRemapper<'x, T> |
122 | 0 | where |
123 | 0 | 'old: 'x, |
124 | 0 | 'new: 'x, |
125 | 0 | { |
126 | 0 | TextDiffRemapper { |
127 | 0 | old: SliceRemapper::new(old, diff.old_slices()), |
128 | 0 | new: SliceRemapper::new(new, diff.new_slices()), |
129 | 0 | } |
130 | 0 | } |
131 | | |
132 | | /// Slices into the old string. |
133 | 0 | pub fn slice_old(&self, range: Range<usize>) -> Option<&'x T> { |
134 | 0 | self.old.slice(range) |
135 | 0 | } |
136 | | |
137 | | /// Slices into the new string. |
138 | 0 | pub fn slice_new(&self, range: Range<usize>) -> Option<&'x T> { |
139 | 0 | self.new.slice(range) |
140 | 0 | } |
141 | | |
142 | | /// Given a diffop yields the changes it encodes against the original strings. |
143 | | /// |
144 | | /// This is the same as the [`DiffOp::iter_slices`] method. |
145 | | /// |
146 | | /// ## Panics |
147 | | /// |
148 | | /// This method can panic if the input strings passed to the constructor |
149 | | /// are incompatible with the input strings passed to the diffing algorithm. |
150 | 0 | pub fn iter_slices(&self, op: &DiffOp) -> impl Iterator<Item = (ChangeTag, &'x T)> { |
151 | 0 | // note: this is equivalent to the code in `DiffOp::iter_slices`. It is |
152 | 0 | // a copy/paste because the slicing currently cannot be well abstracted |
153 | 0 | // because of lifetime issues caused by the `Index` trait. |
154 | 0 | match *op { |
155 | 0 | DiffOp::Equal { old_index, len, .. } => { |
156 | 0 | Some((ChangeTag::Equal, self.old.slice(old_index..old_index + len))) |
157 | 0 | .into_iter() |
158 | 0 | .chain(None.into_iter()) |
159 | | } |
160 | | DiffOp::Insert { |
161 | 0 | new_index, new_len, .. |
162 | 0 | } => Some(( |
163 | 0 | ChangeTag::Insert, |
164 | 0 | self.new.slice(new_index..new_index + new_len), |
165 | 0 | )) |
166 | 0 | .into_iter() |
167 | 0 | .chain(None.into_iter()), |
168 | | DiffOp::Delete { |
169 | 0 | old_index, old_len, .. |
170 | 0 | } => Some(( |
171 | 0 | ChangeTag::Delete, |
172 | 0 | self.old.slice(old_index..old_index + old_len), |
173 | 0 | )) |
174 | 0 | .into_iter() |
175 | 0 | .chain(None.into_iter()), |
176 | | DiffOp::Replace { |
177 | 0 | old_index, |
178 | 0 | old_len, |
179 | 0 | new_index, |
180 | 0 | new_len, |
181 | 0 | } => Some(( |
182 | 0 | ChangeTag::Delete, |
183 | 0 | self.old.slice(old_index..old_index + old_len), |
184 | 0 | )) |
185 | 0 | .into_iter() |
186 | 0 | .chain( |
187 | 0 | Some(( |
188 | 0 | ChangeTag::Insert, |
189 | 0 | self.new.slice(new_index..new_index + new_len), |
190 | 0 | )) |
191 | 0 | .into_iter(), |
192 | 0 | ), |
193 | | } |
194 | 0 | .map(|(tag, opt_val)| (tag, opt_val.expect("slice out of bounds"))) |
195 | 0 | } |
196 | | } |
197 | | |
198 | | /// Shortcut for diffing two slices. |
199 | | /// |
200 | | /// This function produces the diff of two slices and returns a vector |
201 | | /// with the changes. |
202 | | /// |
203 | | /// ```rust |
204 | | /// use similar::{Algorithm, ChangeTag}; |
205 | | /// use similar::utils::diff_slices; |
206 | | /// |
207 | | /// let old = "foo\nbar\nbaz".lines().collect::<Vec<_>>(); |
208 | | /// let new = "foo\nbar\nBAZ".lines().collect::<Vec<_>>(); |
209 | | /// assert_eq!(diff_slices(Algorithm::Myers, &old, &new), vec![ |
210 | | /// (ChangeTag::Equal, &["foo", "bar"][..]), |
211 | | /// (ChangeTag::Delete, &["baz"][..]), |
212 | | /// (ChangeTag::Insert, &["BAZ"][..]), |
213 | | /// ]); |
214 | | /// ``` |
215 | 0 | pub fn diff_slices<'x, T: PartialEq + Hash + Ord>( |
216 | 0 | alg: Algorithm, |
217 | 0 | old: &'x [T], |
218 | 0 | new: &'x [T], |
219 | 0 | ) -> Vec<(ChangeTag, &'x [T])> { |
220 | 0 | capture_diff_slices(alg, old, new) |
221 | 0 | .iter() |
222 | 0 | .flat_map(|op| op.iter_slices(old, new)) |
223 | 0 | .collect() |
224 | 0 | } |
225 | | |
226 | | /// Shortcut for making a character level diff. |
227 | | /// |
228 | | /// This function produces the diff of two strings and returns a vector |
229 | | /// with the changes. It returns connected slices into the original string |
230 | | /// rather than character level slices. |
231 | | /// |
232 | | /// ```rust |
233 | | /// use similar::{Algorithm, ChangeTag}; |
234 | | /// use similar::utils::diff_chars; |
235 | | /// |
236 | | /// assert_eq!(diff_chars(Algorithm::Myers, "foobarbaz", "fooBARbaz"), vec![ |
237 | | /// (ChangeTag::Equal, "foo"), |
238 | | /// (ChangeTag::Delete, "bar"), |
239 | | /// (ChangeTag::Insert, "BAR"), |
240 | | /// (ChangeTag::Equal, "baz"), |
241 | | /// ]); |
242 | | /// ``` |
243 | 0 | pub fn diff_chars<'x, T: DiffableStrRef + ?Sized>( |
244 | 0 | alg: Algorithm, |
245 | 0 | old: &'x T, |
246 | 0 | new: &'x T, |
247 | 0 | ) -> Vec<(ChangeTag, &'x T::Output)> { |
248 | 0 | let old = old.as_diffable_str(); |
249 | 0 | let new = new.as_diffable_str(); |
250 | 0 | let diff = TextDiff::configure().algorithm(alg).diff_chars(old, new); |
251 | 0 | let remapper = TextDiffRemapper::from_text_diff(&diff, old, new); |
252 | 0 | diff.ops() |
253 | 0 | .iter() |
254 | 0 | .flat_map(move |x| remapper.iter_slices(x)) |
255 | 0 | .collect() |
256 | 0 | } |
257 | | |
258 | | /// Shortcut for making a word level diff. |
259 | | /// |
260 | | /// This function produces the diff of two strings and returns a vector |
261 | | /// with the changes. It returns connected slices into the original string |
262 | | /// rather than word level slices. |
263 | | /// |
264 | | /// ```rust |
265 | | /// use similar::{Algorithm, ChangeTag}; |
266 | | /// use similar::utils::diff_words; |
267 | | /// |
268 | | /// assert_eq!(diff_words(Algorithm::Myers, "foo bar baz", "foo bor baz"), vec![ |
269 | | /// (ChangeTag::Equal, "foo "), |
270 | | /// (ChangeTag::Delete, "bar"), |
271 | | /// (ChangeTag::Insert, "bor"), |
272 | | /// (ChangeTag::Equal, " baz"), |
273 | | /// ]); |
274 | | /// ``` |
275 | 0 | pub fn diff_words<'x, T: DiffableStrRef + ?Sized>( |
276 | 0 | alg: Algorithm, |
277 | 0 | old: &'x T, |
278 | 0 | new: &'x T, |
279 | 0 | ) -> Vec<(ChangeTag, &'x T::Output)> { |
280 | 0 | let old = old.as_diffable_str(); |
281 | 0 | let new = new.as_diffable_str(); |
282 | 0 | let diff = TextDiff::configure().algorithm(alg).diff_words(old, new); |
283 | 0 | let remapper = TextDiffRemapper::from_text_diff(&diff, old, new); |
284 | 0 | diff.ops() |
285 | 0 | .iter() |
286 | 0 | .flat_map(move |x| remapper.iter_slices(x)) |
287 | 0 | .collect() |
288 | 0 | } |
289 | | |
/// Shortcut for making a unicode word level diff.
///
/// Diffs the two strings by unicode words and returns the changes as a
/// vector. Each change is remapped to a connected slice of the original
/// string instead of being reported as word level slices.
///
/// ```rust
/// use similar::{Algorithm, ChangeTag};
/// use similar::utils::diff_unicode_words;
///
/// let old = "The quick (\"brown\") fox can't jump 32.3 feet, right?";
/// let new = "The quick (\"brown\") fox can't jump 9.84 meters, right?";
/// assert_eq!(diff_unicode_words(Algorithm::Myers, old, new), vec![
///     (ChangeTag::Equal, "The quick (\"brown\") fox can\'t jump "),
///     (ChangeTag::Delete, "32.3"),
///     (ChangeTag::Insert, "9.84"),
///     (ChangeTag::Equal, " "),
///     (ChangeTag::Delete, "feet"),
///     (ChangeTag::Insert, "meters"),
///     (ChangeTag::Equal, ", right?")
/// ]);
/// ```
///
/// This requires the `unicode` feature.
#[cfg(feature = "unicode")]
pub fn diff_unicode_words<'x, T: DiffableStrRef + ?Sized>(
    alg: Algorithm,
    old: &'x T,
    new: &'x T,
) -> Vec<(ChangeTag, &'x T::Output)> {
    let old = old.as_diffable_str();
    let new = new.as_diffable_str();
    let diff = TextDiff::configure()
        .algorithm(alg)
        .diff_unicode_words(old, new);
    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);

    let mut changes = Vec::new();
    for op in diff.ops() {
        changes.extend(remapper.iter_slices(op));
    }
    changes
}
331 | | |
/// Shortcut for making a grapheme level diff.
///
/// Diffs the two strings grapheme by grapheme and returns the changes as a
/// vector. Each change is remapped to a connected slice of the original
/// string instead of being reported as grapheme level slices.
///
/// ```rust
/// use similar::{Algorithm, ChangeTag};
/// use similar::utils::diff_graphemes;
///
/// let old = "The flag of Austria is 🇦🇹";
/// let new = "The flag of Albania is 🇦🇱";
/// assert_eq!(diff_graphemes(Algorithm::Myers, old, new), vec![
///     (ChangeTag::Equal, "The flag of A"),
///     (ChangeTag::Delete, "ustr"),
///     (ChangeTag::Insert, "lban"),
///     (ChangeTag::Equal, "ia is "),
///     (ChangeTag::Delete, "🇦🇹"),
///     (ChangeTag::Insert, "🇦🇱"),
/// ]);
/// ```
///
/// This requires the `unicode` feature.
#[cfg(feature = "unicode")]
pub fn diff_graphemes<'x, T: DiffableStrRef + ?Sized>(
    alg: Algorithm,
    old: &'x T,
    new: &'x T,
) -> Vec<(ChangeTag, &'x T::Output)> {
    let old = old.as_diffable_str();
    let new = new.as_diffable_str();
    let diff = TextDiff::configure()
        .algorithm(alg)
        .diff_graphemes(old, new);
    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);

    let mut changes = Vec::new();
    for op in diff.ops() {
        changes.extend(remapper.iter_slices(op));
    }
    changes
}
372 | | |
373 | | /// Shortcut for making a line diff. |
374 | | /// |
375 | | /// This function produces the diff of two slices and returns a vector |
376 | | /// with the changes. Unlike [`diff_chars`] or [`diff_slices`] it returns a |
377 | | /// change tag for each line. |
378 | | /// |
379 | | /// ```rust |
380 | | /// use similar::{Algorithm, ChangeTag}; |
381 | | /// use similar::utils::diff_lines; |
382 | | /// |
383 | | /// assert_eq!(diff_lines(Algorithm::Myers, "foo\nbar\nbaz\nblah", "foo\nbar\nbaz\nblurgh"), vec![ |
384 | | /// (ChangeTag::Equal, "foo\n"), |
385 | | /// (ChangeTag::Equal, "bar\n"), |
386 | | /// (ChangeTag::Equal, "baz\n"), |
387 | | /// (ChangeTag::Delete, "blah"), |
388 | | /// (ChangeTag::Insert, "blurgh"), |
389 | | /// ]); |
390 | | /// ``` |
391 | 0 | pub fn diff_lines<'x, T: DiffableStrRef + ?Sized>( |
392 | 0 | alg: Algorithm, |
393 | 0 | old: &'x T, |
394 | 0 | new: &'x T, |
395 | 0 | ) -> Vec<(ChangeTag, &'x T::Output)> { |
396 | 0 | TextDiff::configure() |
397 | 0 | .algorithm(alg) |
398 | 0 | .diff_lines(old, new) |
399 | 0 | .iter_all_changes() |
400 | 0 | .map(|change| (change.tag(), change.value())) |
401 | 0 | .collect() |
402 | 0 | } |
403 | | |
/// Checks that token ranges are remapped onto connected slices of the
/// source string and that an out-of-range token index yields `None`.
#[test]
fn test_remapper() {
    let a = "foo bar baz";
    // The assertions below rely on the tokenizer producing five tokens,
    // with the separating whitespace as tokens of its own.
    let words = a.tokenize_words();
    let remap = SliceRemapper::new(a, &words);
    assert_eq!(remap.slice(0..3), Some("foo bar"));
    assert_eq!(remap.slice(1..3), Some(" bar"));
    assert_eq!(remap.slice(0..1), Some("foo"));
    assert_eq!(remap.slice(0..5), Some("foo bar baz"));
    assert_eq!(remap.slice(0..6), None);
}