Coverage Report

Created: 2023-04-25 07:07

/rust/registry/src/index.crates.io-6f17d22bba15001f/similar-2.1.0/src/utils.rs
Line
Count
Source (jump to first uncovered line)
1
//! Utilities for common diff related operations.
2
//!
3
//! This module provides specialized utilities and simplified diff operations
4
//! for common operations.  It's useful when you want to work with text diffs
5
//! and you're interested in getting vectors of these changes directly.
6
//!
7
//! # Slice Remapping
8
//!
9
//! When working with [`TextDiff`] it's common that one takes advantage of the
10
//! built-in tokenization of the differ.  This for instance lets you do
11
//! grapheme level diffs.  This is implemented by the differ generating rather
12
//! small slices of strings and running a diff algorithm over them.
13
//!
14
//! The downside of this is that all the [`DiffOp`] objects produced by the
15
//! diffing algorithm encode operations on these rather small slices.  For
16
//! a lot of use cases this is not what one wants which can make this very
17
//! inconvenient.  This module provides a [`TextDiffRemapper`] which lets you
18
//! map from the ranges that the [`TextDiff`] returns to the original input
19
//! strings.  For more information see [`TextDiffRemapper`].
20
//!
21
//! # Simple Diff Functions
22
//!
23
//! This module provides a range of common text diff functions that will
24
//! produce vectors of `(change_tag, value)` tuples.  They will automatically
25
//! optimize towards returning the most useful slice that one would expect for
26
//! the type of diff performed.
27
28
use std::hash::Hash;
29
use std::ops::{Index, Range};
30
31
use crate::{
32
    capture_diff_slices, Algorithm, ChangeTag, DiffOp, DiffableStr, DiffableStrRef, TextDiff,
33
};
34
35
struct SliceRemapper<'x, T: ?Sized> {
36
    source: &'x T,
37
    indexes: Vec<Range<usize>>,
38
}
39
40
impl<'x, 'slices, T: DiffableStr + ?Sized> SliceRemapper<'x, T> {
41
0
    fn new(source: &'x T, slices: &[&'x T]) -> SliceRemapper<'x, T> {
42
0
        let indexes = slices
43
0
            .iter()
44
0
            .scan(0, |state, item| {
45
0
                let start = *state;
46
0
                let end = start + item.len();
47
0
                *state = end;
48
0
                Some(start..end)
49
0
            })
50
0
            .collect();
51
0
        SliceRemapper { source, indexes }
52
0
    }
53
54
0
    fn slice(&self, range: Range<usize>) -> Option<&'x T> {
55
0
        let start = self.indexes.get(range.start)?.start;
56
0
        let end = self.indexes.get(range.end - 1)?.end;
57
0
        Some(self.source.slice(start..end))
58
0
    }
59
}
60
61
impl<'x, T: DiffableStr + ?Sized> Index<Range<usize>> for SliceRemapper<'x, T> {
62
    type Output = T;
63
64
0
    fn index(&self, range: Range<usize>) -> &Self::Output {
65
0
        self.slice(range).expect("out of bounds")
66
0
    }
67
}
68
69
/// A remapper that can remap diff ops to the original slices.
70
///
71
/// The idea here is that when a [`TextDiff`](crate::TextDiff) is created from
72
/// two strings and the internal tokenization is used, this remapper can take
73
/// a range in the tokenized sequences and remap it to the original string.
74
/// This is particularly useful when you want to do things like character or
75
/// grapheme level diffs but you would rather not iterate over small sequences
76
/// but large consecutive ones from the source.
77
///
78
/// ```rust
79
/// use similar::{ChangeTag, TextDiff};
80
/// use similar::utils::TextDiffRemapper;
81
///
82
/// let old = "yo! foo bar baz";
83
/// let new = "yo! foo bor baz";
84
/// let diff = TextDiff::from_words(old, new);
85
/// let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
86
/// let changes: Vec<_> = diff.ops()
87
///     .iter()
88
///     .flat_map(move |x| remapper.iter_slices(x))
89
///     .collect();
90
///
91
/// assert_eq!(changes, vec![
92
///     (ChangeTag::Equal, "yo! foo "),
93
///     (ChangeTag::Delete, "bar"),
94
///     (ChangeTag::Insert, "bor"),
95
///     (ChangeTag::Equal, " baz")
96
/// ]);
97
pub struct TextDiffRemapper<'x, T: ?Sized> {
98
    old: SliceRemapper<'x, T>,
99
    new: SliceRemapper<'x, T>,
100
}
101
102
impl<'x, 'slices, T: DiffableStr + ?Sized> TextDiffRemapper<'x, T> {
103
    /// Creates a new remapper from strings and slices.
104
0
    pub fn new(
105
0
        old_slices: &[&'x T],
106
0
        new_slices: &[&'x T],
107
0
        old: &'x T,
108
0
        new: &'x T,
109
0
    ) -> TextDiffRemapper<'x, T> {
110
0
        TextDiffRemapper {
111
0
            old: SliceRemapper::new(old, old_slices),
112
0
            new: SliceRemapper::new(new, new_slices),
113
0
        }
114
0
    }
115
116
    /// Creates a new remapper from a text diff and the original strings.
117
0
    pub fn from_text_diff<'old, 'new, 'bufs>(
118
0
        diff: &TextDiff<'old, 'new, 'bufs, T>,
119
0
        old: &'x T,
120
0
        new: &'x T,
121
0
    ) -> TextDiffRemapper<'x, T>
122
0
    where
123
0
        'old: 'x,
124
0
        'new: 'x,
125
0
    {
126
0
        TextDiffRemapper {
127
0
            old: SliceRemapper::new(old, diff.old_slices()),
128
0
            new: SliceRemapper::new(new, diff.new_slices()),
129
0
        }
130
0
    }
131
132
    /// Slices into the old string.
133
0
    pub fn slice_old(&self, range: Range<usize>) -> Option<&'x T> {
134
0
        self.old.slice(range)
135
0
    }
136
137
    /// Slices into the new string.
138
0
    pub fn slice_new(&self, range: Range<usize>) -> Option<&'x T> {
139
0
        self.new.slice(range)
140
0
    }
141
142
    /// Given a diffop yields the changes it encodes against the original strings.
143
    ///
144
    /// This is the same as the [`DiffOp::iter_slices`] method.
145
    ///
146
    /// ## Panics
147
    ///
148
    /// This method can panic if the input strings passed to the constructor
149
    /// are incompatible with the input strings passed to the diffing algorithm.
150
0
    pub fn iter_slices(&self, op: &DiffOp) -> impl Iterator<Item = (ChangeTag, &'x T)> {
151
0
        // note: this is equivalent to the code in `DiffOp::iter_slices`.  It is
152
0
        // a copy/paste because the slicing currently cannot be well abstracted
153
0
        // because of lifetime issues caused by the `Index` trait.
154
0
        match *op {
155
0
            DiffOp::Equal { old_index, len, .. } => {
156
0
                Some((ChangeTag::Equal, self.old.slice(old_index..old_index + len)))
157
0
                    .into_iter()
158
0
                    .chain(None.into_iter())
159
            }
160
            DiffOp::Insert {
161
0
                new_index, new_len, ..
162
0
            } => Some((
163
0
                ChangeTag::Insert,
164
0
                self.new.slice(new_index..new_index + new_len),
165
0
            ))
166
0
            .into_iter()
167
0
            .chain(None.into_iter()),
168
            DiffOp::Delete {
169
0
                old_index, old_len, ..
170
0
            } => Some((
171
0
                ChangeTag::Delete,
172
0
                self.old.slice(old_index..old_index + old_len),
173
0
            ))
174
0
            .into_iter()
175
0
            .chain(None.into_iter()),
176
            DiffOp::Replace {
177
0
                old_index,
178
0
                old_len,
179
0
                new_index,
180
0
                new_len,
181
0
            } => Some((
182
0
                ChangeTag::Delete,
183
0
                self.old.slice(old_index..old_index + old_len),
184
0
            ))
185
0
            .into_iter()
186
0
            .chain(
187
0
                Some((
188
0
                    ChangeTag::Insert,
189
0
                    self.new.slice(new_index..new_index + new_len),
190
0
                ))
191
0
                .into_iter(),
192
0
            ),
193
        }
194
0
        .map(|(tag, opt_val)| (tag, opt_val.expect("slice out of bounds")))
195
0
    }
196
}
197
198
/// Shortcut for diffing two slices.
199
///
200
/// This function produces the diff of two slices and returns a vector
201
/// with the changes.
202
///
203
/// ```rust
204
/// use similar::{Algorithm, ChangeTag};
205
/// use similar::utils::diff_slices;
206
///
207
/// let old = "foo\nbar\nbaz".lines().collect::<Vec<_>>();
208
/// let new = "foo\nbar\nBAZ".lines().collect::<Vec<_>>();
209
/// assert_eq!(diff_slices(Algorithm::Myers, &old, &new), vec![
210
///     (ChangeTag::Equal, &["foo", "bar"][..]),
211
///     (ChangeTag::Delete, &["baz"][..]),
212
///     (ChangeTag::Insert, &["BAZ"][..]),
213
/// ]);
214
/// ```
215
0
pub fn diff_slices<'x, T: PartialEq + Hash + Ord>(
216
0
    alg: Algorithm,
217
0
    old: &'x [T],
218
0
    new: &'x [T],
219
0
) -> Vec<(ChangeTag, &'x [T])> {
220
0
    capture_diff_slices(alg, old, new)
221
0
        .iter()
222
0
        .flat_map(|op| op.iter_slices(old, new))
223
0
        .collect()
224
0
}
225
226
/// Shortcut for making a character level diff.
227
///
228
/// This function produces the diff of two strings and returns a vector
229
/// with the changes.  It returns connected slices into the original string
230
/// rather than character level slices.
231
///
232
/// ```rust
233
/// use similar::{Algorithm, ChangeTag};
234
/// use similar::utils::diff_chars;
235
///
236
/// assert_eq!(diff_chars(Algorithm::Myers, "foobarbaz", "fooBARbaz"), vec![
237
///     (ChangeTag::Equal, "foo"),
238
///     (ChangeTag::Delete, "bar"),
239
///     (ChangeTag::Insert, "BAR"),
240
///     (ChangeTag::Equal, "baz"),
241
/// ]);
242
/// ```
243
0
pub fn diff_chars<'x, T: DiffableStrRef + ?Sized>(
244
0
    alg: Algorithm,
245
0
    old: &'x T,
246
0
    new: &'x T,
247
0
) -> Vec<(ChangeTag, &'x T::Output)> {
248
0
    let old = old.as_diffable_str();
249
0
    let new = new.as_diffable_str();
250
0
    let diff = TextDiff::configure().algorithm(alg).diff_chars(old, new);
251
0
    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
252
0
    diff.ops()
253
0
        .iter()
254
0
        .flat_map(move |x| remapper.iter_slices(x))
255
0
        .collect()
256
0
}
257
258
/// Shortcut for making a word level diff.
259
///
260
/// This function produces the diff of two strings and returns a vector
261
/// with the changes.  It returns connected slices into the original string
262
/// rather than word level slices.
263
///
264
/// ```rust
265
/// use similar::{Algorithm, ChangeTag};
266
/// use similar::utils::diff_words;
267
///
268
/// assert_eq!(diff_words(Algorithm::Myers, "foo bar baz", "foo bor baz"), vec![
269
///     (ChangeTag::Equal, "foo "),
270
///     (ChangeTag::Delete, "bar"),
271
///     (ChangeTag::Insert, "bor"),
272
///     (ChangeTag::Equal, " baz"),
273
/// ]);
274
/// ```
275
0
pub fn diff_words<'x, T: DiffableStrRef + ?Sized>(
276
0
    alg: Algorithm,
277
0
    old: &'x T,
278
0
    new: &'x T,
279
0
) -> Vec<(ChangeTag, &'x T::Output)> {
280
0
    let old = old.as_diffable_str();
281
0
    let new = new.as_diffable_str();
282
0
    let diff = TextDiff::configure().algorithm(alg).diff_words(old, new);
283
0
    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
284
0
    diff.ops()
285
0
        .iter()
286
0
        .flat_map(move |x| remapper.iter_slices(x))
287
0
        .collect()
288
0
}
289
290
/// Shortcut for making a unicode word level diff.
291
///
292
/// This function produces the diff of two strings and returns a vector
293
/// with the changes.  It returns connected slices into the original string
294
/// rather than word level slices.
295
///
296
/// ```rust
297
/// use similar::{Algorithm, ChangeTag};
298
/// use similar::utils::diff_unicode_words;
299
///
300
/// let old = "The quick (\"brown\") fox can't jump 32.3 feet, right?";
301
/// let new = "The quick (\"brown\") fox can't jump 9.84 meters, right?";
302
/// assert_eq!(diff_unicode_words(Algorithm::Myers, old, new), vec![
303
///     (ChangeTag::Equal, "The quick (\"brown\") fox can\'t jump "),
304
///     (ChangeTag::Delete, "32.3"),
305
///     (ChangeTag::Insert, "9.84"),
306
///     (ChangeTag::Equal, " "),
307
///     (ChangeTag::Delete, "feet"),
308
///     (ChangeTag::Insert, "meters"),
309
///     (ChangeTag::Equal, ", right?")
310
/// ]);
311
/// ```
312
///
313
/// This requires the `unicode` feature.
314
#[cfg(feature = "unicode")]
315
pub fn diff_unicode_words<'x, T: DiffableStrRef + ?Sized>(
316
    alg: Algorithm,
317
    old: &'x T,
318
    new: &'x T,
319
) -> Vec<(ChangeTag, &'x T::Output)> {
320
    let old = old.as_diffable_str();
321
    let new = new.as_diffable_str();
322
    let diff = TextDiff::configure()
323
        .algorithm(alg)
324
        .diff_unicode_words(old, new);
325
    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
326
    diff.ops()
327
        .iter()
328
        .flat_map(move |x| remapper.iter_slices(x))
329
        .collect()
330
}
331
332
/// Shortcut for making a grapheme level diff.
333
///
334
/// This function produces the diff of two strings and returns a vector
335
/// with the changes.  It returns connected slices into the original string
336
/// rather than grapheme level slices.
337
///
338
/// ```rust
339
/// use similar::{Algorithm, ChangeTag};
340
/// use similar::utils::diff_graphemes;
341
///
342
/// let old = "The flag of Austria is 🇦🇹";
343
/// let new = "The flag of Albania is 🇦🇱";
344
/// assert_eq!(diff_graphemes(Algorithm::Myers, old, new), vec![
345
///     (ChangeTag::Equal, "The flag of A"),
346
///     (ChangeTag::Delete, "ustr"),
347
///     (ChangeTag::Insert, "lban"),
348
///     (ChangeTag::Equal, "ia is "),
349
///     (ChangeTag::Delete, "🇦🇹"),
350
///     (ChangeTag::Insert, "🇦🇱"),
351
/// ]);
352
/// ```
353
///
354
/// This requires the `unicode` feature.
355
#[cfg(feature = "unicode")]
356
pub fn diff_graphemes<'x, T: DiffableStrRef + ?Sized>(
357
    alg: Algorithm,
358
    old: &'x T,
359
    new: &'x T,
360
) -> Vec<(ChangeTag, &'x T::Output)> {
361
    let old = old.as_diffable_str();
362
    let new = new.as_diffable_str();
363
    let diff = TextDiff::configure()
364
        .algorithm(alg)
365
        .diff_graphemes(old, new);
366
    let remapper = TextDiffRemapper::from_text_diff(&diff, old, new);
367
    diff.ops()
368
        .iter()
369
        .flat_map(move |x| remapper.iter_slices(x))
370
        .collect()
371
}
372
373
/// Shortcut for making a line diff.
374
///
375
/// This function produces the diff of two strings and returns a vector
376
/// with the changes.  Unlike [`diff_chars`] or [`diff_slices`] it returns a
377
/// change tag for each line.
378
///
379
/// ```rust
380
/// use similar::{Algorithm, ChangeTag};
381
/// use similar::utils::diff_lines;
382
///
383
/// assert_eq!(diff_lines(Algorithm::Myers, "foo\nbar\nbaz\nblah", "foo\nbar\nbaz\nblurgh"), vec![
384
///     (ChangeTag::Equal, "foo\n"),
385
///     (ChangeTag::Equal, "bar\n"),
386
///     (ChangeTag::Equal, "baz\n"),
387
///     (ChangeTag::Delete, "blah"),
388
///     (ChangeTag::Insert, "blurgh"),
389
/// ]);
390
/// ```
391
0
pub fn diff_lines<'x, T: DiffableStrRef + ?Sized>(
392
0
    alg: Algorithm,
393
0
    old: &'x T,
394
0
    new: &'x T,
395
0
) -> Vec<(ChangeTag, &'x T::Output)> {
396
0
    TextDiff::configure()
397
0
        .algorithm(alg)
398
0
        .diff_lines(old, new)
399
0
        .iter_all_changes()
400
0
        .map(|change| (change.tag(), change.value()))
401
0
        .collect()
402
0
}
403
404
#[test]
405
fn test_remapper() {
406
    let a = "foo bar baz";
407
    let words = a.tokenize_words();
408
    dbg!(&words);
409
    let remap = SliceRemapper::new(a, &words);
410
    assert_eq!(remap.slice(0..3), Some("foo bar"));
411
    assert_eq!(remap.slice(1..3), Some(" bar"));
412
    assert_eq!(remap.slice(0..1), Some("foo"));
413
    assert_eq!(remap.slice(0..5), Some("foo bar baz"));
414
    assert_eq!(remap.slice(0..6), None);
415
}