Coverage Report

Created: 2026-02-14 07:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/imara-diff-0.1.8/src/myers.rs
Line
Count
Source
1
use std::ptr::NonNull;
2
3
use crate::intern::Token;
4
use crate::myers::middle_snake::{MiddleSnakeSearch, SearchResult};
5
use crate::myers::preprocess::PreprocessedFile;
6
use crate::myers::slice::FileSlice;
7
use crate::util::sqrt;
8
use crate::Sink;
9
10
mod middle_snake;
11
mod preprocess;
12
mod slice;
13
14
pub struct Myers {
15
    kvec: NonNull<[i32]>,
16
    kforward: NonNull<i32>,
17
    kbackward: NonNull<i32>,
18
    max_cost: u32,
19
}
20
21
155k
pub fn diff<S: Sink>(
22
155k
    before: &[Token],
23
155k
    after: &[Token],
24
155k
    _num_tokens: u32,
25
155k
    mut sink: S,
26
155k
    minimal: bool,
27
155k
) -> S::Out {
28
    // preprocess the files by removing parts of the file that are not contained in the other file at all
29
    // this process remaps the token indices and therefore requires us to track changed files in a char array
30
    // PERF use a bitset?
31
155k
    let (mut before, mut after) = preprocess::preprocess(before, after);
32
33
    // Perform the actual diff
34
155k
    Myers::new(before.tokens.len(), after.tokens.len()).run(
35
155k
        FileSlice::new(&mut before),
36
155k
        FileSlice::new(&mut after),
37
155k
        minimal,
38
    );
39
40
155k
    process_changes_with_sink(&before, &after, &mut sink);
41
155k
    sink.finish()
42
155k
}
imara_diff::myers::diff::<<imara_diff::histogram::Histogram>::run<gix_merge::blob::builtin_driver::text::utils::CollectHunks>::{closure#0}>
Line
Count
Source
21
16.6k
pub fn diff<S: Sink>(
22
16.6k
    before: &[Token],
23
16.6k
    after: &[Token],
24
16.6k
    _num_tokens: u32,
25
16.6k
    mut sink: S,
26
16.6k
    minimal: bool,
27
16.6k
) -> S::Out {
28
    // preprocess the files by removing parts of the file that are not contained in the other file at all
29
    // this process remaps the token indices and therefore requires us to track changed files in a char array
30
    // PERF use a bitset?
31
16.6k
    let (mut before, mut after) = preprocess::preprocess(before, after);
32
33
    // Perform the actual diff
34
16.6k
    Myers::new(before.tokens.len(), after.tokens.len()).run(
35
16.6k
        FileSlice::new(&mut before),
36
16.6k
        FileSlice::new(&mut after),
37
16.6k
        minimal,
38
    );
39
40
16.6k
    process_changes_with_sink(&before, &after, &mut sink);
41
16.6k
    sink.finish()
42
16.6k
}
imara_diff::myers::diff::<gix_merge::blob::builtin_driver::text::utils::CollectHunks>
Line
Count
Source
21
138k
pub fn diff<S: Sink>(
22
138k
    before: &[Token],
23
138k
    after: &[Token],
24
138k
    _num_tokens: u32,
25
138k
    mut sink: S,
26
138k
    minimal: bool,
27
138k
) -> S::Out {
28
    // preprocess the files by removing parts of the file that are not contained in the other file at all
29
    // this process remaps the token indices and therefore requires us to track changed files in a char array
30
    // PERF use a bitset?
31
138k
    let (mut before, mut after) = preprocess::preprocess(before, after);
32
33
    // Perform the actual diff
34
138k
    Myers::new(before.tokens.len(), after.tokens.len()).run(
35
138k
        FileSlice::new(&mut before),
36
138k
        FileSlice::new(&mut after),
37
138k
        minimal,
38
    );
39
40
138k
    process_changes_with_sink(&before, &after, &mut sink);
41
138k
    sink.finish()
42
138k
}
Unexecuted instantiation: imara_diff::myers::diff::<_>
43
44
const HEUR_MIN_COST: u32 = 256;
45
const MAX_COST_MIN: u32 = 256;
46
47
impl Drop for Myers {
48
155k
    fn drop(&mut self) {
49
155k
        unsafe { drop(Box::from_raw(self.kvec.as_ptr())) }
50
155k
    }
51
}
52
53
impl Myers {
54
155k
    fn new(len1: usize, len2: usize) -> Self {
55
155k
        let ndiags = len1 + len2 + 3;
56
155k
        let kvec: *mut [i32] = Box::into_raw(vec![0; 2 * ndiags + 2].into_boxed_slice());
57
155k
        let (kforward, kbackward) = unsafe {
58
155k
            (
59
155k
                NonNull::new_unchecked((kvec as *mut i32).add(len2 + 1)),
60
155k
                NonNull::new_unchecked((kvec as *mut i32).add(ndiags + len2 + 1)),
61
155k
            )
62
155k
        };
63
155k
        Self {
64
155k
            kvec: unsafe { NonNull::new_unchecked(kvec) },
65
155k
            kforward,
66
155k
            kbackward,
67
155k
            max_cost: sqrt(ndiags).max(MAX_COST_MIN),
68
155k
        }
69
155k
    }
70
71
2.34M
    fn run<'f>(&mut self, mut file1: FileSlice<'f>, mut file2: FileSlice<'f>, mut need_min: bool) {
72
        loop {
73
4.52M
            file1.strip_common(&mut file2);
74
75
4.52M
            if file1.is_empty() {
76
1.11M
                file2.mark_changed();
77
1.11M
                return;
78
3.41M
            } else if file2.is_empty() {
79
1.23M
                file1.mark_changed();
80
1.23M
                return;
81
2.18M
            }
82
83
2.18M
            let split = self.split(&file1, &file2, need_min);
84
2.18M
            self.run(
85
2.18M
                file1.borrow().slice(..split.token_idx1 as u32),
86
2.18M
                file2.borrow().slice(..split.token_idx2 as u32),
87
2.18M
                split.minimized_lo,
88
            );
89
90
2.18M
            file1 = file1.slice(split.token_idx1 as u32..);
91
2.18M
            file2 = file2.slice(split.token_idx2 as u32..);
92
2.18M
            need_min = split.minimized_hi
93
        }
94
2.34M
    }
95
96
    /// See "An O(ND) Difference Algorithm and its Variations", by Eugene Myers.
97
    /// Basically considers a "box" (off1, off2, lim1, lim2) and scan from both
98
    /// the forward diagonal starting from (off1, off2) and the backward diagonal
99
    /// starting from (lim1, lim2). If the K values on the same diagonal crosses
100
    /// returns the furthest point of reach. We might encounter expensive edge cases
101
    /// using this algorithm, so a little bit of heuristic is needed to cut the
102
    /// search and to return a suboptimal point.
103
2.18M
    fn split(&mut self, file1: &FileSlice, file2: &FileSlice, need_min: bool) -> Split {
104
2.18M
        let mut forward_search =
105
2.18M
            unsafe { MiddleSnakeSearch::<false>::new(self.kforward, file1, file2) };
106
2.18M
        let mut backwards_search =
107
2.18M
            unsafe { MiddleSnakeSearch::<true>::new(self.kbackward, file1, file2) };
108
2.18M
        let is_odd = (file1.len() - file2.len()) & 1 != 0;
109
110
2.18M
        let mut ec = 0;
111
112
28.7M
        while ec <= self.max_cost {
113
28.7M
            let mut found_snake = false;
114
28.7M
            forward_search.next_d();
115
28.7M
            if is_odd {
116
0
                if let Some(res) = forward_search.run(file1, file2, |k, token_idx1| {
117
0
                    backwards_search.contains(k)
118
0
                        && backwards_search.x_pos_at_diagonal(k) <= token_idx1
119
0
                }) {
120
0
                    match res {
121
0
                        SearchResult::Snake => found_snake = true,
122
                        SearchResult::Found {
123
0
                            token_idx1,
124
0
                            token_idx2,
125
                        } => {
126
0
                            return Split {
127
0
                                token_idx1,
128
0
                                token_idx2,
129
0
                                minimized_lo: true,
130
0
                                minimized_hi: true,
131
0
                            };
132
                        }
133
                    }
134
0
                }
135
            } else {
136
28.7M
                found_snake |= forward_search.run(file1, file2, |_, _| false).is_some()
137
            };
138
139
28.7M
            backwards_search.next_d();
140
28.7M
            if !is_odd {
141
2.08G
                if let Some(res) = backwards_search.run(file1, file2, |k, token_idx1| {
142
2.08G
                    forward_search.contains(k) && token_idx1 <= forward_search.x_pos_at_diagonal(k)
143
2.08G
                }) {
144
2.63M
                    match res {
145
457k
                        SearchResult::Snake => found_snake = true,
146
                        SearchResult::Found {
147
2.17M
                            token_idx1,
148
2.17M
                            token_idx2,
149
                        } => {
150
2.17M
                            return Split {
151
2.17M
                                token_idx1,
152
2.17M
                                token_idx2,
153
2.17M
                                minimized_lo: true,
154
2.17M
                                minimized_hi: true,
155
2.17M
                            };
156
                        }
157
                    }
158
26.1M
                }
159
            } else {
160
0
                found_snake |= backwards_search.run(file1, file2, |_, _| false).is_some()
161
            };
162
163
26.6M
            if need_min {
164
22.7M
                continue;
165
3.83M
            }
166
167
            // If the edit cost is above the heuristic trigger and if
168
            // we got a good snake, we sample current diagonals to see
169
            // if some of them have reached an "interesting" path. Our
170
            // measure is a function of the distance from the diagonal
171
            // corner (i1 + i2) penalized with the distance from the
172
            // mid diagonal itself. If this value is above the current
173
            // edit cost times a magic factor (XDL_K_HEUR) we consider
174
            // it interesting.
175
3.83M
            if found_snake && ec > HEUR_MIN_COST {
176
0
                if let Some((token_idx1, token_idx2)) = forward_search.found_snake(ec, file1, file2)
177
                {
178
0
                    return Split {
179
0
                        token_idx1,
180
0
                        token_idx2,
181
0
                        minimized_lo: true,
182
0
                        minimized_hi: false,
183
0
                    };
184
0
                }
185
186
0
                if let Some((token_idx1, token_idx2)) =
187
0
                    backwards_search.found_snake(ec, file1, file2)
188
                {
189
0
                    return Split {
190
0
                        token_idx1,
191
0
                        token_idx2,
192
0
                        minimized_lo: false,
193
0
                        minimized_hi: true,
194
0
                    };
195
0
                }
196
3.83M
            }
197
198
3.83M
            ec += 1;
199
        }
200
201
9.59k
        let (distance_forward, token_idx1_forward) = forward_search.best_position(file1, file2);
202
9.59k
        let (distance_backwards, token_idx1_backwards) =
203
9.59k
            backwards_search.best_position(file1, file2);
204
9.59k
        if distance_forward > file1.len() as isize + file2.len() as isize - distance_backwards {
205
2.46k
            Split {
206
2.46k
                token_idx1: token_idx1_forward,
207
2.46k
                token_idx2: (distance_forward - token_idx1_forward as isize) as i32,
208
2.46k
                minimized_lo: true,
209
2.46k
                minimized_hi: false,
210
2.46k
            }
211
        } else {
212
7.13k
            Split {
213
7.13k
                token_idx1: token_idx1_backwards,
214
7.13k
                token_idx2: (distance_backwards - token_idx1_backwards as isize) as i32,
215
7.13k
                minimized_lo: false,
216
7.13k
                minimized_hi: true,
217
7.13k
            }
218
        }
219
2.18M
    }
220
}
221
222
#[derive(Debug)]
223
struct Split {
224
    token_idx1: i32,
225
    token_idx2: i32,
226
    minimized_lo: bool,
227
    minimized_hi: bool,
228
}
229
230
/// the mapping performed during preprocessing makes it impossible to directly call
231
/// the `sink` during the diff itself. Instead `file.changed` is set to true for all
232
/// tokens that are changed
233
/// below these arrays are used to call the sink function
234
155k
fn process_changes_with_sink(
235
155k
    before: &PreprocessedFile,
236
155k
    after: &PreprocessedFile,
237
155k
    sink: &mut impl Sink,
238
155k
) {
239
155k
    let before_end = before.is_changed.len() as u32 + before.offset;
240
155k
    let after_end = after.is_changed.len() as u32 + after.offset;
241
242
155k
    let mut before = before
243
155k
        .is_changed
244
155k
        .iter()
245
155k
        .enumerate()
246
239M
        .map(|(i, removed)| (i as u32 + before.offset, *removed));
imara_diff::myers::process_changes_with_sink::<<imara_diff::histogram::Histogram>::run<gix_merge::blob::builtin_driver::text::utils::CollectHunks>::{closure#0}>::{closure#0}
Line
Count
Source
246
23.8M
        .map(|(i, removed)| (i as u32 + before.offset, *removed));
imara_diff::myers::process_changes_with_sink::<gix_merge::blob::builtin_driver::text::utils::CollectHunks>::{closure#0}
Line
Count
Source
246
216M
        .map(|(i, removed)| (i as u32 + before.offset, *removed));
Unexecuted instantiation: imara_diff::myers::process_changes_with_sink::<_>::{closure#0}
247
248
155k
    let mut after = after
249
155k
        .is_changed
250
155k
        .iter()
251
155k
        .enumerate()
252
55.3M
        .map(|(i, inserted)| (i as u32 + after.offset, *inserted));
imara_diff::myers::process_changes_with_sink::<<imara_diff::histogram::Histogram>::run<gix_merge::blob::builtin_driver::text::utils::CollectHunks>::{closure#0}>::{closure#1}
Line
Count
Source
252
2.66M
        .map(|(i, inserted)| (i as u32 + after.offset, *inserted));
imara_diff::myers::process_changes_with_sink::<gix_merge::blob::builtin_driver::text::utils::CollectHunks>::{closure#1}
Line
Count
Source
252
52.6M
        .map(|(i, inserted)| (i as u32 + after.offset, *inserted));
Unexecuted instantiation: imara_diff::myers::process_changes_with_sink::<_>::{closure#1}
253
254
155k
    let mut next1 = before.next();
255
155k
    let mut next2 = after.next();
256
257
16.0M
    while let (Some((before_pos, removed)), Some((after_pos, inserted))) = (next1, next2) {
258
15.9M
        if !(removed | inserted) {
259
13.0M
            next1 = before.next();
260
13.0M
            next2 = after.next();
261
13.0M
            continue;
262
2.82M
        }
263
264
2.82M
        let mut hunk_before = before_pos..before_pos;
265
2.82M
        let mut hunk_after = after_pos..after_pos;
266
2.82M
        if removed {
267
226M
            let end = before.find(|(_, changed)| !changed);
imara_diff::myers::process_changes_with_sink::<<imara_diff::histogram::Histogram>::run<gix_merge::blob::builtin_driver::text::utils::CollectHunks>::{closure#0}>::{closure#2}
Line
Count
Source
267
22.9M
            let end = before.find(|(_, changed)| !changed);
imara_diff::myers::process_changes_with_sink::<gix_merge::blob::builtin_driver::text::utils::CollectHunks>::{closure#2}
Line
Count
Source
267
203M
            let end = before.find(|(_, changed)| !changed);
Unexecuted instantiation: imara_diff::myers::process_changes_with_sink::<_>::{closure#2}
268
1.80M
            next1 = end.map(|(end, _)| (end, false));
imara_diff::myers::process_changes_with_sink::<<imara_diff::histogram::Histogram>::run<gix_merge::blob::builtin_driver::text::utils::CollectHunks>::{closure#0}>::{closure#3}
Line
Count
Source
268
169k
            next1 = end.map(|(end, _)| (end, false));
imara_diff::myers::process_changes_with_sink::<gix_merge::blob::builtin_driver::text::utils::CollectHunks>::{closure#3}
Line
Count
Source
268
1.56M
            next1 = end.map(|(end, _)| (end, false));
Unexecuted instantiation: imara_diff::myers::process_changes_with_sink::<_>::{closure#3}
269
1.80M
            hunk_before.end = end.map_or(before_end, |(end, _)| end);
270
1.01M
        };
271
272
2.82M
        if inserted {
273
42.1M
            let end = after.find(|(_, changed)| !changed);
imara_diff::myers::process_changes_with_sink::<<imara_diff::histogram::Histogram>::run<gix_merge::blob::builtin_driver::text::utils::CollectHunks>::{closure#0}>::{closure#5}
Line
Count
Source
273
1.73M
            let end = after.find(|(_, changed)| !changed);
imara_diff::myers::process_changes_with_sink::<gix_merge::blob::builtin_driver::text::utils::CollectHunks>::{closure#5}
Line
Count
Source
273
40.4M
            let end = after.find(|(_, changed)| !changed);
Unexecuted instantiation: imara_diff::myers::process_changes_with_sink::<_>::{closure#5}
274
1.63M
            next2 = end.map(|(end, _)| (end, false));
imara_diff::myers::process_changes_with_sink::<<imara_diff::histogram::Histogram>::run<gix_merge::blob::builtin_driver::text::utils::CollectHunks>::{closure#0}>::{closure#6}
Line
Count
Source
274
153k
            next2 = end.map(|(end, _)| (end, false));
imara_diff::myers::process_changes_with_sink::<gix_merge::blob::builtin_driver::text::utils::CollectHunks>::{closure#6}
Line
Count
Source
274
1.41M
            next2 = end.map(|(end, _)| (end, false));
Unexecuted instantiation: imara_diff::myers::process_changes_with_sink::<_>::{closure#6}
275
1.63M
            hunk_after.end = end.map_or(after_end, |(end, _)| end);
276
1.19M
        }
277
278
2.82M
        sink.process_change(hunk_before, hunk_after);
279
    }
280
281
155k
    if let Some((before_pos, _)) = next1 {
282
57.9k
        sink.process_change(before_pos..before_end, after_end..after_end);
283
97.7k
    } else if let Some((after_pos, _)) = next2 {
284
26.5k
        sink.process_change(before_end..before_end, after_pos..after_end);
285
71.1k
    }
286
155k
}
imara_diff::myers::process_changes_with_sink::<<imara_diff::histogram::Histogram>::run<gix_merge::blob::builtin_driver::text::utils::CollectHunks>::{closure#0}>
Line
Count
Source
234
16.6k
fn process_changes_with_sink(
235
16.6k
    before: &PreprocessedFile,
236
16.6k
    after: &PreprocessedFile,
237
16.6k
    sink: &mut impl Sink,
238
16.6k
) {
239
16.6k
    let before_end = before.is_changed.len() as u32 + before.offset;
240
16.6k
    let after_end = after.is_changed.len() as u32 + after.offset;
241
242
16.6k
    let mut before = before
243
16.6k
        .is_changed
244
16.6k
        .iter()
245
16.6k
        .enumerate()
246
16.6k
        .map(|(i, removed)| (i as u32 + before.offset, *removed));
247
248
16.6k
    let mut after = after
249
16.6k
        .is_changed
250
16.6k
        .iter()
251
16.6k
        .enumerate()
252
16.6k
        .map(|(i, inserted)| (i as u32 + after.offset, *inserted));
253
254
16.6k
    let mut next1 = before.next();
255
16.6k
    let mut next2 = after.next();
256
257
1.22M
    while let (Some((before_pos, removed)), Some((after_pos, inserted))) = (next1, next2) {
258
1.20M
        if !(removed | inserted) {
259
915k
            next1 = before.next();
260
915k
            next2 = after.next();
261
915k
            continue;
262
293k
        }
263
264
293k
        let mut hunk_before = before_pos..before_pos;
265
293k
        let mut hunk_after = after_pos..after_pos;
266
293k
        if removed {
267
178k
            let end = before.find(|(_, changed)| !changed);
268
178k
            next1 = end.map(|(end, _)| (end, false));
269
178k
            hunk_before.end = end.map_or(before_end, |(end, _)| end);
270
114k
        };
271
272
293k
        if inserted {
273
162k
            let end = after.find(|(_, changed)| !changed);
274
162k
            next2 = end.map(|(end, _)| (end, false));
275
162k
            hunk_after.end = end.map_or(after_end, |(end, _)| end);
276
130k
        }
277
278
293k
        sink.process_change(hunk_before, hunk_after);
279
    }
280
281
16.6k
    if let Some((before_pos, _)) = next1 {
282
6.56k
        sink.process_change(before_pos..before_end, after_end..after_end);
283
10.1k
    } else if let Some((after_pos, _)) = next2 {
284
1.05k
        sink.process_change(before_end..before_end, after_pos..after_end);
285
9.08k
    }
286
16.6k
}
imara_diff::myers::process_changes_with_sink::<gix_merge::blob::builtin_driver::text::utils::CollectHunks>
Line
Count
Source
234
138k
fn process_changes_with_sink(
235
138k
    before: &PreprocessedFile,
236
138k
    after: &PreprocessedFile,
237
138k
    sink: &mut impl Sink,
238
138k
) {
239
138k
    let before_end = before.is_changed.len() as u32 + before.offset;
240
138k
    let after_end = after.is_changed.len() as u32 + after.offset;
241
242
138k
    let mut before = before
243
138k
        .is_changed
244
138k
        .iter()
245
138k
        .enumerate()
246
138k
        .map(|(i, removed)| (i as u32 + before.offset, *removed));
247
248
138k
    let mut after = after
249
138k
        .is_changed
250
138k
        .iter()
251
138k
        .enumerate()
252
138k
        .map(|(i, inserted)| (i as u32 + after.offset, *inserted));
253
254
138k
    let mut next1 = before.next();
255
138k
    let mut next2 = after.next();
256
257
14.8M
    while let (Some((before_pos, removed)), Some((after_pos, inserted))) = (next1, next2) {
258
14.6M
        if !(removed | inserted) {
259
12.1M
            next1 = before.next();
260
12.1M
            next2 = after.next();
261
12.1M
            continue;
262
2.53M
        }
263
264
2.53M
        let mut hunk_before = before_pos..before_pos;
265
2.53M
        let mut hunk_after = after_pos..after_pos;
266
2.53M
        if removed {
267
1.63M
            let end = before.find(|(_, changed)| !changed);
268
1.63M
            next1 = end.map(|(end, _)| (end, false));
269
1.63M
            hunk_before.end = end.map_or(before_end, |(end, _)| end);
270
902k
        };
271
272
2.53M
        if inserted {
273
1.47M
            let end = after.find(|(_, changed)| !changed);
274
1.47M
            next2 = end.map(|(end, _)| (end, false));
275
1.47M
            hunk_after.end = end.map_or(after_end, |(end, _)| end);
276
1.06M
        }
277
278
2.53M
        sink.process_change(hunk_before, hunk_after);
279
    }
280
281
138k
    if let Some((before_pos, _)) = next1 {
282
51.3k
        sink.process_change(before_pos..before_end, after_end..after_end);
283
87.6k
    } else if let Some((after_pos, _)) = next2 {
284
25.5k
        sink.process_change(before_end..before_end, after_pos..after_end);
285
62.0k
    }
286
138k
}
Unexecuted instantiation: imara_diff::myers::process_changes_with_sink::<_>