/rust/registry/src/index.crates.io-1949cf8c6b5b557f/imara-diff-0.1.8/src/histogram.rs
Line | Count | Source |
1 | | use std::ops::Range; |
2 | | |
3 | | use crate::histogram::lcs::find_lcs; |
4 | | use crate::histogram::list_pool::{ListHandle, ListPool}; |
5 | | use crate::intern::Token; |
6 | | use crate::util::{strip_common_postfix, strip_common_prefix}; |
7 | | use crate::{myers, Sink}; |
8 | | |
9 | | mod lcs; |
10 | | mod list_pool; |
11 | | |
12 | | const MAX_CHAIN_LEN: u32 = 63; |
13 | | |
14 | | struct Histogram { |
15 | | token_occurances: Vec<ListHandle>, |
16 | | pool: ListPool, |
17 | | } |
18 | | |
19 | 71.7k | pub fn diff<S: Sink>( |
20 | 71.7k | mut before: &[Token], |
21 | 71.7k | mut after: &[Token], |
22 | 71.7k | num_tokens: u32, |
23 | 71.7k | mut sink: S, |
24 | 71.7k | ) -> S::Out { |
25 | 71.7k | let mut histogram = Histogram::new(num_tokens); |
26 | 71.7k | let prefix = strip_common_prefix(&mut before, &mut after); |
27 | 71.7k | strip_common_postfix(&mut before, &mut after); |
28 | 71.7k | histogram.run(before, prefix, after, prefix, &mut sink); |
29 | 71.7k | sink.finish() |
30 | 71.7k | } imara_diff::histogram::diff::<gix_merge::blob::builtin_driver::text::utils::CollectHunks> Line | Count | Source | 19 | 71.7k | pub fn diff<S: Sink>( | 20 | 71.7k | mut before: &[Token], | 21 | 71.7k | mut after: &[Token], | 22 | 71.7k | num_tokens: u32, | 23 | 71.7k | mut sink: S, | 24 | 71.7k | ) -> S::Out { | 25 | 71.7k | let mut histogram = Histogram::new(num_tokens); | 26 | 71.7k | let prefix = strip_common_prefix(&mut before, &mut after); | 27 | 71.7k | strip_common_postfix(&mut before, &mut after); | 28 | 71.7k | histogram.run(before, prefix, after, prefix, &mut sink); | 29 | 71.7k | sink.finish() | 30 | 71.7k | } |
Unexecuted instantiation: imara_diff::histogram::diff::<_> |
31 | | |
32 | | impl Histogram { |
33 | 71.7k | fn new(num_buckets: u32) -> Histogram { |
34 | 71.7k | Histogram { |
35 | 71.7k | token_occurances: vec![ListHandle::default(); num_buckets as usize], |
36 | 71.7k | pool: ListPool::new(2 * num_buckets), |
37 | 71.7k | } |
38 | 71.7k | } |
39 | | |
40 | 499k | fn clear(&mut self) { |
41 | 499k | self.pool.clear(); |
42 | 499k | } |
43 | | |
44 | 12.1M | fn token_occurances(&self, token: Token) -> &[u32] { |
45 | 12.1M | self.token_occurances[token.0 as usize].as_slice(&self.pool) |
46 | 12.1M | } |
47 | | |
48 | 221M | fn num_token_occurances(&self, token: Token) -> u32 { |
49 | 221M | self.token_occurances[token.0 as usize].len(&self.pool) |
50 | 221M | } |
51 | | |
52 | 499k | fn populate(&mut self, file: &[Token]) { |
53 | 416M | for (i, &token) in file.iter().enumerate() { |
54 | 416M | self.token_occurances[token.0 as usize].push(i as u32, &mut self.pool); |
55 | 416M | } |
56 | 499k | } |
57 | | |
58 | 380k | fn run( |
59 | 380k | &mut self, |
60 | 380k | mut before: &[Token], |
61 | 380k | mut before_off: u32, |
62 | 380k | mut after: &[Token], |
63 | 380k | mut after_off: u32, |
64 | 380k | sink: &mut impl Sink, |
65 | 380k | ) { |
66 | | loop { |
67 | 689k | if before.is_empty() { |
68 | 61.7k | if !after.is_empty() { |
69 | 61.4k | sink.process_change( |
70 | 61.4k | before_off..before_off, |
71 | 61.4k | after_off..after_off + after.len() as u32, |
72 | 61.4k | ); |
73 | 61.4k | } |
74 | 61.7k | return; |
75 | 627k | } else if after.is_empty() { |
76 | 128k | sink.process_change( |
77 | 128k | before_off..before_off + before.len() as u32, |
78 | 128k | after_off..after_off, |
79 | | ); |
80 | 128k | return; |
81 | 499k | } |
82 | | |
83 | 499k | self.populate(before); |
84 | 499k | match find_lcs(before, after, self) { |
85 | | // no lcs was found, that means that file1 and file2 two have nothing in common |
86 | 479k | Some(lcs) if lcs.len == 0 => { |
87 | 170k | sink.process_change( |
88 | 170k | before_off..before_off + before.len() as u32, |
89 | 170k | after_off..after_off + after.len() as u32, |
90 | | ); |
91 | 170k | return; |
92 | | } |
93 | 308k | Some(lcs) => { |
94 | 308k | self.run( |
95 | 308k | &before[..lcs.before_start as usize], |
96 | 308k | before_off, |
97 | 308k | &after[..lcs.after_start as usize], |
98 | 308k | after_off, |
99 | 308k | sink, |
100 | 308k | ); |
101 | 308k | |
102 | 308k | // this is equivalent to (tail) recursion but implement as a loop for efficeny reasons |
103 | 308k | let before_end = lcs.before_start + lcs.len; |
104 | 308k | before = &before[before_end as usize..]; |
105 | 308k | before_off += before_end; |
106 | 308k | |
107 | 308k | let after_end = lcs.after_start + lcs.len; |
108 | 308k | after = &after[after_end as usize..]; |
109 | 308k | after_off += after_end; |
110 | 308k | } |
111 | | None => { |
112 | | // we are diffing two extremly large repetitive file |
113 | | // this is a worst case for histogram diff with O(N^2) performance |
114 | | // fallback to myers to maintain linear time complxity |
115 | 19.8k | myers::diff( |
116 | 19.8k | before, |
117 | 19.8k | after, |
118 | | 0, // not used by myers |
119 | 335k | |mut before: Range<u32>, mut after: Range<u32>| { |
120 | 335k | before.start += before_off; |
121 | 335k | before.end += before_off; |
122 | 335k | after.start += after_off; |
123 | 335k | after.end += after_off; |
124 | 335k | sink.process_change(before, after) |
125 | 335k | }, <imara_diff::histogram::Histogram>::run::<gix_merge::blob::builtin_driver::text::utils::CollectHunks>::{closure#0}Line | Count | Source | 119 | 335k | |mut before: Range<u32>, mut after: Range<u32>| { | 120 | 335k | before.start += before_off; | 121 | 335k | before.end += before_off; | 122 | 335k | after.start += after_off; | 123 | 335k | after.end += after_off; | 124 | 335k | sink.process_change(before, after) | 125 | 335k | }, |
Unexecuted instantiation: <imara_diff::histogram::Histogram>::run::<_>::{closure#0} |
126 | | false, |
127 | | ); |
128 | 19.8k | return; |
129 | | } |
130 | | } |
131 | | } |
132 | 380k | } <imara_diff::histogram::Histogram>::run::<gix_merge::blob::builtin_driver::text::utils::CollectHunks> Line | Count | Source | 58 | 380k | fn run( | 59 | 380k | &mut self, | 60 | 380k | mut before: &[Token], | 61 | 380k | mut before_off: u32, | 62 | 380k | mut after: &[Token], | 63 | 380k | mut after_off: u32, | 64 | 380k | sink: &mut impl Sink, | 65 | 380k | ) { | 66 | | loop { | 67 | 689k | if before.is_empty() { | 68 | 61.7k | if !after.is_empty() { | 69 | 61.4k | sink.process_change( | 70 | 61.4k | before_off..before_off, | 71 | 61.4k | after_off..after_off + after.len() as u32, | 72 | 61.4k | ); | 73 | 61.4k | } | 74 | 61.7k | return; | 75 | 627k | } else if after.is_empty() { | 76 | 128k | sink.process_change( | 77 | 128k | before_off..before_off + before.len() as u32, | 78 | 128k | after_off..after_off, | 79 | | ); | 80 | 128k | return; | 81 | 499k | } | 82 | | | 83 | 499k | self.populate(before); | 84 | 499k | match find_lcs(before, after, self) { | 85 | | // no lcs was found, that means that file1 and file2 two have nothing in common | 86 | 479k | Some(lcs) if lcs.len == 0 => { | 87 | 170k | sink.process_change( | 88 | 170k | before_off..before_off + before.len() as u32, | 89 | 170k | after_off..after_off + after.len() as u32, | 90 | | ); | 91 | 170k | return; | 92 | | } | 93 | 308k | Some(lcs) => { | 94 | 308k | self.run( | 95 | 308k | &before[..lcs.before_start as usize], | 96 | 308k | before_off, | 97 | 308k | &after[..lcs.after_start as usize], | 98 | 308k | after_off, | 99 | 308k | sink, | 100 | 308k | ); | 101 | 308k | | 102 | 308k | // this is equivalent to (tail) recursion but implement as a loop for efficeny reasons | 103 | 308k | let before_end = lcs.before_start + lcs.len; | 104 | 308k | before = &before[before_end as usize..]; | 105 | 308k | before_off += before_end; | 106 | 308k | | 107 | 308k | let after_end = lcs.after_start + lcs.len; | 108 | 308k | after = &after[after_end as usize..]; | 109 | 308k | after_off += after_end; | 110 | 308k | } | 111 | | None => { | 112 | | // we are diffing two extremly large repetitive file | 113 | | // this is a worst case for histogram diff with O(N^2) performance | 114 | | // fallback to myers to maintain linear time complxity | 115 | 19.8k | myers::diff( | 116 | 19.8k | before, | 117 | 19.8k | after, | 118 | | 0, // not used by myers | 119 | | |mut before: Range<u32>, mut after: Range<u32>| { | 120 | | before.start += before_off; | 121 | | before.end += before_off; | 122 | | after.start += after_off; | 123 | | after.end += after_off; | 124 | | sink.process_change(before, after) | 125 | | }, | 126 | | false, | 127 | | ); | 128 | 19.8k | return; | 129 | | } | 130 | | } | 131 | | } | 132 | 380k | } |
Unexecuted instantiation: <imara_diff::histogram::Histogram>::run::<_> |
133 | | } |