/src/gitoxide/gix-features/src/hash.rs
Line | Count | Source (jump to first uncovered line) |
1 | | //! Hash functions and hash utilities |
2 | | //! |
3 | | //! With the `fast-sha1` feature, the `Sha1` hash type will use a more elaborate implementation utilizing hardware support |
4 | | //! in case it is available. Otherwise the `rustsha1` feature should be set, which selects a minimal yet performant |
5 | | //! implementation for a decent trade-off between compile times and run-time performance. If both features are set, `fast-sha1` takes precedence. |
#[cfg(all(feature = "rustsha1", not(feature = "fast-sha1")))]
mod _impl {
    use super::Sha1Digest;

    /// A single-use SHA-1 hasher backed by the `sha1_smol` crate.
    ///
    /// This variant is compiled when `rustsha1` is enabled and `fast-sha1` is not.
    #[derive(Default, Clone)]
    pub struct Sha1(sha1_smol::Sha1);

    impl Sha1 {
        /// Feed `bytes` into the running hash state.
        pub fn update(&mut self, bytes: &[u8]) {
            let Self(state) = self;
            state.update(bytes);
        }

        /// Consume the hasher and return the digest of everything fed to it.
        pub fn digest(self) -> Sha1Digest {
            let Self(state) = self;
            state.digest().bytes()
        }
    }
}
25 | | |
/// The 20-byte digest produced by a [`Sha1`] hash implementation.
#[cfg(any(feature = "fast-sha1", feature = "rustsha1"))]
pub type Sha1Digest = [u8; 20];
29 | | |
#[cfg(feature = "fast-sha1")]
mod _impl {
    use sha1::Digest;

    use super::Sha1Digest;

    /// A single-use SHA-1 hasher backed by the `sha1` crate.
    ///
    /// This variant is compiled whenever `fast-sha1` is enabled.
    #[derive(Default, Clone)]
    pub struct Sha1(sha1::Sha1);

    impl Sha1 {
        /// Feed `bytes` into the running hash state.
        pub fn update(&mut self, bytes: &[u8]) {
            let Self(state) = self;
            Digest::update(state, bytes);
        }

        /// Consume the hasher and return the digest of everything fed to it.
        pub fn digest(self) -> Sha1Digest {
            let Self(state) = self;
            Digest::finalize(state).into()
        }
    }
}
51 | | |
52 | | #[cfg(any(feature = "rustsha1", feature = "fast-sha1"))] |
53 | | pub use _impl::Sha1; |
54 | | |
/// Continue a CRC32 computation over `bytes` and return the updated checksum.
///
/// Pass `0` as `previous_value` for the first chunk. For every following chunk, pass the value
/// returned by the previous call so that the result covers all chunks in sequence.
#[cfg(feature = "crc32")]
pub fn crc32_update(previous_value: u32, bytes: &[u8]) -> u32 {
    let mut state = crc32fast::Hasher::new_with_initial(previous_value);
    state.update(bytes);
    state.finalize()
}
66 | | |
/// Return the CRC32 checksum of `bytes` in one shot.
///
/// If the input arrives as multiple chunks of `bytes`, use [`crc32_update()`] instead.
#[cfg(feature = "crc32")]
pub fn crc32(bytes: &[u8]) -> u32 {
    // One-shot convenience equivalent to a fresh `Hasher` followed by `update` and `finalize`.
    crc32fast::hash(bytes)
}
76 | | |
/// Create a fresh hasher matching the given `kind` of hash.
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
pub fn hasher(kind: gix_hash::Kind) -> Sha1 {
    use gix_hash::Kind;

    // Deliberately exhaustive without a catch-all arm, so a new hash kind fails to compile here.
    match kind {
        Kind::Sha1 => Sha1::default(),
    }
}
84 | | |
/// Open the file at `path` and hash its first `num_bytes_from_start` bytes with a hash of `kind`,
/// reporting through `progress` along the way.
///
/// `num_bytes_from_start` is the amount of bytes to hash, counted from the beginning of the file.
/// It exists to leave out trailing hashes, which are never part of the hash itself.
///
/// # Note
///
/// * Only available with the `progress` feature together with either `rustsha1` or `fast-sha1`.
/// * [Interrupts][crate::interrupt] are supported: `should_interrupt` is passed on to [`bytes()`].
///
/// # Errors
///
/// Fails if the file cannot be opened, or with whatever error [`bytes()`] reports.
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes_of_file(
    path: &std::path::Path,
    num_bytes_from_start: u64,
    kind: gix_hash::Kind,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    let mut file = std::fs::File::open(path)?;
    bytes(&mut file, num_bytes_from_start, kind, progress, should_interrupt)
}
112 | | |
/// Like [`bytes_of_file`], but hashes the first `num_bytes_from_start` bytes of the stream `read`.
///
/// A new hasher for `kind` is created and the actual work is delegated to [`bytes_with_hasher()`].
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes(
    read: &mut dyn std::io::Read,
    num_bytes_from_start: u64,
    kind: gix_hash::Kind,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    let fresh_hasher = hasher(kind);
    bytes_with_hasher(read, num_bytes_from_start, fresh_hasher, progress, should_interrupt)
}
124 | | |
/// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind.
///
/// Exactly `num_bytes_from_start` bytes are read from `read` in fixed-size chunks and fed into
/// `hasher`. `progress` is initialized before any read happens and advanced after each chunk.
/// On success the throughput since the start of the call is shown and the digest is returned
/// as object id.
///
/// # Errors
///
/// * Any error of `read.read_exact()`, which includes the stream ending before
///   `num_bytes_from_start` bytes could be read.
/// * An error of kind [`std::io::ErrorKind::Other`] with the message `Interrupted` if
///   `should_interrupt` is found set after a chunk was processed.
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes_with_hasher(
    read: &mut dyn std::io::Read,
    num_bytes_from_start: u64,
    mut hasher: Sha1,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    const BUF_SIZE: usize = u16::MAX as usize;

    let start = std::time::Instant::now();
    // init progress before the possibility for failure, as convenience in case people want to recover
    progress.init(
        // This cast may be lossy where `Step` is narrower than `u64`; it only sets the progress upper bound.
        Some(num_bytes_from_start as prodash::progress::Step),
        crate::progress::bytes(),
    );

    let mut buf = [0u8; BUF_SIZE];
    let mut bytes_left = num_bytes_from_start;

    while bytes_left > 0 {
        // Clamp while still in `u64`, and only then narrow to `usize`. Casting `bytes_left` first
        // would truncate on targets where `usize` is narrower than `u64`, which could yield an
        // empty chunk while bytes remain and thus a loop that never makes progress.
        let chunk_len = bytes_left.min(BUF_SIZE as u64) as usize;
        let out = &mut buf[..chunk_len];
        read.read_exact(out)?;
        bytes_left -= out.len() as u64;
        progress.inc_by(out.len());
        hasher.update(out);
        // Checked once per chunk, after the chunk was hashed.
        if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) {
            return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted"));
        }
    }

    let id = gix_hash::ObjectId::from(hasher.digest());
    progress.show_throughput(start);
    Ok(id)
}
160 | | |
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
mod write {
    use crate::hash::Sha1;

    /// A writer adapter which hashes all bytes that its inner writer accepts.
    pub struct Write<T> {
        /// The hash state covering everything written so far.
        pub hash: Sha1,
        /// The writer receiving the data.
        pub inner: T,
    }

    impl<T: std::io::Write> std::io::Write for Write<T> {
        /// Forward `buf` to the inner writer and hash exactly the part it reports as written.
        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
            // A partial write must only hash the accepted prefix, as the caller will retry the rest.
            let accepted = self.inner.write(buf)?;
            self.hash.update(&buf[..accepted]);
            Ok(accepted)
        }

        /// Flush the inner writer; the hash state is not affected.
        fn flush(&mut self) -> std::io::Result<()> {
            self.inner.flush()
        }
    }

    impl<T: std::io::Write> Write<T> {
        /// Create a new hash writer which hashes all bytes written to `inner` with a hash of kind `object_hash`.
        pub fn new(inner: T, object_hash: gix_hash::Kind) -> Self {
            // Exhaustive on purpose, so additional hash kinds have to be handled here explicitly.
            let hash = match object_hash {
                gix_hash::Kind::Sha1 => Sha1::default(),
            };
            Write { hash, inner }
        }
    }
}
203 | | #[cfg(any(feature = "rustsha1", feature = "fast-sha1"))] |
204 | | pub use write::Write; |