Coverage Report

Created: 2024-08-22 06:13

/src/gitoxide/gix-features/src/hash.rs
Line
Count
Source (jump to first uncovered line)
1
//! Hash functions and hash utilities
2
//!
3
//! With the `fast-sha1` feature, the `Sha1` hash type will use a more elaborate implementation utilizing hardware support
4
//! in case it is available. Otherwise the `rustsha1` feature should be set. `fast-sha1` will take precedence.
5
//! With only `rustsha1` enabled, a minimal yet performant implementation is used instead for a decent trade-off between compile times and run-time performance.
6
#[cfg(all(feature = "rustsha1", not(feature = "fast-sha1")))]
mod _impl {
    use super::Sha1Digest;

    /// An implementation of the Sha1 hash backed by `sha1_smol`, which can be used once.
    #[derive(Default, Clone)]
    pub struct Sha1(sha1_smol::Sha1);

    impl Sha1 {
        /// Feed the given `bytes` into the running hash state.
        pub fn update(&mut self, bytes: &[u8]) {
            let Self(state) = self;
            state.update(bytes)
        }

        /// Consume the hasher and return the digest of everything fed to it so far.
        pub fn digest(self) -> Sha1Digest {
            let Self(state) = self;
            state.digest().bytes()
        }
    }
}
25
26
/// The 20-byte digest produced by a [`Sha1`] hash implementation.
#[cfg(any(feature = "fast-sha1", feature = "rustsha1"))]
pub type Sha1Digest = [u8; 20];
29
30
#[cfg(feature = "fast-sha1")]
mod _impl {
    use sha1::Digest;

    use super::Sha1Digest;

    /// An implementation of the Sha1 hash backed by the `sha1` crate, which can be used once.
    #[derive(Default, Clone)]
    pub struct Sha1(sha1::Sha1);

    impl Sha1 {
        /// Feed the given `bytes` into the running hash state.
        pub fn update(&mut self, bytes: &[u8]) {
            Digest::update(&mut self.0, bytes)
        }

        /// Consume the hasher and return the digest of everything fed to it so far.
        pub fn digest(self) -> Sha1Digest {
            let Self(state) = self;
            Digest::finalize(state).into()
        }
    }
}
51
52
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
53
pub use _impl::Sha1;
54
55
/// Continue a CRC32 computation over `bytes`, returning the updated CRC32 value.
///
/// Pass `0` as `previous_value` for the first chunk. For every following chunk, pass the value
/// returned by the previous call so that the result covers all sequential chunks of `bytes`
/// seen so far.
#[cfg(feature = "crc32")]
pub fn crc32_update(previous_value: u32, bytes: &[u8]) -> u32 {
    let mut state = crc32fast::Hasher::new_with_initial(previous_value);
    state.update(bytes);
    state.finalize()
}
66
67
/// Compute the CRC32 value of a single, complete slice of `bytes`.
///
/// If the input arrives in multiple chunks, use [`crc32_update()`] instead.
#[cfg(feature = "crc32")]
pub fn crc32(bytes: &[u8]) -> u32 {
    let mut state = crc32fast::Hasher::new();
    state.update(bytes);
    state.finalize()
}
76
77
/// Create a fresh hasher matching the given `kind` of hash.
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
pub fn hasher(kind: gix_hash::Kind) -> Sha1 {
    use gix_hash::Kind;

    // Deliberately exhaustive without a catch-all, so a new hash kind fails to compile here.
    match kind {
        Kind::Sha1 => Sha1::default(),
    }
}
Unexecuted instantiation: gix_features::hash::hasher
Unexecuted instantiation: gix_features::hash::hasher
Unexecuted instantiation: gix_features::hash::hasher
84
85
/// Open the file at `path` and compute the hash of `kind` over its first `num_bytes_from_start` bytes,
/// initializing and advancing `progress` along the way.
///
/// `num_bytes_from_start` is the amount of bytes to hash, counted from the beginning of the file. It is
/// useful to avoid reading trailing hashes, which are never part of the hash itself.
///
/// # Note
///
/// * Only available with the `progress` feature and one of the `rustsha1` or `fast-sha1` features enabled.
/// * Uses the [`gix_hash::Kind`] enum and returns a [`gix_hash::ObjectId`].
/// * [Interrupts][crate::interrupt] are supported via `should_interrupt`.
///
/// # Errors
///
/// Fails if the file cannot be opened, and otherwise with any error produced by [`bytes()`].
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes_of_file(
    path: &std::path::Path,
    num_bytes_from_start: u64,
    kind: gix_hash::Kind,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    let mut file = std::fs::File::open(path)?;
    bytes(&mut file, num_bytes_from_start, kind, progress, should_interrupt)
}
Unexecuted instantiation: gix_features::hash::bytes_of_file
Unexecuted instantiation: gix_features::hash::bytes_of_file
112
113
/// Like [`bytes_of_file`], but hashes the first `num_bytes_from_start` bytes of the `read` stream
/// instead of opening a file.
///
/// The hasher is created from `kind` and the actual work is delegated to [`bytes_with_hasher()`].
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes(
    read: &mut dyn std::io::Read,
    num_bytes_from_start: u64,
    kind: gix_hash::Kind,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    let state = hasher(kind);
    bytes_with_hasher(read, num_bytes_from_start, state, progress, should_interrupt)
}
Unexecuted instantiation: gix_features::hash::bytes
Unexecuted instantiation: gix_features::hash::bytes
124
125
/// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind.
///
/// Exactly `num_bytes_from_start` bytes are read from `read` in chunks of at most `u16::MAX` bytes,
/// each of which is fed to `hasher` and reported to `progress`. Once all bytes are consumed, the
/// throughput is shown on `progress` and the resulting object id is returned.
///
/// # Errors
///
/// * Any error returned by `read.read_exact()`, for instance if the stream ends before
///   `num_bytes_from_start` bytes were read.
/// * An error of kind [`std::io::ErrorKind::Other`] with the message `Interrupted` if
///   `should_interrupt` is found to be set after a chunk was processed.
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes_with_hasher(
    read: &mut dyn std::io::Read,
    num_bytes_from_start: u64,
    mut hasher: Sha1,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    let start = std::time::Instant::now();
    // init progress before the possibility for failure, as convenience in case people want to recover
    progress.init(
        // A checked conversion: if the total does not fit into a step (possible where `usize` is narrower
        // than `u64`), report no upper bound rather than a silently truncated one.
        prodash::progress::Step::try_from(num_bytes_from_start).ok(),
        crate::progress::bytes(),
    );

    const BUF_SIZE: usize = u16::MAX as usize;
    let mut buf = [0u8; BUF_SIZE];
    let mut bytes_left = num_bytes_from_start;

    while bytes_left > 0 {
        // Clamp while still in `u64` and only then narrow to `usize`. Casting first would truncate on
        // targets with a 32-bit `usize`, where a remainder that is a multiple of 2^32 becomes `0`,
        // yielding an empty chunk and a loop that never makes progress.
        let chunk_len = bytes_left.min(BUF_SIZE as u64) as usize;
        let out = &mut buf[..chunk_len];
        read.read_exact(out)?;
        bytes_left -= chunk_len as u64;
        progress.inc_by(chunk_len);
        hasher.update(out);
        if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) {
            return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted"));
        }
    }

    let id = gix_hash::ObjectId::from(hasher.digest());
    progress.show_throughput(start);
    Ok(id)
}
Unexecuted instantiation: gix_features::hash::bytes_with_hasher
Unexecuted instantiation: gix_features::hash::bytes_with_hasher
160
161
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
mod write {
    use crate::hash::Sha1;

    /// A writer that forwards everything to an inner writer while hashing the bytes that writer accepted.
    pub struct Write<T> {
        /// The hash implementation, updated with each successfully written slice of bytes.
        pub hash: Sha1,
        /// The inner writer that receives the data.
        pub inner: T,
    }

    impl<T: std::io::Write> std::io::Write for Write<T> {
        /// Write `buf` to the inner writer and hash only the prefix it reported as written,
        /// so the hash always matches what actually reached `inner`.
        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
            let accepted = self.inner.write(buf)?;
            let (hashed, _unwritten) = buf.split_at(accepted);
            self.hash.update(hashed);
            Ok(accepted)
        }

        /// Flush the inner writer; the hash state is not affected.
        fn flush(&mut self) -> std::io::Result<()> {
            self.inner.flush()
        }
    }

    impl<T: std::io::Write> Write<T> {
        /// Create a new hash writer which hashes all bytes written to `inner` with a hash of kind `object_hash`.
        pub fn new(inner: T, object_hash: gix_hash::Kind) -> Self {
            // Deliberately exhaustive without a catch-all, so a new hash kind fails to compile here.
            let hash = match object_hash {
                gix_hash::Kind::Sha1 => Sha1::default(),
            };
            Write { hash, inner }
        }
    }
}
203
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
204
pub use write::Write;