/src/gitoxide/gix-features/src/hash.rs

Source (jump to first uncovered line)
//! Hash functions and hash utilities
//!
//! With the `fast-sha1` feature, the `Sha1` hash type uses a more elaborate implementation that utilizes hardware support
//! if it is available. Otherwise the `rustsha1` feature should be set, which selects a minimal yet performant implementation
//! for a decent trade-off between compile times and run-time performance. If both features are set, `fast-sha1` takes precedence.
#[cfg(all(feature = "rustsha1", not(feature = "fast-sha1")))]
mod _impl {
    use super::Sha1Digest;

    /// A single-use Sha1 hasher wrapping `sha1_smol::Sha1`.
    #[derive(Default, Clone)]
    pub struct Sha1(sha1_smol::Sha1);

    impl Sha1 {
        /// Feed the given `bytes` into the running hash state.
        pub fn update(&mut self, bytes: &[u8]) {
            let Self(state) = self;
            state.update(bytes)
        }
        /// Consume the hasher and return the digest of everything fed into it.
        pub fn digest(self) -> Sha1Digest {
            let Self(state) = self;
            state.digest().bytes()
        }
    }
}

/// A 20 bytes digest produced by a [`Sha1`] hash implementation.
///
/// Only defined if at least one of the `fast-sha1` or `rustsha1` features is enabled.
#[cfg(any(feature = "fast-sha1", feature = "rustsha1"))]
pub type Sha1Digest = [u8; 20];

#[cfg(feature = "fast-sha1")]
mod _impl {
    use sha1::Digest;

    use super::Sha1Digest;

    /// A single-use Sha1 hasher wrapping `sha1::Sha1`.
    #[derive(Default, Clone)]
    pub struct Sha1(sha1::Sha1);

    impl Sha1 {
        /// Feed the given `bytes` into the running hash state.
        pub fn update(&mut self, bytes: &[u8]) {
            let Self(state) = self;
            state.update(bytes)
        }
        /// Consume the hasher and return the digest of everything fed into it.
        pub fn digest(self) -> Sha1Digest {
            let Self(state) = self;
            state.finalize().into()
        }
    }
}

#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
pub use _impl::Sha1;

/// Continue a CRC32 computation with the given `bytes` and return the updated CRC32 value.
///
/// Pass `0` as `previous_value` on the first call. On every following call, pass the value returned by the
/// preceding call so that multiple sequential chunks of `bytes` are folded into a single checksum.
#[cfg(feature = "crc32")]
pub fn crc32_update(previous_value: u32, bytes: &[u8]) -> u32 {
    let mut state = crc32fast::Hasher::new_with_initial(previous_value);
    state.update(bytes);
    state.finalize()
}

/// Compute the CRC32 value of `bytes` in one go.
///
/// If the input arrives in multiple chunks of `bytes`, use [`crc32_update()`] instead.
#[cfg(feature = "crc32")]
pub fn crc32(bytes: &[u8]) -> u32 {
    // One-shot helper of the crate: a fresh hasher, a single update, then finalize.
    crc32fast::hash(bytes)
}

/// Create a fresh hasher matching the requested `kind` of hash.
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
pub fn hasher(kind: gix_hash::Kind) -> Sha1 {
    use gix_hash::Kind;

    // Kept as an exhaustive match without a catch-all arm, so that adding a hash kind fails to compile here.
    match kind {
        Kind::Sha1 => Sha1::default(),
    }
}

/// Open the file at `path` and compute the hash of `kind` over its first `num_bytes_from_start` bytes,
/// initializing and advancing `progress` along the way.
///
/// `num_bytes_from_start` is the amount of bytes to hash, counted from the beginning of the file. It is useful
/// to leave out trailing hashes, which are never part of the hash itself.
///
/// # Note
///
/// * Only available with the `progress` feature and one of the `rustsha1` or `fast-sha1` features enabled.
/// * [Interrupts][crate::interrupt] are supported by means of `should_interrupt`.
///
/// # Errors
///
/// Fails if the file cannot be opened, and with any error produced by [`bytes()`].
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes_of_file(
    path: &std::path::Path,
    num_bytes_from_start: u64,
    kind: gix_hash::Kind,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    let mut file = std::fs::File::open(path)?;
    bytes(&mut file, num_bytes_from_start, kind, progress, should_interrupt)
}

/// Like [`bytes_of_file`], but hashes the first `num_bytes_from_start` bytes of the stream `read`
/// instead of opening a file.
///
/// The hasher is created from `kind`, and all work is delegated to [`bytes_with_hasher()`].
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes(
    read: &mut dyn std::io::Read,
    num_bytes_from_start: u64,
    kind: gix_hash::Kind,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    let fresh_hasher = hasher(kind);
    bytes_with_hasher(read, num_bytes_from_start, fresh_hasher, progress, should_interrupt)
}

/// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind.
///
/// Exactly `num_bytes_from_start` bytes are read from `read` in chunks of at most `u16::MAX` bytes. Each chunk is
/// fed into `hasher` and `progress` is advanced by the length of the chunk. Once all bytes are consumed, the
/// digest of `hasher` is returned as object id and the throughput is shown on `progress`.
///
/// # Errors
///
/// * Any error returned by `read.read_exact()` is passed on unchanged.
/// * If `should_interrupt` is found to be set after a chunk was hashed, an error of kind
///   [`std::io::ErrorKind::Other`] with the message `Interrupted` is returned.
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes_with_hasher(
    read: &mut dyn std::io::Read,
    num_bytes_from_start: u64,
    mut hasher: Sha1,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    let start = std::time::Instant::now();
    // init progress before the possibility for failure, as convenience in case people want to recover
    progress.init(
        Some(num_bytes_from_start as prodash::progress::Step),
        crate::progress::bytes(),
    );

    const BUF_SIZE: usize = u16::MAX as usize;
    let mut buf = [0u8; BUF_SIZE];
    let mut bytes_left = num_bytes_from_start;

    while bytes_left > 0 {
        // Clamp while still in `u64` and narrow to `usize` afterwards. Narrowing first would truncate values
        // of 4GiB and more where `usize` is 32 bits wide, which can yield a chunk length of 0 while bytes are
        // still left and would keep this loop spinning forever. The clamped value never exceeds `BUF_SIZE`,
        // hence the cast is lossless.
        let chunk_len = bytes_left.min(BUF_SIZE as u64) as usize;
        let out = &mut buf[..chunk_len];
        read.read_exact(out)?;
        bytes_left -= out.len() as u64;
        progress.inc_by(out.len());
        hasher.update(out);
        // Checked once per chunk, after it was hashed, so an interrupt is noticed at chunk granularity.
        if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) {
            return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted"));
        }
    }

    let id = gix_hash::ObjectId::from(hasher.digest());
    progress.show_throughput(start);
    Ok(id)
}

#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
mod write {
    use crate::hash::Sha1;

    /// A writer that passes all data on to an inner writer and hashes the bytes that writer accepted.
    pub struct Write<T> {
        /// The hash state over all bytes written so far.
        pub hash: Sha1,
        /// The writer receiving the data.
        pub inner: T,
    }

    impl<T> std::io::Write for Write<T>
    where
        T: std::io::Write,
    {
        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
            // Only hash the prefix that `inner` reports as written, as the caller
            // is expected to submit the remainder again.
            self.inner.write(buf).map(|accepted| {
                self.hash.update(&buf[..accepted]);
                accepted
            })
        }

        fn flush(&mut self) -> std::io::Result<()> {
            // The hash state has nothing to flush, only the inner writer does.
            self.inner.flush()
        }
    }

    impl<T> Write<T>
    where
        T: std::io::Write,
    {
        /// Create a new writer which forwards to `inner` and hashes all written bytes with a hash of kind `object_hash`.
        pub fn new(inner: T, object_hash: gix_hash::Kind) -> Self {
            let hash = match object_hash {
                gix_hash::Kind::Sha1 => Sha1::default(),
            };
            Self { hash, inner }
        }
    }
}
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
pub use write::Write;

Coverage Report

Created: 2024-08-22 06:13

Line	Count	Source (jump to first uncovered line)
1		//! Hash functions and hash utilities
2		//!
3		//! With the `fast-sha1` feature, the `Sha1` hash type will use a more elaborate implementation utilizing hardware support
4		//! in case it is available. Otherwise the `rustsha1` feature should be set. `fast-sha1` will take precedence.
5		//! Otherwise, a minimal yet performant implementation is used instead for a decent trade-off between compile times and run-time performance.
6		#[cfg(all(feature = "rustsha1", not(feature = "fast-sha1")))]
7		mod _impl {
8		use super::Sha1Digest;
9
10		/// A implementation of the Sha1 hash, which can be used once.
11		#[derive(Default, Clone)]
12		pub struct Sha1(sha1_smol::Sha1);
13
14		impl Sha1 {
15		/// Digest the given `bytes`.
16	0	pub fn update(&mut self, bytes: &[u8]) {
17	0	self.0.update(bytes)
18	0	} Unexecuted instantiation: <gix_features::hash::_impl::Sha1>::update Unexecuted instantiation: <gix_features::hash::_impl::Sha1>::update Unexecuted instantiation: <gix_features::hash::_impl::Sha1>::update
19		/// Finalize the hash and produce a digest.
20	0	pub fn digest(self) -> Sha1Digest {
21	0	self.0.digest().bytes()
22	0	} Unexecuted instantiation: <gix_features::hash::_impl::Sha1>::digest Unexecuted instantiation: <gix_features::hash::_impl::Sha1>::digest Unexecuted instantiation: <gix_features::hash::_impl::Sha1>::digest
23		}
24		}
25
26		/// A 20 bytes digest produced by a [`Sha1`] hash implementation.
27		#[cfg(any(feature = "fast-sha1", feature = "rustsha1"))]
28		pub type Sha1Digest = [u8; 20];
29
30		#[cfg(feature = "fast-sha1")]
31		mod _impl {
32		use sha1::Digest;
33
34		use super::Sha1Digest;
35
36		/// A implementation of the Sha1 hash, which can be used once.
37		#[derive(Default, Clone)]
38		pub struct Sha1(sha1::Sha1);
39
40		impl Sha1 {
41		/// Digest the given `bytes`.
42		pub fn update(&mut self, bytes: &[u8]) {
43		self.0.update(bytes)
44		}
45		/// Finalize the hash and produce a digest.
46		pub fn digest(self) -> Sha1Digest {
47		self.0.finalize().into()
48		}
49		}
50		}
51
52		#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
53		pub use _impl::Sha1;
54
55		/// Compute a CRC32 hash from the given `bytes`, returning the CRC32 hash.
56		///
57		/// When calling this function for the first time, `previous_value` should be `0`. Otherwise it
58		/// should be the previous return value of this function to provide a hash of multiple sequential
59		/// chunks of `bytes`.
60		#[cfg(feature = "crc32")]
61		pub fn crc32_update(previous_value: u32, bytes: &[u8]) -> u32 {
62		let mut h = crc32fast::Hasher::new_with_initial(previous_value);
63		h.update(bytes);
64		h.finalize()
65		}
66
67		/// Compute a CRC32 value of the given input `bytes`.
68		///
69		/// In case multiple chunks of `bytes` are present, one should use [`crc32_update()`] instead.
70		#[cfg(feature = "crc32")]
71		pub fn crc32(bytes: &[u8]) -> u32 {
72		let mut h = crc32fast::Hasher::new();
73		h.update(bytes);
74		h.finalize()
75		}
76
77		/// Produce a hasher suitable for the given kind of hash.
78		#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
79	0	pub fn hasher(kind: gix_hash::Kind) -> Sha1 {
80	0	match kind {
81	0	gix_hash::Kind::Sha1 => Sha1::default(),
82	0	}
83	0	} Unexecuted instantiation: gix_features::hash::hasher Unexecuted instantiation: gix_features::hash::hasher Unexecuted instantiation: gix_features::hash::hasher
84
85		/// Compute the hash of `kind` for the bytes in the file at `path`, hashing only the first `num_bytes_from_start`
86		/// while initializing and calling `progress`.
87		///
88		/// `num_bytes_from_start` is useful to avoid reading trailing hashes, which are never part of the hash itself,
89		/// denoting the amount of bytes to hash starting from the beginning of the file.
90		///
91		/// # Note
92		///
93		/// * Only available with the `gix-object` feature enabled due to usage of the [`gix_hash::Kind`] enum and the
94		/// [`gix_hash::ObjectId`] return value.
95		/// * [Interrupts][crate::interrupt] are supported.
96		#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
97	0	pub fn bytes_of_file(
98	0	path: &std::path::Path,
99	0	num_bytes_from_start: u64,
100	0	kind: gix_hash::Kind,
101	0	progress: &mut dyn crate::progress::Progress,
102	0	should_interrupt: &std::sync::atomic::AtomicBool,
103	0	) -> std::io::Result<gix_hash::ObjectId> {
104	0	bytes(
105	0	&mut std::fs::File::open(path)?,
106	0	num_bytes_from_start,
107	0	kind,
108	0	progress,
109	0	should_interrupt,
110		)
111	0	} Unexecuted instantiation: gix_features::hash::bytes_of_file Unexecuted instantiation: gix_features::hash::bytes_of_file
112
113		/// Similar to [`bytes_of_file`], but operates on a stream of bytes.
114		#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
115	0	pub fn bytes(
116	0	read: &mut dyn std::io::Read,
117	0	num_bytes_from_start: u64,
118	0	kind: gix_hash::Kind,
119	0	progress: &mut dyn crate::progress::Progress,
120	0	should_interrupt: &std::sync::atomic::AtomicBool,
121	0	) -> std::io::Result<gix_hash::ObjectId> {
122	0	bytes_with_hasher(read, num_bytes_from_start, hasher(kind), progress, should_interrupt)
123	0	} Unexecuted instantiation: gix_features::hash::bytes Unexecuted instantiation: gix_features::hash::bytes
124
125		/// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind.
126		#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
127	0	pub fn bytes_with_hasher(
128	0	read: &mut dyn std::io::Read,
129	0	num_bytes_from_start: u64,
130	0	mut hasher: Sha1,
131	0	progress: &mut dyn crate::progress::Progress,
132	0	should_interrupt: &std::sync::atomic::AtomicBool,
133	0	) -> std::io::Result<gix_hash::ObjectId> {
134	0	let start = std::time::Instant::now();
135	0	// init progress before the possibility for failure, as convenience in case people want to recover
136	0	progress.init(
137	0	Some(num_bytes_from_start as prodash::progress::Step),
138	0	crate::progress::bytes(),
139	0	);
140	0
141	0	const BUF_SIZE: usize = u16::MAX as usize;
142	0	let mut buf = [0u8; BUF_SIZE];
143	0	let mut bytes_left = num_bytes_from_start;
144
145	0	while bytes_left > 0 {
146	0	let out = &mut buf[..BUF_SIZE.min(bytes_left as usize)];
147	0	read.read_exact(out)?;
148	0	bytes_left -= out.len() as u64;
149	0	progress.inc_by(out.len());
150	0	hasher.update(out);
151	0	if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) {
152	0	return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted"));
153	0	}
154		}
155
156	0	let id = gix_hash::ObjectId::from(hasher.digest());
157	0	progress.show_throughput(start);
158	0	Ok(id)
159	0	} Unexecuted instantiation: gix_features::hash::bytes_with_hasher Unexecuted instantiation: gix_features::hash::bytes_with_hasher
160
161		#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
162		mod write {
163		use crate::hash::Sha1;
164
165		/// A utility to automatically generate a hash while writing into an inner writer.
166		pub struct Write<T> {
167		/// The hash implementation.
168		pub hash: Sha1,
169		/// The inner writer.
170		pub inner: T,
171		}
172
173		impl<T> std::io::Write for Write<T>
174		where
175		T: std::io::Write,
176		{
177	0	fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
178	0	let written = self.inner.write(buf)?;
179	0	self.hash.update(&buf[..written]);
180	0	Ok(written)
181	0	} Unexecuted instantiation: <gix_features::hash::write::Write<_> as std::io::Write>::write Unexecuted instantiation: <gix_features::hash::write::Write<_> as std::io::Write>::write Unexecuted instantiation: <gix_features::hash::write::Write<_> as std::io::Write>::write
182
183	0	fn flush(&mut self) -> std::io::Result<()> {
184	0	self.inner.flush()
185	0	} Unexecuted instantiation: <gix_features::hash::write::Write<_> as std::io::Write>::flush Unexecuted instantiation: <gix_features::hash::write::Write<_> as std::io::Write>::flush Unexecuted instantiation: <gix_features::hash::write::Write<_> as std::io::Write>::flush
186		}
187
188		impl<T> Write<T>
189		where
190		T: std::io::Write,
191		{
192		/// Create a new hash writer which hashes all bytes written to `inner` with a hash of `kind`.
193	0	pub fn new(inner: T, object_hash: gix_hash::Kind) -> Self {
194	0	match object_hash {
195	0	gix_hash::Kind::Sha1 => Write {
196	0	inner,
197	0	hash: Sha1::default(),
198	0	},
199	0	}
200	0	} Unexecuted instantiation: <gix_features::hash::write::Write<_>>::new Unexecuted instantiation: <gix_features::hash::write::Write<_>>::new Unexecuted instantiation: <gix_features::hash::write::Write<_>>::new
201		}
202		}
203		#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
204		pub use write::Write;