/src/gitoxide/gix-index/src/lib.rs
//! ## Feature Flags
#![cfg_attr(
    all(doc, feature = "document-features"),
    doc = ::document_features::document_features!()
)]
#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg))]
#![deny(unsafe_code, missing_docs, rust_2018_idioms)]

use std::{ops::Range, path::PathBuf};

use bstr::{BStr, ByteSlice};
use filetime::FileTime;
/// `gix_hash` is made available as it's part of the public API in various places.
pub use gix_hash as hash;
/// A re-export to allow calling [`State::from_tree()`].
pub use gix_validate as validate;

///
pub mod file;

///
pub mod extension;

///
pub mod entry;

mod access;

///
pub mod init;

///
pub mod decode;

///
pub mod verify;

///
pub mod write;

pub mod fs;

/// All known versions of a git index file.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Version {
    /// Supports entries and various extensions.
    V2 = 2,
    /// Adds support for additional flags for each entry, called extended entries.
    V3 = 3,
    /// Supports deltified entry paths.
    V4 = 4,
}
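
// The enum discriminants mirror the version number stored in the on-disk index header,
// so obtaining the numeric version is a plain cast. This helper is purely illustrative
// and not part of the crate's API.
#[allow(dead_code)]
fn version_as_number(version: Version) -> u32 {
    version as u32
}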

/// An entry in the index, identifying a non-tree item on disk.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Entry {
    /// The filesystem stat information for the file on disk.
    pub stat: entry::Stat,
    /// The object id for this entry's ODB representation (assuming it's up-to-date with it).
    pub id: gix_hash::ObjectId,
    /// Additional flags for use in algorithms and for efficiently storing stage information.
    pub flags: entry::Flags,
    /// The kind of item this entry represents - it's not all blobs in the index anymore.
    pub mode: entry::Mode,
    /// The range to look up in the path backing to obtain the entry path relative to the repository.
    /// This costs additional memory but is probably worth it given that paths can stay in one big allocation.
    path: Range<usize>,
}
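
// Entry paths are not stored inline; `path` is a range into the owning `State`'s shared
// path backing. A minimal sketch of resolving a path through the public accessor, assuming
// a decoded `state` with at least one entry (illustrative only, not part of the crate's API):
#[allow(dead_code)]
fn first_entry_path(state: &State) -> Option<&BStr> {
    state.entries().first().map(|entry| entry.path(state))
}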

/// An index file whose state was read from a file on disk.
#[derive(Clone)]
pub struct File {
    /// The state containing the actual index data.
    pub(crate) state: State,
    /// The path from which the index was read or to which it is supposed to be written.
    pub(crate) path: PathBuf,
    /// The checksum of all bytes prior to the checksum itself.
    pub(crate) checksum: Option<gix_hash::ObjectId>,
}

/// The type to use and store paths to all entries.
pub type PathStorage = Vec<u8>;
/// The type to use and store paths to all entries, as a reference.
pub type PathStorageRef = [u8];

struct DirEntry<'a> {
    /// The first entry in the directory.
    entry: &'a Entry,
    /// One past the last byte of the directory in the path-backing.
    dir_end: usize,
}

impl DirEntry<'_> {
    fn path<'a>(&self, state: &'a State) -> &'a BStr {
        let range = self.entry.path.start..self.dir_end;
        state.path_backing[range].as_bstr()
    }
}
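
// Illustration: if the path backing holds "src/lib.rs" for an entry whose range starts at
// offset 0, a `DirEntry` with `dir_end: 3` yields the directory portion "src" from `path()`,
// i.e. bytes `0..3` of the backing. The values are made up for demonstration.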

/// A backing store for accelerating lookups of entries in a case-sensitive and case-insensitive manner.
pub struct AccelerateLookup<'a> {
    /// The entries themselves, hashed by their full icase path.
    /// Icase-clashes are handled in order of occurrence and are all available for iteration.
    icase_entries: hashbrown::HashTable<&'a Entry>,
    /// Each hash in this table corresponds to a directory containing one or more entries.
    icase_dirs: hashbrown::HashTable<DirEntry<'a>>,
}
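
// A conceptual sketch of the case-insensitive hashing the tables above rely on: paths are
// hashed with ASCII letters folded to lowercase so that "README" and "readme" land in the
// same bucket, and clashes are resolved by comparing the folded paths. This is illustrative
// only and not the crate's actual hashing routine.
#[allow(dead_code)]
fn icase_hash_sketch(path: &BStr) -> u64 {
    use std::hash::{Hash, Hasher};
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    for byte in path.iter() {
        byte.to_ascii_lowercase().hash(&mut hasher);
    }
    hasher.finish()
}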

/// An in-memory cache of a fully parsed git index file.
///
/// As opposed to a snapshot, it's meant to be altered and eventually be written back to disk or converted into a tree.
/// We treat the index and its state as synonymous.
///
/// # A note on safety
///
/// An index (i.e. [`State`]) created by hand is not guaranteed to have valid entry paths, as they are entirely controlled
/// by the caller without any level of validation being applied.
///
/// This means that before using these paths to recreate files on disk, *they must be validated*.
///
/// Notably, it's possible to manufacture tree objects whose entries carry names like `.git/hooks/pre-commit`, which will
/// then appear verbatim as `.git/hooks/pre-commit` in the index; the index doesn't care that the name came from a single
/// tree entry instead of from trees named `.git` and `hooks` and a blob named `pre-commit`. The effect is the same: an invalid
/// path is presented in the index, and its consumer must validate each path component before usage.
///
/// It's recommended to do that using `gix_worktree::Stack`, which has this validation built in if it's created with `for_checkout()`.
/// Alternatively, one can validate component names with `gix_validate::path::component()`.
#[derive(Clone)]
pub struct State {
    /// The kind of object hash used when storing the underlying file.
    ///
    /// Empty states, for example, won't contain any object ids, so deducing the hash used isn't always possible.
    object_hash: gix_hash::Kind,
    /// The time at which the state was created, indicating its freshness compared to other files on disk.
    ///
    /// Note that on platforms which only offer one-second precision for this time, we will treat all entries whose
    /// timestamp matches this one as potentially changed, checking more thoroughly whether a change actually happened.
    timestamp: FileTime,
    version: Version,
    entries: Vec<Entry>,
    /// A memory area keeping all index paths, in full length, independently of the index version.
    ///
    /// Ranges into this storage are referred to by parts of `entries`.
    path_backing: PathStorage,
    /// True if at least one entry in the index has a special marker mode.
    is_sparse: bool,

    // Extensions
    end_of_index_at_decode_time: bool,
    offset_table_at_decode_time: bool,
    tree: Option<extension::Tree>,
    link: Option<extension::Link>,
    resolve_undo: Option<extension::resolve_undo::Paths>,
    untracked: Option<extension::UntrackedCache>,
    fs_monitor: Option<extension::FsMonitor>,
}
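
// The safety note on `State` asks consumers to validate entry paths before using them on
// disk. A deliberately minimal sketch of the kind of per-component check meant there; real
// code should rely on `gix_validate::path::component()` or `gix_worktree::Stack`, which
// reject many more cases than this illustration does.
#[allow(dead_code)]
fn looks_safe_for_checkout_sketch(path: &BStr) -> bool {
    path.split(|byte| *byte == b'/').all(|component| {
        !component.is_empty() && component != b".." && !component.eq_ignore_ascii_case(b".git")
    })
}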

mod impls {
    use std::fmt::{Debug, Formatter};

    use crate::{entry::Stage, State};

    impl Debug for State {
        fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
            for entry in &self.entries {
                writeln!(
                    f,
                    "{} {}{:?} {} {}",
                    match entry.flags.stage() {
                        Stage::Unconflicted => " ",
                        Stage::Base => "BASE ",
                        Stage::Ours => "OURS ",
                        Stage::Theirs => "THEIRS ",
                    },
                    if entry.flags.is_empty() {
                        "".to_string()
                    } else {
                        format!("{:?} ", entry.flags)
                    },
                    entry.mode,
                    entry.id,
                    entry.path(self)
                )?;
            }
            Ok(())
        }
    }
}

pub(crate) mod util {
    #[inline]
    pub fn var_int(data: &[u8]) -> Option<(u64, &[u8])> {
        let (num, consumed) = gix_features::decode::leb64_from_read(data).ok()?;
        let data = &data[consumed..];
        (num, data).into()
    }

    #[inline]
    pub fn read_u32(data: &[u8]) -> Option<(u32, &[u8])> {
        data.split_at_checked(4)
            .map(|(num, data)| (u32::from_be_bytes(num.try_into().unwrap()), data))
    }

    #[inline]
    pub fn read_u64(data: &[u8]) -> Option<(u64, &[u8])> {
        data.split_at_checked(8)
            .map(|(num, data)| (u64::from_be_bytes(num.try_into().unwrap()), data))
    }

    #[inline]
    pub fn from_be_u32(b: &[u8]) -> u32 {
        u32::from_be_bytes(b.try_into().unwrap())
    }

    #[inline]
    pub fn split_at_byte_exclusive(data: &[u8], byte: u8) -> Option<(&[u8], &[u8])> {
        if data.len() < 2 {
            return None;
        }
        data.iter().enumerate().find_map(|(idx, b)| {
            (*b == byte).then(|| {
                if idx == 0 {
                    (&[] as &[u8], &data[1..])
                } else {
                    let (a, b) = data.split_at(idx);
                    (a, &b[1..])
                }
            })
        })
    }
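
    // A small illustrative check of the splitting behavior above; the byte strings are made
    // up for demonstration. The delimiter byte is consumed and belongs to neither half, and
    // inputs shorter than two bytes never split.
    #[cfg(test)]
    mod tests {
        use super::split_at_byte_exclusive;

        #[test]
        fn split_at_byte_exclusive_consumes_the_delimiter() {
            assert_eq!(
                split_at_byte_exclusive(b"key\0value", 0),
                Some((&b"key"[..], &b"value"[..]))
            );
            assert_eq!(split_at_byte_exclusive(b"\0", 0), None);
        }
    }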
}