/src/gitoxide/gix-index/src/lib.rs
//! ## Feature Flags
#![cfg_attr(
    all(doc, feature = "document-features"),
    doc = ::document_features::document_features!()
)]
#![cfg_attr(all(doc, feature = "document-features"), feature(doc_cfg))]
#![deny(unsafe_code, missing_docs, rust_2018_idioms)]

use std::{ops::Range, path::PathBuf};

use bstr::{BStr, ByteSlice};
use filetime::FileTime;
/// `gix_hash` is made available as it's part of the public API in various places.
pub use gix_hash as hash;
/// A re-export to allow calling [`State::from_tree()`].
pub use gix_validate as validate;

///
pub mod file;

///
pub mod extension;

///
pub mod entry;

mod access;

///
pub mod init;

///
pub mod decode;

///
pub mod verify;

///
pub mod write;

pub mod fs;

/// All known versions of a git index file.
#[derive(PartialEq, Eq, Debug, Hash, Ord, PartialOrd, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Version {
    /// Supports entries and various extensions.
    V2 = 2,
    /// Adds support for additional flags for each entry, called extended entries.
    V3 = 3,
    /// Supports deltified entry paths.
    V4 = 4,
}
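
// The enum discriminants mirror the version number stored in the on-disk index header,
// so obtaining the numeric version is a plain cast. This helper is purely illustrative
// and not part of the crate's API.
#[allow(dead_code)]
fn version_as_number(version: Version) -> u32 {
    version as u32
}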

/// An entry in the index, identifying a non-tree item on disk.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Entry {
    /// The filesystem stat information for the file on disk.
    pub stat: entry::Stat,
    /// The object id for this entry's ODB representation (assuming it's up-to-date with it).
    pub id: gix_hash::ObjectId,
    /// Additional flags for use in algorithms and for efficiently storing stage information.
    pub flags: entry::Flags,
    /// The kind of item this entry represents - it's not all blobs in the index anymore.
    pub mode: entry::Mode,
    /// The range to look up in the path backing to obtain the entry path relative to the repository.
    /// This costs additional memory but is probably worth it given that paths can stay in one big allocation.
    path: Range<usize>,
}
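
// Entry paths are not stored inline; `path` is a range into the owning `State`'s shared
// path backing. A minimal sketch of resolving a path through the public accessor, assuming
// a decoded `state` with at least one entry (illustrative only, not part of the crate's API):
#[allow(dead_code)]
fn first_entry_path(state: &State) -> Option<&BStr> {
    state.entries().first().map(|entry| entry.path(state))
}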

/// An index file whose state was read from a file on disk.
#[derive(Clone)]
pub struct File {
    /// The state containing the actual index data.
    pub(crate) state: State,
    /// The path from which the index was read or to which it is supposed to be written.
    pub(crate) path: PathBuf,
    /// The checksum of all bytes prior to the checksum itself.
    pub(crate) checksum: Option<gix_hash::ObjectId>,
}

/// The type to use and store paths to all entries.
pub type PathStorage = Vec<u8>;
/// The type to use and store paths to all entries, as a reference.
pub type PathStorageRef = [u8];

struct DirEntry<'a> {
    /// The first entry in the directory.
    entry: &'a Entry,
    /// One past the last byte of the directory in the path-backing.
    dir_end: usize,
}

impl DirEntry<'_> {
    fn path<'a>(&self, state: &'a State) -> &'a BStr {
        let range = self.entry.path.start..self.dir_end;
        state.path_backing[range].as_bstr()
    }
}
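
// Illustration: if the path backing holds "src/lib.rs" for an entry whose range starts at
// offset 0, a `DirEntry` with `dir_end: 3` yields the directory portion "src" from `path()`,
// i.e. bytes `0..3` of the backing. The values are made up for demonstration.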

/// A backing store for accelerating lookups of entries in a case-sensitive and case-insensitive manner.
pub struct AccelerateLookup<'a> {
    /// The entries themselves, hashed by their full icase path.
    /// Icase-clashes are handled in order of occurrence and are all available for iteration.
    icase_entries: hashbrown::HashTable<&'a Entry>,
    /// Each hash in this table corresponds to a directory containing one or more entries.
    icase_dirs: hashbrown::HashTable<DirEntry<'a>>,
}
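
// A conceptual sketch of the case-insensitive hashing the tables above rely on: paths are
// hashed with ASCII letters folded to lowercase so that "README" and "readme" land in the
// same bucket, and clashes are resolved by comparing the folded paths. This is illustrative
// only and not the crate's actual hashing routine.
#[allow(dead_code)]
fn icase_hash_sketch(path: &BStr) -> u64 {
    use std::hash::{Hash, Hasher};
    let mut hasher = std::collections::hash_map::DefaultHasher::new();
    for byte in path.iter() {
        byte.to_ascii_lowercase().hash(&mut hasher);
    }
    hasher.finish()
}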

/// An in-memory cache of a fully parsed git index file.
///
/// As opposed to a snapshot, it's meant to be altered and eventually be written back to disk or converted into a tree.
/// We treat the index and its state as synonymous.
///
/// # A note on safety
///
/// An index (i.e. [`State`]) created by hand is not guaranteed to have valid entry paths, as they are entirely controlled
/// by the caller without any level of validation being applied.
///
/// This means that before using these paths to recreate files on disk, *they must be validated*.
///
/// Notably, it's possible to manufacture tree objects whose entries carry names like `.git/hooks/pre-commit`, which will
/// then appear verbatim as `.git/hooks/pre-commit` in the index; the index doesn't care that the name came from a single
/// tree entry instead of from trees named `.git` and `hooks` and a blob named `pre-commit`. The effect is the same: an invalid
/// path is presented in the index, and its consumer must validate each path component before usage.
///
/// It's recommended to do that using `gix_worktree::Stack`, which has this validation built in if it's created with `for_checkout()`.
/// Alternatively, one can validate component names with `gix_validate::path::component()`.
#[derive(Clone)]
pub struct State {
    /// The kind of object hash used when storing the underlying file.
    ///
    /// Empty states, for example, won't contain any object ids, so deducing the hash used isn't always possible.
    object_hash: gix_hash::Kind,
    /// The time at which the state was created, indicating its freshness compared to other files on disk.
    ///
    /// Note that on platforms which only offer one-second precision for this time, we will treat all entries whose
    /// timestamp matches this one as potentially changed, checking more thoroughly whether a change actually happened.
    timestamp: FileTime,
    version: Version,
    entries: Vec<Entry>,
    /// A memory area keeping all index paths, in full length, independently of the index version.
    ///
    /// Ranges into this storage are referred to by parts of `entries`.
    path_backing: PathStorage,
    /// True if at least one entry in the index has a special marker mode.
    is_sparse: bool,

    // Extensions
    end_of_index_at_decode_time: bool,
    offset_table_at_decode_time: bool,
    tree: Option<extension::Tree>,
    link: Option<extension::Link>,
    resolve_undo: Option<extension::resolve_undo::Paths>,
    untracked: Option<extension::UntrackedCache>,
    fs_monitor: Option<extension::FsMonitor>,
}
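
// The safety note on `State` asks consumers to validate entry paths before using them on
// disk. A deliberately minimal sketch of the kind of per-component check meant there; real
// code should rely on `gix_validate::path::component()` or `gix_worktree::Stack`, which
// reject many more cases than this illustration does.
#[allow(dead_code)]
fn looks_safe_for_checkout_sketch(path: &BStr) -> bool {
    path.split(|byte| *byte == b'/').all(|component| {
        !component.is_empty() && component != b".." && !component.eq_ignore_ascii_case(b".git")
    })
}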

mod impls {
    use std::fmt::{Debug, Formatter};

    use crate::{entry::Stage, State};

    impl Debug for State {
        fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
            for entry in &self.entries {
                writeln!(
                    f,
                    "{} {}{:?} {} {}",
                    match entry.flags.stage() {
                        Stage::Unconflicted => " ",
                        Stage::Base => "BASE ",
                        Stage::Ours => "OURS ",
                        Stage::Theirs => "THEIRS ",
                    },
                    if entry.flags.is_empty() {
                        "".to_string()
                    } else {
                        format!("{:?} ", entry.flags)
                    },
                    entry.mode,
                    entry.id,
                    entry.path(self)
                )?;
            }
            Ok(())
        }
    }
}

pub(crate) mod util {
    #[inline]
    pub fn var_int(data: &[u8]) -> Option<(u64, &[u8])> {
        let (num, consumed) = gix_features::decode::leb64_from_read(data).ok()?;
        let data = &data[consumed..];
        (num, data).into()
    }

    #[inline]
    pub fn read_u32(data: &[u8]) -> Option<(u32, &[u8])> {
        data.split_at_checked(4)
            .map(|(num, data)| (u32::from_be_bytes(num.try_into().unwrap()), data))
    }

    #[inline]
    pub fn read_u64(data: &[u8]) -> Option<(u64, &[u8])> {
        data.split_at_checked(8)
            .map(|(num, data)| (u64::from_be_bytes(num.try_into().unwrap()), data))
    }

    #[inline]
    pub fn from_be_u32(b: &[u8]) -> u32 {
        u32::from_be_bytes(b.try_into().unwrap())
    }

    #[inline]
    pub fn split_at_byte_exclusive(data: &[u8], byte: u8) -> Option<(&[u8], &[u8])> {
        if data.len() < 2 {
            return None;
        }
        data.iter().enumerate().find_map(|(idx, b)| {
            (*b == byte).then(|| {
                if idx == 0 {
                    (&[] as &[u8], &data[1..])
                } else {
                    let (a, b) = data.split_at(idx);
                    (a, &b[1..])
                }
            })
        })
    }
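
    // A small illustrative check of the splitting behavior above; the byte strings are made
    // up for demonstration. The delimiter byte is consumed and belongs to neither half, and
    // inputs shorter than two bytes never split.
    #[cfg(test)]
    mod tests {
        use super::split_at_byte_exclusive;

        #[test]
        fn split_at_byte_exclusive_consumes_the_delimiter() {
            assert_eq!(
                split_at_byte_exclusive(b"key\0value", 0),
                Some((&b"key"[..], &b"value"[..]))
            );
            assert_eq!(split_at_byte_exclusive(b"\0", 0), None);
        }
    }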
}