/rust/registry/src/index.crates.io-1949cf8c6b5b557f/vart-0.9.3/src/lib.rs
Line | Count | Source |
1 | | // #[allow(warnings)] |
2 | | pub mod art; |
3 | | pub mod iter; |
4 | | pub mod node; |
5 | | |
6 | | use std::cmp::{Ord, Ordering, PartialOrd}; |
7 | | use std::error::Error; |
8 | | use std::fmt; |
9 | | use std::fmt::Debug; |
10 | | use std::str::FromStr; |
11 | | |
12 | | // "Partial" in the Adaptive Radix Tree paper refers to "partial keys", a technique employed |
13 | | // for prefix compression in this data structure. Instead of storing entire keys in the nodes, |
14 | | // ART nodes often only store partial keys, which are the differing prefixes of the keys. |
15 | | // This approach significantly reduces the memory requirements of the data structure. |
16 | | // Key is a trait that provides an abstraction for partial keys. |
17 | | pub trait Key { |
18 | | fn at(&self, pos: usize) -> u8; |
19 | | fn len(&self) -> usize; |
20 | | fn prefix_before(&self, length: usize) -> &[u8]; |
21 | | fn prefix_after(&self, start: usize) -> &[u8]; |
22 | | fn longest_common_prefix(&self, slice: &[u8]) -> usize; |
23 | | fn as_slice(&self) -> &[u8]; |
24 | | fn extend(&self, other: &Self) -> Self; |
25 | 0 | fn is_empty(&self) -> bool { |
26 | 0 | self.len() == 0 |
27 | 0 | } |
28 | | } |
29 | | |
30 | | pub trait KeyTrait: Key + Clone + Ord + Debug + for<'a> From<&'a [u8]> {} |
31 | | impl<T: Key + Clone + Ord + Debug + for<'a> From<&'a [u8]>> KeyTrait for T {} |
32 | | |
33 | | /* |
34 | | Key trait implementations |
35 | | */ |
36 | | |
37 | | // Source: https://www.the-paper-trail.org/post/art-paper-notes/ |
38 | | // |
39 | | // Keys can be of two types: |
40 | | // 1. Fixed-length datatypes such as 128-bit integers, or strings of exactly 64-bytes, |
41 | | // don’t have any problem because there can, by construction, never be any key that’s |
42 | | // a prefix of any other. |
43 | | // |
44 | | // 2. Variable-length datatypes such as general strings, can be transformed into types |
45 | | // where no key is the prefix of any other by a simple trick: append the NULL byte to every key. |
46 | | // The NULL byte, as it does in C-style strings, indicates that this is the end of the key, and |
47 | | // no characters can come after it. Therefore no string with a null-byte can be a prefix of any other, |
48 | | // because no string can have any characters after the NULL byte! |
49 | | // |
50 | | #[derive(Clone, Debug, Eq)] |
51 | | pub struct FixedSizeKey<const SIZE: usize> { |
52 | | content: [u8; SIZE], |
53 | | len: usize, |
54 | | } |
55 | | |
56 | | impl<const SIZE: usize> PartialEq for FixedSizeKey<SIZE> { |
57 | 0 | fn eq(&self, other: &Self) -> bool { |
58 | 0 | self.content[..self.len] == other.content[..other.len] |
59 | 0 | } |
60 | | } |
61 | | |
62 | | impl<const SIZE: usize> PartialOrd for FixedSizeKey<SIZE> { |
63 | 0 | fn partial_cmp(&self, other: &Self) -> Option<Ordering> { |
64 | 0 | Some(self.cmp(other)) |
65 | 0 | } |
66 | | } |
67 | | impl<const SIZE: usize> Ord for FixedSizeKey<SIZE> { |
68 | 0 | fn cmp(&self, other: &Self) -> Ordering { |
69 | 0 | self.content[..self.len].cmp(&other.content[..other.len]) |
70 | 0 | } |
71 | | } |
72 | | |
73 | | impl<const SIZE: usize> FixedSizeKey<SIZE> { |
74 | | // Create new instance with data ending in zero byte |
75 | 0 | pub fn create_key(src: &[u8]) -> Self { |
76 | 0 | debug_assert!(src.len() < SIZE); |
77 | 0 | let mut content = [0; SIZE]; |
78 | 0 | content[..src.len()].copy_from_slice(src); |
79 | 0 | content[src.len()] = 0; |
80 | 0 | Self { |
81 | 0 | content, |
82 | 0 | len: src.len() + 1, |
83 | 0 | } |
84 | 0 | } |
85 | | |
86 | | // Create new instance from slice |
87 | 0 | pub fn from_slice(src: &[u8]) -> Self { |
88 | 0 | debug_assert!(src.len() <= SIZE); |
89 | 0 | let mut content = [0; SIZE]; |
90 | 0 | content[..src.len()].copy_from_slice(src); |
91 | 0 | Self { |
92 | 0 | content, |
93 | 0 | len: src.len(), |
94 | 0 | } |
95 | 0 | } |
96 | | |
97 | 0 | pub fn from_string(s: &String) -> Self { |
98 | 0 | assert!(s.len() < SIZE, "data length is greater than array length"); |
99 | 0 | let mut arr = [0; SIZE]; |
100 | 0 | arr[..s.len()].copy_from_slice(s.as_bytes()); |
101 | 0 | Self { |
102 | 0 | content: arr, |
103 | 0 | len: s.len() + 1, |
104 | 0 | } |
105 | 0 | } |
106 | | } |
107 | | |
108 | | impl<const SIZE: usize> Key for FixedSizeKey<SIZE> { |
109 | | // Returns slice of the internal data up to the actual length |
110 | 0 | fn as_slice(&self) -> &[u8] { |
111 | 0 | &self.content[..self.len] |
112 | 0 | } |
113 | | |
114 | | // Creates a new instance of FixedSizeKey consisting only of the initial part of the content |
115 | 0 | fn prefix_before(&self, length: usize) -> &[u8] { |
116 | 0 | assert!(length <= self.len); |
117 | 0 | &self.content[..length] |
118 | 0 | } |
119 | | |
120 | | // Creates a new instance of FixedSizeKey excluding the initial part of the content |
121 | 0 | fn prefix_after(&self, start: usize) -> &[u8] { |
122 | 0 | assert!(start <= self.len); |
123 | 0 | &self.content[start..self.len] |
124 | 0 | } |
125 | | |
126 | | #[inline(always)] |
127 | 0 | fn at(&self, pos: usize) -> u8 { |
128 | 0 | assert!(pos < self.len); |
129 | 0 | self.content[pos] |
130 | 0 | } |
131 | | |
132 | | #[inline(always)] |
133 | 0 | fn len(&self) -> usize { |
134 | 0 | self.len |
135 | 0 | } |
136 | | |
137 | | // Returns the length of the longest common prefix between this object's content and the given byte slice |
138 | 0 | fn longest_common_prefix(&self, key: &[u8]) -> usize { |
139 | 0 | let len = self.len.min(key.len()).min(SIZE); |
140 | 0 | self.content[..len] |
141 | 0 | .iter() |
142 | 0 | .zip(key) |
143 | 0 | .take_while(|&(a, &b)| *a == b) |
144 | 0 | .count() |
145 | 0 | } |
146 | | |
147 | 0 | fn extend(&self, other: &Self) -> Self { |
148 | 0 | assert!(self.len + other.len < SIZE); |
149 | 0 | let mut content = [0; SIZE]; |
150 | 0 | content[..self.len].copy_from_slice(&self.content[..self.len]); |
151 | 0 | content[self.len..self.len + other.len].copy_from_slice(&other.content[..other.len]); |
152 | 0 | Self { |
153 | 0 | content, |
154 | 0 | len: self.len + other.len, |
155 | 0 | } |
156 | 0 | } |
157 | | } |
158 | | |
159 | | impl<const SIZE: usize> FromStr for FixedSizeKey<SIZE> { |
160 | | type Err = TrieError; |
161 | | |
162 | 0 | fn from_str(s: &str) -> Result<Self, Self::Err> { |
163 | 0 | if s.len() >= SIZE { |
164 | 0 | return Err(TrieError::FixedSizeKeyLengthExceeded); |
165 | 0 | } |
166 | 0 | let mut arr = [0; SIZE]; |
167 | 0 | arr[..s.len()].copy_from_slice(s.as_bytes()); |
168 | 0 | Ok(Self { |
169 | 0 | content: arr, |
170 | 0 | len: s.len() + 1, |
171 | 0 | }) |
172 | 0 | } |
173 | | } |
174 | | |
175 | | impl<const SIZE: usize> From<&[u8]> for FixedSizeKey<SIZE> { |
176 | 0 | fn from(src: &[u8]) -> Self { |
177 | 0 | Self::from_slice(src) |
178 | 0 | } |
179 | | } |
180 | | |
181 | | impl<const N: usize> From<u8> for FixedSizeKey<N> { |
182 | 0 | fn from(data: u8) -> Self { |
183 | 0 | Self::from_slice(data.to_be_bytes().as_ref()) |
184 | 0 | } |
185 | | } |
186 | | |
187 | | impl<const N: usize> From<u16> for FixedSizeKey<N> { |
188 | 0 | fn from(data: u16) -> Self { |
189 | 0 | Self::from_slice(data.to_be_bytes().as_ref()) |
190 | 0 | } |
191 | | } |
192 | | |
193 | | impl<const N: usize> From<u64> for FixedSizeKey<N> { |
194 | 0 | fn from(data: u64) -> Self { |
195 | 0 | Self::from_slice(data.to_be_bytes().as_ref()) |
196 | 0 | } |
197 | | } |
198 | | |
199 | | impl<const N: usize> From<&str> for FixedSizeKey<N> { |
200 | 0 | fn from(data: &str) -> Self { |
201 | 0 | Self::from_str(data).unwrap() |
202 | 0 | } |
203 | | } |
204 | | |
205 | | impl<const N: usize> From<String> for FixedSizeKey<N> { |
206 | 0 | fn from(data: String) -> Self { |
207 | 0 | Self::from_string(&data) |
208 | 0 | } |
209 | | } |
210 | | impl<const N: usize> From<&String> for FixedSizeKey<N> { |
211 | 0 | fn from(data: &String) -> Self { |
212 | 0 | Self::from_string(data) |
213 | 0 | } |
214 | | } |
215 | | |
216 | | // A VariableSizeKey is a variable-length datatype with NULL byte appended to it. |
217 | | #[derive(Clone, PartialEq, PartialOrd, Ord, Eq, Debug)] |
218 | | pub struct VariableSizeKey { |
219 | | data: Vec<u8>, |
220 | | } |
221 | | |
222 | | impl VariableSizeKey { |
223 | 0 | pub fn key(src: &[u8]) -> Self { |
224 | 0 | Self::from_slice(src) |
225 | 0 | } |
226 | | |
227 | 0 | pub fn from_slice(src: &[u8]) -> Self { |
228 | 0 | Self { |
229 | 0 | data: Vec::from(src), |
230 | 0 | } |
231 | 0 | } |
232 | | |
233 | 0 | pub fn to_slice(&self) -> &[u8] { |
234 | 0 | &self.data |
235 | 0 | } |
236 | | |
237 | 0 | pub fn from_string(s: &String) -> Self { |
238 | 0 | Self::from_slice(s.as_bytes()) |
239 | 0 | } |
240 | | |
241 | 0 | pub fn from(data: Vec<u8>) -> Self { |
242 | 0 | Self { data } |
243 | 0 | } |
244 | | } |
245 | | |
246 | | impl FromStr for VariableSizeKey { |
247 | | type Err = (); |
248 | | |
249 | 0 | fn from_str(s: &str) -> Result<Self, Self::Err> { |
250 | 0 | let k = Self::from_slice(s.as_bytes()); |
251 | 0 | Ok(k) |
252 | 0 | } |
253 | | } |
254 | | |
255 | | impl From<&[u8]> for VariableSizeKey { |
256 | 0 | fn from(src: &[u8]) -> Self { |
257 | 0 | Self::from_slice(src) |
258 | 0 | } |
259 | | } |
260 | | |
261 | | impl Key for VariableSizeKey { |
262 | 0 | fn prefix_before(&self, length: usize) -> &[u8] { |
263 | 0 | assert!(length <= self.data.len()); |
264 | 0 | &self.data[..length] |
265 | 0 | } |
266 | | |
267 | 0 | fn prefix_after(&self, start: usize) -> &[u8] { |
268 | 0 | assert!(start <= self.data.len()); |
269 | 0 | &self.data[start..self.data.len()] |
270 | 0 | } |
271 | | |
272 | | #[inline(always)] |
273 | 0 | fn at(&self, pos: usize) -> u8 { |
274 | 0 | assert!(pos < self.data.len()); |
275 | 0 | self.data[pos] |
276 | 0 | } |
277 | | |
278 | | #[inline(always)] |
279 | 0 | fn len(&self) -> usize { |
280 | 0 | self.data.len() |
281 | 0 | } |
282 | | |
283 | | // Returns the length of the longest common prefix between this object's content and the given byte slice |
284 | 0 | fn longest_common_prefix(&self, key: &[u8]) -> usize { |
285 | 0 | let len = self.data.len().min(key.len()); |
286 | 0 | self.data[..len] |
287 | 0 | .iter() |
288 | 0 | .zip(key) |
289 | 0 | .take_while(|&(a, &b)| *a == b) |
290 | 0 | .count() |
291 | 0 | } |
292 | | |
293 | 0 | fn as_slice(&self) -> &[u8] { |
294 | 0 | &self.data[..self.data.len()] |
295 | 0 | } |
296 | | |
297 | 0 | fn extend(&self, other: &Self) -> Self { |
298 | 0 | let mut data = Vec::with_capacity(self.data.len() + other.data.len()); |
299 | 0 | data.extend_from_slice(&self.data); |
300 | 0 | data.extend_from_slice(&other.data); |
301 | 0 | Self { data } |
302 | 0 | } |
303 | | } |
304 | | |
305 | | // Define a custom error enum representing different error cases for the Trie |
306 | | #[derive(Clone, Debug)] |
307 | | pub enum TrieError { |
308 | | FixedSizeKeyLengthExceeded, |
309 | | VersionIsOld, |
310 | | RootIsNotUniquelyOwned, |
311 | | SnapshotOlderThanRoot, |
312 | | } |
313 | | |
314 | | impl Error for TrieError {} |
315 | | |
316 | | // Implement the Display trait to define how the error should be formatted as a string |
317 | | impl fmt::Display for TrieError { |
318 | 0 | fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { |
319 | 0 | match *self { |
320 | 0 | TrieError::FixedSizeKeyLengthExceeded => write!(f, "Fixed key length exceeded"), |
321 | | TrieError::VersionIsOld => { |
322 | 0 | write!(f, "Given version is older than root's current version") |
323 | | } |
324 | 0 | TrieError::RootIsNotUniquelyOwned => write!(f, "Root arc is not uniquely owned"), |
325 | 0 | TrieError::SnapshotOlderThanRoot => write!(f, "Snapshot is older than root"), |
326 | | } |
327 | 0 | } |
328 | | } |