/rust/registry/src/index.crates.io-6f17d22bba15001f/itertools-0.11.0/src/groupbylazy.rs
Line | Count | Source (jump to first uncovered line) |
1 | | use std::cell::{Cell, RefCell}; |
2 | | use alloc::vec::{self, Vec}; |
3 | | |
4 | | /// A trait to unify `FnMut` for `GroupBy` with the chunk key in `IntoChunks` |
5 | | trait KeyFunction<A> { |
6 | | type Key; |
7 | | fn call_mut(&mut self, arg: A) -> Self::Key; |
8 | | } |
9 | | |
10 | | impl<A, K, F: ?Sized> KeyFunction<A> for F |
11 | | where F: FnMut(A) -> K |
12 | | { |
13 | | type Key = K; |
14 | | #[inline] |
15 | 0 | fn call_mut(&mut self, arg: A) -> Self::Key { |
16 | 0 | (*self)(arg) |
17 | 0 | } |
18 | | } |
19 | | |
20 | | |
21 | | /// `ChunkIndex` acts like the grouping key function for `IntoChunks` |
22 | 0 | #[derive(Debug, Clone)] |
23 | | struct ChunkIndex { |
24 | | size: usize, |
25 | | index: usize, |
26 | | key: usize, |
27 | | } |
28 | | |
29 | | impl ChunkIndex { |
30 | | #[inline(always)] |
31 | 0 | fn new(size: usize) -> Self { |
32 | 0 | ChunkIndex { |
33 | 0 | size, |
34 | 0 | index: 0, |
35 | 0 | key: 0, |
36 | 0 | } |
37 | 0 | } |
38 | | } |
39 | | |
40 | | impl<A> KeyFunction<A> for ChunkIndex { |
41 | | type Key = usize; |
42 | | #[inline(always)] |
43 | 0 | fn call_mut(&mut self, _arg: A) -> Self::Key { |
44 | 0 | if self.index == self.size { |
45 | 0 | self.key += 1; |
46 | 0 | self.index = 0; |
47 | 0 | } |
48 | 0 | self.index += 1; |
49 | 0 | self.key |
50 | 0 | } |
51 | | } |
52 | | |
53 | 0 | #[derive(Clone)] |
54 | | struct GroupInner<K, I, F> |
55 | | where I: Iterator |
56 | | { |
57 | | key: F, |
58 | | iter: I, |
59 | | current_key: Option<K>, |
60 | | current_elt: Option<I::Item>, |
61 | | /// flag set if iterator is exhausted |
62 | | done: bool, |
63 | | /// Index of group we are currently buffering or visiting |
64 | | top_group: usize, |
65 | | /// Least index for which we still have elements buffered |
66 | | oldest_buffered_group: usize, |
67 | | /// Group index for `buffer[0]` -- the slots |
68 | | /// bottom_group..oldest_buffered_group are unused and will be erased when |
69 | | /// that range is large enough. |
70 | | bottom_group: usize, |
71 | | /// Buffered groups, from `bottom_group` (index 0) to `top_group`. |
72 | | buffer: Vec<vec::IntoIter<I::Item>>, |
73 | | /// index of last group iter that was dropped, usize::MAX == none |
74 | | dropped_group: usize, |
75 | | } |
76 | | |
77 | | impl<K, I, F> GroupInner<K, I, F> |
78 | | where I: Iterator, |
79 | | F: for<'a> KeyFunction<&'a I::Item, Key=K>, |
80 | | K: PartialEq, |
81 | | { |
82 | | /// `client`: Index of group that requests next element |
83 | | #[inline(always)] |
84 | 0 | fn step(&mut self, client: usize) -> Option<I::Item> { |
85 | 0 | /* |
86 | 0 | println!("client={}, bottom_group={}, oldest_buffered_group={}, top_group={}, buffers=[{}]", |
87 | 0 | client, self.bottom_group, self.oldest_buffered_group, |
88 | 0 | self.top_group, |
89 | 0 | self.buffer.iter().map(|elt| elt.len()).format(", ")); |
90 | 0 | */ |
91 | 0 | if client < self.oldest_buffered_group { |
92 | 0 | None |
93 | 0 | } else if client < self.top_group || |
94 | 0 | (client == self.top_group && |
95 | 0 | self.buffer.len() > self.top_group - self.bottom_group) |
96 | | { |
97 | 0 | self.lookup_buffer(client) |
98 | 0 | } else if self.done { |
99 | 0 | None |
100 | 0 | } else if self.top_group == client { |
101 | 0 | self.step_current() |
102 | | } else { |
103 | 0 | self.step_buffering(client) |
104 | | } |
105 | 0 | } |
106 | | |
107 | | #[inline(never)] |
108 | 0 | fn lookup_buffer(&mut self, client: usize) -> Option<I::Item> { |
109 | 0 | // if `bufidx` doesn't exist in self.buffer, it might be empty |
110 | 0 | let bufidx = client - self.bottom_group; |
111 | 0 | if client < self.oldest_buffered_group { |
112 | 0 | return None; |
113 | 0 | } |
114 | 0 | let elt = self.buffer.get_mut(bufidx).and_then(|queue| queue.next()); |
115 | 0 | if elt.is_none() && client == self.oldest_buffered_group { |
116 | | // FIXME: VecDeque is unfortunately not zero allocation when empty, |
117 | | // so we do this job manually. |
118 | | // `bottom_group..oldest_buffered_group` is unused, and if it's large enough, erase it. |
119 | 0 | self.oldest_buffered_group += 1; |
120 | | // skip forward further empty queues too |
121 | 0 | while self.buffer.get(self.oldest_buffered_group - self.bottom_group) |
122 | 0 | .map_or(false, |buf| buf.len() == 0) |
123 | 0 | { |
124 | 0 | self.oldest_buffered_group += 1; |
125 | 0 | } |
126 | | |
127 | 0 | let nclear = self.oldest_buffered_group - self.bottom_group; |
128 | 0 | if nclear > 0 && nclear >= self.buffer.len() / 2 { |
129 | 0 | let mut i = 0; |
130 | 0 | self.buffer.retain(|buf| { |
131 | 0 | i += 1; |
132 | 0 | debug_assert!(buf.len() == 0 || i > nclear); |
133 | 0 | i > nclear |
134 | 0 | }); |
135 | 0 | self.bottom_group = self.oldest_buffered_group; |
136 | 0 | } |
137 | 0 | } |
138 | 0 | elt |
139 | 0 | } |
140 | | |
141 | | /// Take the next element from the iterator, and set the done |
142 | | /// flag if exhausted. Must not be called after done. |
143 | | #[inline(always)] |
144 | 0 | fn next_element(&mut self) -> Option<I::Item> { |
145 | 0 | debug_assert!(!self.done); |
146 | 0 | match self.iter.next() { |
147 | 0 | None => { self.done = true; None } |
148 | 0 | otherwise => otherwise, |
149 | | } |
150 | 0 | } |
151 | | |
152 | | |
153 | | #[inline(never)] |
154 | 0 | fn step_buffering(&mut self, client: usize) -> Option<I::Item> { |
155 | | // requested a later group -- walk through the current group up to |
156 | | // the requested group index, and buffer the elements (unless |
157 | | // the group is marked as dropped). |
158 | | // Because the `Groups` iterator is always the first to request |
159 | | // each group index, client is the next index efter top_group. |
160 | 0 | debug_assert!(self.top_group + 1 == client); |
161 | 0 | let mut group = Vec::new(); |
162 | | |
163 | 0 | if let Some(elt) = self.current_elt.take() { |
164 | 0 | if self.top_group != self.dropped_group { |
165 | 0 | group.push(elt); |
166 | 0 | } |
167 | 0 | } |
168 | 0 | let mut first_elt = None; // first element of the next group |
169 | | |
170 | 0 | while let Some(elt) = self.next_element() { |
171 | 0 | let key = self.key.call_mut(&elt); |
172 | 0 | match self.current_key.take() { |
173 | 0 | None => {} |
174 | 0 | Some(old_key) => if old_key != key { |
175 | 0 | self.current_key = Some(key); |
176 | 0 | first_elt = Some(elt); |
177 | 0 | break; |
178 | 0 | }, |
179 | | } |
180 | 0 | self.current_key = Some(key); |
181 | 0 | if self.top_group != self.dropped_group { |
182 | 0 | group.push(elt); |
183 | 0 | } |
184 | | } |
185 | | |
186 | 0 | if self.top_group != self.dropped_group { |
187 | 0 | self.push_next_group(group); |
188 | 0 | } |
189 | 0 | if first_elt.is_some() { |
190 | 0 | self.top_group += 1; |
191 | 0 | debug_assert!(self.top_group == client); |
192 | 0 | } |
193 | 0 | first_elt |
194 | 0 | } |
195 | | |
196 | 0 | fn push_next_group(&mut self, group: Vec<I::Item>) { |
197 | | // When we add a new buffered group, fill up slots between oldest_buffered_group and top_group |
198 | 0 | while self.top_group - self.bottom_group > self.buffer.len() { |
199 | 0 | if self.buffer.is_empty() { |
200 | 0 | self.bottom_group += 1; |
201 | 0 | self.oldest_buffered_group += 1; |
202 | 0 | } else { |
203 | 0 | self.buffer.push(Vec::new().into_iter()); |
204 | 0 | } |
205 | | } |
206 | 0 | self.buffer.push(group.into_iter()); |
207 | 0 | debug_assert!(self.top_group + 1 - self.bottom_group == self.buffer.len()); |
208 | 0 | } |
209 | | |
210 | | /// This is the immediate case, where we use no buffering |
211 | | #[inline] |
212 | 0 | fn step_current(&mut self) -> Option<I::Item> { |
213 | 0 | debug_assert!(!self.done); |
214 | 0 | if let elt @ Some(..) = self.current_elt.take() { |
215 | 0 | return elt; |
216 | 0 | } |
217 | 0 | match self.next_element() { |
218 | 0 | None => None, |
219 | 0 | Some(elt) => { |
220 | 0 | let key = self.key.call_mut(&elt); |
221 | 0 | match self.current_key.take() { |
222 | 0 | None => {} |
223 | 0 | Some(old_key) => if old_key != key { |
224 | 0 | self.current_key = Some(key); |
225 | 0 | self.current_elt = Some(elt); |
226 | 0 | self.top_group += 1; |
227 | 0 | return None; |
228 | 0 | }, |
229 | | } |
230 | 0 | self.current_key = Some(key); |
231 | 0 | Some(elt) |
232 | | } |
233 | | } |
234 | 0 | } |
235 | | |
236 | | /// Request the just started groups' key. |
237 | | /// |
238 | | /// `client`: Index of group |
239 | | /// |
240 | | /// **Panics** if no group key is available. |
241 | 0 | fn group_key(&mut self, client: usize) -> K { |
242 | | // This can only be called after we have just returned the first |
243 | | // element of a group. |
244 | | // Perform this by simply buffering one more element, grabbing the |
245 | | // next key. |
246 | 0 | debug_assert!(!self.done); |
247 | 0 | debug_assert!(client == self.top_group); |
248 | 0 | debug_assert!(self.current_key.is_some()); |
249 | 0 | debug_assert!(self.current_elt.is_none()); |
250 | 0 | let old_key = self.current_key.take().unwrap(); |
251 | 0 | if let Some(elt) = self.next_element() { |
252 | 0 | let key = self.key.call_mut(&elt); |
253 | 0 | if old_key != key { |
254 | 0 | self.top_group += 1; |
255 | 0 | } |
256 | 0 | self.current_key = Some(key); |
257 | 0 | self.current_elt = Some(elt); |
258 | 0 | } |
259 | 0 | old_key |
260 | 0 | } |
261 | | } |
262 | | |
263 | | impl<K, I, F> GroupInner<K, I, F> |
264 | | where I: Iterator, |
265 | | { |
266 | | /// Called when a group is dropped |
267 | 0 | fn drop_group(&mut self, client: usize) { |
268 | 0 | // It's only useful to track the maximal index |
269 | 0 | if self.dropped_group == !0 || client > self.dropped_group { |
270 | 0 | self.dropped_group = client; |
271 | 0 | } |
272 | 0 | } |
273 | | } |
274 | | |
275 | | /// `GroupBy` is the storage for the lazy grouping operation. |
276 | | /// |
277 | | /// If the groups are consumed in their original order, or if each |
278 | | /// group is dropped without keeping it around, then `GroupBy` uses |
279 | | /// no allocations. It needs allocations only if several group iterators |
280 | | /// are alive at the same time. |
281 | | /// |
282 | | /// This type implements [`IntoIterator`] (it is **not** an iterator |
283 | | /// itself), because the group iterators need to borrow from this |
284 | | /// value. It should be stored in a local variable or temporary and |
285 | | /// iterated. |
286 | | /// |
287 | | /// See [`.group_by()`](crate::Itertools::group_by) for more information. |
288 | | #[must_use = "iterator adaptors are lazy and do nothing unless consumed"] |
289 | | pub struct GroupBy<K, I, F> |
290 | | where I: Iterator, |
291 | | { |
292 | | inner: RefCell<GroupInner<K, I, F>>, |
293 | | // the group iterator's current index. Keep this in the main value |
294 | | // so that simultaneous iterators all use the same state. |
295 | | index: Cell<usize>, |
296 | | } |
297 | | |
298 | | /// Create a new |
299 | 0 | pub fn new<K, J, F>(iter: J, f: F) -> GroupBy<K, J::IntoIter, F> |
300 | 0 | where J: IntoIterator, |
301 | 0 | F: FnMut(&J::Item) -> K, |
302 | 0 | { |
303 | 0 | GroupBy { |
304 | 0 | inner: RefCell::new(GroupInner { |
305 | 0 | key: f, |
306 | 0 | iter: iter.into_iter(), |
307 | 0 | current_key: None, |
308 | 0 | current_elt: None, |
309 | 0 | done: false, |
310 | 0 | top_group: 0, |
311 | 0 | oldest_buffered_group: 0, |
312 | 0 | bottom_group: 0, |
313 | 0 | buffer: Vec::new(), |
314 | 0 | dropped_group: !0, |
315 | 0 | }), |
316 | 0 | index: Cell::new(0), |
317 | 0 | } |
318 | 0 | } |
319 | | |
320 | | impl<K, I, F> GroupBy<K, I, F> |
321 | | where I: Iterator, |
322 | | { |
323 | | /// `client`: Index of group that requests next element |
324 | 0 | fn step(&self, client: usize) -> Option<I::Item> |
325 | 0 | where F: FnMut(&I::Item) -> K, |
326 | 0 | K: PartialEq, |
327 | 0 | { |
328 | 0 | self.inner.borrow_mut().step(client) |
329 | 0 | } |
330 | | |
331 | | /// `client`: Index of group |
332 | 0 | fn drop_group(&self, client: usize) { |
333 | 0 | self.inner.borrow_mut().drop_group(client); |
334 | 0 | } |
335 | | } |
336 | | |
337 | | impl<'a, K, I, F> IntoIterator for &'a GroupBy<K, I, F> |
338 | | where I: Iterator, |
339 | | I::Item: 'a, |
340 | | F: FnMut(&I::Item) -> K, |
341 | | K: PartialEq |
342 | | { |
343 | | type Item = (K, Group<'a, K, I, F>); |
344 | | type IntoIter = Groups<'a, K, I, F>; |
345 | | |
346 | 0 | fn into_iter(self) -> Self::IntoIter { |
347 | 0 | Groups { parent: self } |
348 | 0 | } |
349 | | } |
350 | | |
351 | | |
352 | | /// An iterator that yields the Group iterators. |
353 | | /// |
354 | | /// Iterator element type is `(K, Group)`: |
355 | | /// the group's key `K` and the group's iterator. |
356 | | /// |
357 | | /// See [`.group_by()`](crate::Itertools::group_by) for more information. |
358 | | #[must_use = "iterator adaptors are lazy and do nothing unless consumed"] |
359 | | pub struct Groups<'a, K: 'a, I: 'a, F: 'a> |
360 | | where I: Iterator, |
361 | | I::Item: 'a |
362 | | { |
363 | | parent: &'a GroupBy<K, I, F>, |
364 | | } |
365 | | |
366 | | impl<'a, K, I, F> Iterator for Groups<'a, K, I, F> |
367 | | where I: Iterator, |
368 | | I::Item: 'a, |
369 | | F: FnMut(&I::Item) -> K, |
370 | | K: PartialEq |
371 | | { |
372 | | type Item = (K, Group<'a, K, I, F>); |
373 | | |
374 | | #[inline] |
375 | 0 | fn next(&mut self) -> Option<Self::Item> { |
376 | 0 | let index = self.parent.index.get(); |
377 | 0 | self.parent.index.set(index + 1); |
378 | 0 | let inner = &mut *self.parent.inner.borrow_mut(); |
379 | 0 | inner.step(index).map(|elt| { |
380 | 0 | let key = inner.group_key(index); |
381 | 0 | (key, Group { |
382 | 0 | parent: self.parent, |
383 | 0 | index, |
384 | 0 | first: Some(elt), |
385 | 0 | }) |
386 | 0 | }) |
387 | 0 | } |
388 | | } |
389 | | |
390 | | /// An iterator for the elements in a single group. |
391 | | /// |
392 | | /// Iterator element type is `I::Item`. |
393 | | pub struct Group<'a, K: 'a, I: 'a, F: 'a> |
394 | | where I: Iterator, |
395 | | I::Item: 'a, |
396 | | { |
397 | | parent: &'a GroupBy<K, I, F>, |
398 | | index: usize, |
399 | | first: Option<I::Item>, |
400 | | } |
401 | | |
402 | | impl<'a, K, I, F> Drop for Group<'a, K, I, F> |
403 | | where I: Iterator, |
404 | | I::Item: 'a, |
405 | | { |
406 | 0 | fn drop(&mut self) { |
407 | 0 | self.parent.drop_group(self.index); |
408 | 0 | } |
409 | | } |
410 | | |
411 | | impl<'a, K, I, F> Iterator for Group<'a, K, I, F> |
412 | | where I: Iterator, |
413 | | I::Item: 'a, |
414 | | F: FnMut(&I::Item) -> K, |
415 | | K: PartialEq, |
416 | | { |
417 | | type Item = I::Item; |
418 | | #[inline] |
419 | 0 | fn next(&mut self) -> Option<Self::Item> { |
420 | 0 | if let elt @ Some(..) = self.first.take() { |
421 | 0 | return elt; |
422 | 0 | } |
423 | 0 | self.parent.step(self.index) |
424 | 0 | } |
425 | | } |
426 | | |
427 | | ///// IntoChunks ///// |
428 | | |
429 | | /// Create a new |
430 | 0 | pub fn new_chunks<J>(iter: J, size: usize) -> IntoChunks<J::IntoIter> |
431 | 0 | where J: IntoIterator, |
432 | 0 | { |
433 | 0 | IntoChunks { |
434 | 0 | inner: RefCell::new(GroupInner { |
435 | 0 | key: ChunkIndex::new(size), |
436 | 0 | iter: iter.into_iter(), |
437 | 0 | current_key: None, |
438 | 0 | current_elt: None, |
439 | 0 | done: false, |
440 | 0 | top_group: 0, |
441 | 0 | oldest_buffered_group: 0, |
442 | 0 | bottom_group: 0, |
443 | 0 | buffer: Vec::new(), |
444 | 0 | dropped_group: !0, |
445 | 0 | }), |
446 | 0 | index: Cell::new(0), |
447 | 0 | } |
448 | 0 | } |
449 | | |
450 | | |
451 | | /// `ChunkLazy` is the storage for a lazy chunking operation. |
452 | | /// |
453 | | /// `IntoChunks` behaves just like `GroupBy`: it is iterable, and |
454 | | /// it only buffers if several chunk iterators are alive at the same time. |
455 | | /// |
456 | | /// This type implements [`IntoIterator`] (it is **not** an iterator |
457 | | /// itself), because the chunk iterators need to borrow from this |
458 | | /// value. It should be stored in a local variable or temporary and |
459 | | /// iterated. |
460 | | /// |
461 | | /// Iterator element type is `Chunk`, each chunk's iterator. |
462 | | /// |
463 | | /// See [`.chunks()`](crate::Itertools::chunks) for more information. |
464 | | #[must_use = "iterator adaptors are lazy and do nothing unless consumed"] |
465 | | pub struct IntoChunks<I> |
466 | | where I: Iterator, |
467 | | { |
468 | | inner: RefCell<GroupInner<usize, I, ChunkIndex>>, |
469 | | // the chunk iterator's current index. Keep this in the main value |
470 | | // so that simultaneous iterators all use the same state. |
471 | | index: Cell<usize>, |
472 | | } |
473 | | |
474 | | impl<I> Clone for IntoChunks<I> |
475 | | where I: Clone + Iterator, |
476 | | I::Item: Clone, |
477 | | { |
478 | | clone_fields!(inner, index); |
479 | | } |
480 | | |
481 | | |
482 | | impl<I> IntoChunks<I> |
483 | | where I: Iterator, |
484 | | { |
485 | | /// `client`: Index of chunk that requests next element |
486 | 0 | fn step(&self, client: usize) -> Option<I::Item> { |
487 | 0 | self.inner.borrow_mut().step(client) |
488 | 0 | } |
489 | | |
490 | | /// `client`: Index of chunk |
491 | 0 | fn drop_group(&self, client: usize) { |
492 | 0 | self.inner.borrow_mut().drop_group(client); |
493 | 0 | } |
494 | | } |
495 | | |
496 | | impl<'a, I> IntoIterator for &'a IntoChunks<I> |
497 | | where I: Iterator, |
498 | | I::Item: 'a, |
499 | | { |
500 | | type Item = Chunk<'a, I>; |
501 | | type IntoIter = Chunks<'a, I>; |
502 | | |
503 | 0 | fn into_iter(self) -> Self::IntoIter { |
504 | 0 | Chunks { |
505 | 0 | parent: self, |
506 | 0 | } |
507 | 0 | } |
508 | | } |
509 | | |
510 | | |
511 | | /// An iterator that yields the Chunk iterators. |
512 | | /// |
513 | | /// Iterator element type is `Chunk`. |
514 | | /// |
515 | | /// See [`.chunks()`](crate::Itertools::chunks) for more information. |
516 | | #[must_use = "iterator adaptors are lazy and do nothing unless consumed"] |
517 | 0 | #[derive(Clone)] |
518 | | pub struct Chunks<'a, I: 'a> |
519 | | where I: Iterator, |
520 | | I::Item: 'a, |
521 | | { |
522 | | parent: &'a IntoChunks<I>, |
523 | | } |
524 | | |
525 | | impl<'a, I> Iterator for Chunks<'a, I> |
526 | | where I: Iterator, |
527 | | I::Item: 'a, |
528 | | { |
529 | | type Item = Chunk<'a, I>; |
530 | | |
531 | | #[inline] |
532 | 0 | fn next(&mut self) -> Option<Self::Item> { |
533 | 0 | let index = self.parent.index.get(); |
534 | 0 | self.parent.index.set(index + 1); |
535 | 0 | let inner = &mut *self.parent.inner.borrow_mut(); |
536 | 0 | inner.step(index).map(|elt| { |
537 | 0 | Chunk { |
538 | 0 | parent: self.parent, |
539 | 0 | index, |
540 | 0 | first: Some(elt), |
541 | 0 | } |
542 | 0 | }) |
543 | 0 | } |
544 | | } |
545 | | |
546 | | /// An iterator for the elements in a single chunk. |
547 | | /// |
548 | | /// Iterator element type is `I::Item`. |
549 | | pub struct Chunk<'a, I: 'a> |
550 | | where I: Iterator, |
551 | | I::Item: 'a, |
552 | | { |
553 | | parent: &'a IntoChunks<I>, |
554 | | index: usize, |
555 | | first: Option<I::Item>, |
556 | | } |
557 | | |
558 | | impl<'a, I> Drop for Chunk<'a, I> |
559 | | where I: Iterator, |
560 | | I::Item: 'a, |
561 | | { |
562 | 0 | fn drop(&mut self) { |
563 | 0 | self.parent.drop_group(self.index); |
564 | 0 | } |
565 | | } |
566 | | |
567 | | impl<'a, I> Iterator for Chunk<'a, I> |
568 | | where I: Iterator, |
569 | | I::Item: 'a, |
570 | | { |
571 | | type Item = I::Item; |
572 | | #[inline] |
573 | 0 | fn next(&mut self) -> Option<Self::Item> { |
574 | 0 | if let elt @ Some(..) = self.first.take() { |
575 | 0 | return elt; |
576 | 0 | } |
577 | 0 | self.parent.step(self.index) |
578 | 0 | } |
579 | | } |