Coverage Report

Created: 2024-05-20 06:38

/rust/registry/src/index.crates.io-6f17d22bba15001f/itertools-0.11.0/src/groupbylazy.rs
Line
Count
Source (jump to first uncovered line)
1
use std::cell::{Cell, RefCell};
2
use alloc::vec::{self, Vec};
3
4
/// A trait to unify `FnMut` for `GroupBy` with the chunk key in `IntoChunks`
5
trait KeyFunction<A> {
6
    type Key;
7
    fn call_mut(&mut self, arg: A) -> Self::Key;
8
}
9
10
impl<A, K, F: ?Sized> KeyFunction<A> for F
11
    where F: FnMut(A) -> K
12
{
13
    type Key = K;
14
    #[inline]
15
0
    fn call_mut(&mut self, arg: A) -> Self::Key {
16
0
        (*self)(arg)
17
0
    }
18
}
19
20
21
/// `ChunkIndex` acts like the grouping key function for `IntoChunks`
22
0
#[derive(Debug, Clone)]
23
struct ChunkIndex {
24
    size: usize,
25
    index: usize,
26
    key: usize,
27
}
28
29
impl ChunkIndex {
30
    #[inline(always)]
31
0
    fn new(size: usize) -> Self {
32
0
        ChunkIndex {
33
0
            size,
34
0
            index: 0,
35
0
            key: 0,
36
0
        }
37
0
    }
38
}
39
40
impl<A> KeyFunction<A> for ChunkIndex {
41
    type Key = usize;
42
    #[inline(always)]
43
0
    fn call_mut(&mut self, _arg: A) -> Self::Key {
44
0
        if self.index == self.size {
45
0
            self.key += 1;
46
0
            self.index = 0;
47
0
        }
48
0
        self.index += 1;
49
0
        self.key
50
0
    }
51
}
52
53
0
#[derive(Clone)]
54
struct GroupInner<K, I, F>
55
    where I: Iterator
56
{
57
    key: F,
58
    iter: I,
59
    current_key: Option<K>,
60
    current_elt: Option<I::Item>,
61
    /// flag set if iterator is exhausted
62
    done: bool,
63
    /// Index of group we are currently buffering or visiting
64
    top_group: usize,
65
    /// Least index for which we still have elements buffered
66
    oldest_buffered_group: usize,
67
    /// Group index for `buffer[0]` -- the slots
68
    /// bottom_group..oldest_buffered_group are unused and will be erased when
69
    /// that range is large enough.
70
    bottom_group: usize,
71
    /// Buffered groups, from `bottom_group` (index 0) to `top_group`.
72
    buffer: Vec<vec::IntoIter<I::Item>>,
73
    /// index of last group iter that was dropped, usize::MAX == none
74
    dropped_group: usize,
75
}
76
77
impl<K, I, F> GroupInner<K, I, F>
78
    where I: Iterator,
79
          F: for<'a> KeyFunction<&'a I::Item, Key=K>,
80
          K: PartialEq,
81
{
82
    /// `client`: Index of group that requests next element
83
    #[inline(always)]
84
0
    fn step(&mut self, client: usize) -> Option<I::Item> {
85
0
        /*
86
0
        println!("client={}, bottom_group={}, oldest_buffered_group={}, top_group={}, buffers=[{}]",
87
0
                 client, self.bottom_group, self.oldest_buffered_group,
88
0
                 self.top_group,
89
0
                 self.buffer.iter().map(|elt| elt.len()).format(", "));
90
0
        */
91
0
        if client < self.oldest_buffered_group {
92
0
            None
93
0
        } else if client < self.top_group ||
94
0
            (client == self.top_group &&
95
0
             self.buffer.len() > self.top_group - self.bottom_group)
96
        {
97
0
            self.lookup_buffer(client)
98
0
        } else if self.done {
99
0
            None
100
0
        } else if self.top_group == client {
101
0
            self.step_current()
102
        } else {
103
0
            self.step_buffering(client)
104
        }
105
0
    }
106
107
    #[inline(never)]
108
0
    fn lookup_buffer(&mut self, client: usize) -> Option<I::Item> {
109
0
        // if `bufidx` doesn't exist in self.buffer, it might be empty
110
0
        let bufidx = client - self.bottom_group;
111
0
        if client < self.oldest_buffered_group {
112
0
            return None;
113
0
        }
114
0
        let elt = self.buffer.get_mut(bufidx).and_then(|queue| queue.next());
115
0
        if elt.is_none() && client == self.oldest_buffered_group {
116
            // FIXME: VecDeque is unfortunately not zero allocation when empty,
117
            // so we do this job manually.
118
            // `bottom_group..oldest_buffered_group` is unused, and if it's large enough, erase it.
119
0
            self.oldest_buffered_group += 1;
120
            // skip forward further empty queues too
121
0
            while self.buffer.get(self.oldest_buffered_group - self.bottom_group)
122
0
                             .map_or(false, |buf| buf.len() == 0)
123
0
            {
124
0
                self.oldest_buffered_group += 1;
125
0
            }
126
127
0
            let nclear = self.oldest_buffered_group - self.bottom_group;
128
0
            if nclear > 0 && nclear >= self.buffer.len() / 2 {
129
0
                let mut i = 0;
130
0
                self.buffer.retain(|buf| {
131
0
                    i += 1;
132
0
                    debug_assert!(buf.len() == 0 || i > nclear);
133
0
                    i > nclear
134
0
                });
135
0
                self.bottom_group = self.oldest_buffered_group;
136
0
            }
137
0
        }
138
0
        elt
139
0
    }
140
141
    /// Take the next element from the iterator, and set the done
142
    /// flag if exhausted. Must not be called after done.
143
    #[inline(always)]
144
0
    fn next_element(&mut self) -> Option<I::Item> {
145
0
        debug_assert!(!self.done);
146
0
        match self.iter.next() {
147
0
            None => { self.done = true; None }
148
0
            otherwise => otherwise,
149
        }
150
0
    }
151
152
153
    #[inline(never)]
154
0
    fn step_buffering(&mut self, client: usize) -> Option<I::Item> {
155
        // requested a later group -- walk through the current group up to
156
        // the requested group index, and buffer the elements (unless
157
        // the group is marked as dropped).
158
        // Because the `Groups` iterator is always the first to request
159
        // each group index, client is the next index efter top_group.
160
0
        debug_assert!(self.top_group + 1 == client);
161
0
        let mut group = Vec::new();
162
163
0
        if let Some(elt) = self.current_elt.take() {
164
0
            if self.top_group != self.dropped_group {
165
0
                group.push(elt);
166
0
            }
167
0
        }
168
0
        let mut first_elt = None; // first element of the next group
169
170
0
        while let Some(elt) = self.next_element() {
171
0
            let key = self.key.call_mut(&elt);
172
0
            match self.current_key.take() {
173
0
                None => {}
174
0
                Some(old_key) => if old_key != key {
175
0
                    self.current_key = Some(key);
176
0
                    first_elt = Some(elt);
177
0
                    break;
178
0
                },
179
            }
180
0
            self.current_key = Some(key);
181
0
            if self.top_group != self.dropped_group {
182
0
                group.push(elt);
183
0
            }
184
        }
185
186
0
        if self.top_group != self.dropped_group {
187
0
            self.push_next_group(group);
188
0
        }
189
0
        if first_elt.is_some() {
190
0
            self.top_group += 1;
191
0
            debug_assert!(self.top_group == client);
192
0
        }
193
0
        first_elt
194
0
    }
195
196
0
    fn push_next_group(&mut self, group: Vec<I::Item>) {
197
        // When we add a new buffered group, fill up slots between oldest_buffered_group and top_group
198
0
        while self.top_group - self.bottom_group > self.buffer.len() {
199
0
            if self.buffer.is_empty() {
200
0
                self.bottom_group += 1;
201
0
                self.oldest_buffered_group += 1;
202
0
            } else {
203
0
                self.buffer.push(Vec::new().into_iter());
204
0
            }
205
        }
206
0
        self.buffer.push(group.into_iter());
207
0
        debug_assert!(self.top_group + 1 - self.bottom_group == self.buffer.len());
208
0
    }
209
210
    /// This is the immediate case, where we use no buffering
211
    #[inline]
212
0
    fn step_current(&mut self) -> Option<I::Item> {
213
0
        debug_assert!(!self.done);
214
0
        if let elt @ Some(..) = self.current_elt.take() {
215
0
            return elt;
216
0
        }
217
0
        match self.next_element() {
218
0
            None => None,
219
0
            Some(elt) => {
220
0
                let key = self.key.call_mut(&elt);
221
0
                match self.current_key.take() {
222
0
                    None => {}
223
0
                    Some(old_key) => if old_key != key {
224
0
                        self.current_key = Some(key);
225
0
                        self.current_elt = Some(elt);
226
0
                        self.top_group += 1;
227
0
                        return None;
228
0
                    },
229
                }
230
0
                self.current_key = Some(key);
231
0
                Some(elt)
232
            }
233
        }
234
0
    }
235
236
    /// Request the just started groups' key.
237
    ///
238
    /// `client`: Index of group
239
    ///
240
    /// **Panics** if no group key is available.
241
0
    fn group_key(&mut self, client: usize) -> K {
242
        // This can only be called after we have just returned the first
243
        // element of a group.
244
        // Perform this by simply buffering one more element, grabbing the
245
        // next key.
246
0
        debug_assert!(!self.done);
247
0
        debug_assert!(client == self.top_group);
248
0
        debug_assert!(self.current_key.is_some());
249
0
        debug_assert!(self.current_elt.is_none());
250
0
        let old_key = self.current_key.take().unwrap();
251
0
        if let Some(elt) = self.next_element() {
252
0
            let key = self.key.call_mut(&elt);
253
0
            if old_key != key {
254
0
                self.top_group += 1;
255
0
            }
256
0
            self.current_key = Some(key);
257
0
            self.current_elt = Some(elt);
258
0
        }
259
0
        old_key
260
0
    }
261
}
262
263
impl<K, I, F> GroupInner<K, I, F>
264
    where I: Iterator,
265
{
266
    /// Called when a group is dropped
267
0
    fn drop_group(&mut self, client: usize) {
268
0
        // It's only useful to track the maximal index
269
0
        if self.dropped_group == !0 || client > self.dropped_group {
270
0
            self.dropped_group = client;
271
0
        }
272
0
    }
273
}
274
275
/// `GroupBy` is the storage for the lazy grouping operation.
276
///
277
/// If the groups are consumed in their original order, or if each
278
/// group is dropped without keeping it around, then `GroupBy` uses
279
/// no allocations. It needs allocations only if several group iterators
280
/// are alive at the same time.
281
///
282
/// This type implements [`IntoIterator`] (it is **not** an iterator
283
/// itself), because the group iterators need to borrow from this
284
/// value. It should be stored in a local variable or temporary and
285
/// iterated.
286
///
287
/// See [`.group_by()`](crate::Itertools::group_by) for more information.
288
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
289
pub struct GroupBy<K, I, F>
290
    where I: Iterator,
291
{
292
    inner: RefCell<GroupInner<K, I, F>>,
293
    // the group iterator's current index. Keep this in the main value
294
    // so that simultaneous iterators all use the same state.
295
    index: Cell<usize>,
296
}
297
298
/// Create a new
299
0
pub fn new<K, J, F>(iter: J, f: F) -> GroupBy<K, J::IntoIter, F>
300
0
    where J: IntoIterator,
301
0
          F: FnMut(&J::Item) -> K,
302
0
{
303
0
    GroupBy {
304
0
        inner: RefCell::new(GroupInner {
305
0
            key: f,
306
0
            iter: iter.into_iter(),
307
0
            current_key: None,
308
0
            current_elt: None,
309
0
            done: false,
310
0
            top_group: 0,
311
0
            oldest_buffered_group: 0,
312
0
            bottom_group: 0,
313
0
            buffer: Vec::new(),
314
0
            dropped_group: !0,
315
0
        }),
316
0
        index: Cell::new(0),
317
0
    }
318
0
}
319
320
impl<K, I, F> GroupBy<K, I, F>
321
    where I: Iterator,
322
{
323
    /// `client`: Index of group that requests next element
324
0
    fn step(&self, client: usize) -> Option<I::Item>
325
0
        where F: FnMut(&I::Item) -> K,
326
0
              K: PartialEq,
327
0
    {
328
0
        self.inner.borrow_mut().step(client)
329
0
    }
330
331
    /// `client`: Index of group
332
0
    fn drop_group(&self, client: usize) {
333
0
        self.inner.borrow_mut().drop_group(client);
334
0
    }
335
}
336
337
impl<'a, K, I, F> IntoIterator for &'a GroupBy<K, I, F>
338
    where I: Iterator,
339
          I::Item: 'a,
340
          F: FnMut(&I::Item) -> K,
341
          K: PartialEq
342
{
343
    type Item = (K, Group<'a, K, I, F>);
344
    type IntoIter = Groups<'a, K, I, F>;
345
346
0
    fn into_iter(self) -> Self::IntoIter {
347
0
        Groups { parent: self }
348
0
    }
349
}
350
351
352
/// An iterator that yields the Group iterators.
353
///
354
/// Iterator element type is `(K, Group)`:
355
/// the group's key `K` and the group's iterator.
356
///
357
/// See [`.group_by()`](crate::Itertools::group_by) for more information.
358
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
359
pub struct Groups<'a, K: 'a, I: 'a, F: 'a>
360
    where I: Iterator,
361
          I::Item: 'a
362
{
363
    parent: &'a GroupBy<K, I, F>,
364
}
365
366
impl<'a, K, I, F> Iterator for Groups<'a, K, I, F>
367
    where I: Iterator,
368
          I::Item: 'a,
369
          F: FnMut(&I::Item) -> K,
370
          K: PartialEq
371
{
372
    type Item = (K, Group<'a, K, I, F>);
373
374
    #[inline]
375
0
    fn next(&mut self) -> Option<Self::Item> {
376
0
        let index = self.parent.index.get();
377
0
        self.parent.index.set(index + 1);
378
0
        let inner = &mut *self.parent.inner.borrow_mut();
379
0
        inner.step(index).map(|elt| {
380
0
            let key = inner.group_key(index);
381
0
            (key, Group {
382
0
                parent: self.parent,
383
0
                index,
384
0
                first: Some(elt),
385
0
            })
386
0
        })
387
0
    }
388
}
389
390
/// An iterator for the elements in a single group.
391
///
392
/// Iterator element type is `I::Item`.
393
pub struct Group<'a, K: 'a, I: 'a, F: 'a>
394
    where I: Iterator,
395
          I::Item: 'a,
396
{
397
    parent: &'a GroupBy<K, I, F>,
398
    index: usize,
399
    first: Option<I::Item>,
400
}
401
402
impl<'a, K, I, F> Drop for Group<'a, K, I, F>
403
    where I: Iterator,
404
          I::Item: 'a,
405
{
406
0
    fn drop(&mut self) {
407
0
        self.parent.drop_group(self.index);
408
0
    }
409
}
410
411
impl<'a, K, I, F> Iterator for Group<'a, K, I, F>
412
    where I: Iterator,
413
          I::Item: 'a,
414
          F: FnMut(&I::Item) -> K,
415
          K: PartialEq,
416
{
417
    type Item = I::Item;
418
    #[inline]
419
0
    fn next(&mut self) -> Option<Self::Item> {
420
0
        if let elt @ Some(..) = self.first.take() {
421
0
            return elt;
422
0
        }
423
0
        self.parent.step(self.index)
424
0
    }
425
}
426
427
///// IntoChunks /////
428
429
/// Create a new
430
0
pub fn new_chunks<J>(iter: J, size: usize) -> IntoChunks<J::IntoIter>
431
0
    where J: IntoIterator,
432
0
{
433
0
    IntoChunks {
434
0
        inner: RefCell::new(GroupInner {
435
0
            key: ChunkIndex::new(size),
436
0
            iter: iter.into_iter(),
437
0
            current_key: None,
438
0
            current_elt: None,
439
0
            done: false,
440
0
            top_group: 0,
441
0
            oldest_buffered_group: 0,
442
0
            bottom_group: 0,
443
0
            buffer: Vec::new(),
444
0
            dropped_group: !0,
445
0
        }),
446
0
        index: Cell::new(0),
447
0
    }
448
0
}
449
450
451
/// `ChunkLazy` is the storage for a lazy chunking operation.
452
///
453
/// `IntoChunks` behaves just like `GroupBy`: it is iterable, and
454
/// it only buffers if several chunk iterators are alive at the same time.
455
///
456
/// This type implements [`IntoIterator`] (it is **not** an iterator
457
/// itself), because the chunk iterators need to borrow from this
458
/// value. It should be stored in a local variable or temporary and
459
/// iterated.
460
///
461
/// Iterator element type is `Chunk`, each chunk's iterator.
462
///
463
/// See [`.chunks()`](crate::Itertools::chunks) for more information.
464
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
465
pub struct IntoChunks<I>
466
    where I: Iterator,
467
{
468
    inner: RefCell<GroupInner<usize, I, ChunkIndex>>,
469
    // the chunk iterator's current index. Keep this in the main value
470
    // so that simultaneous iterators all use the same state.
471
    index: Cell<usize>,
472
}
473
474
impl<I> Clone for IntoChunks<I>
475
    where I: Clone + Iterator,
476
          I::Item: Clone,
477
{
478
    clone_fields!(inner, index);
479
}
480
481
482
impl<I> IntoChunks<I>
483
    where I: Iterator,
484
{
485
    /// `client`: Index of chunk that requests next element
486
0
    fn step(&self, client: usize) -> Option<I::Item> {
487
0
        self.inner.borrow_mut().step(client)
488
0
    }
489
490
    /// `client`: Index of chunk
491
0
    fn drop_group(&self, client: usize) {
492
0
        self.inner.borrow_mut().drop_group(client);
493
0
    }
494
}
495
496
impl<'a, I> IntoIterator for &'a IntoChunks<I>
497
    where I: Iterator,
498
          I::Item: 'a,
499
{
500
    type Item = Chunk<'a, I>;
501
    type IntoIter = Chunks<'a, I>;
502
503
0
    fn into_iter(self) -> Self::IntoIter {
504
0
        Chunks {
505
0
            parent: self,
506
0
        }
507
0
    }
508
}
509
510
511
/// An iterator that yields the Chunk iterators.
512
///
513
/// Iterator element type is `Chunk`.
514
///
515
/// See [`.chunks()`](crate::Itertools::chunks) for more information.
516
#[must_use = "iterator adaptors are lazy and do nothing unless consumed"]
517
0
#[derive(Clone)]
518
pub struct Chunks<'a, I: 'a>
519
    where I: Iterator,
520
          I::Item: 'a,
521
{
522
    parent: &'a IntoChunks<I>,
523
}
524
525
impl<'a, I> Iterator for Chunks<'a, I>
526
    where I: Iterator,
527
          I::Item: 'a,
528
{
529
    type Item = Chunk<'a, I>;
530
531
    #[inline]
532
0
    fn next(&mut self) -> Option<Self::Item> {
533
0
        let index = self.parent.index.get();
534
0
        self.parent.index.set(index + 1);
535
0
        let inner = &mut *self.parent.inner.borrow_mut();
536
0
        inner.step(index).map(|elt| {
537
0
            Chunk {
538
0
                parent: self.parent,
539
0
                index,
540
0
                first: Some(elt),
541
0
            }
542
0
        })
543
0
    }
544
}
545
546
/// An iterator for the elements in a single chunk.
547
///
548
/// Iterator element type is `I::Item`.
549
pub struct Chunk<'a, I: 'a>
550
    where I: Iterator,
551
          I::Item: 'a,
552
{
553
    parent: &'a IntoChunks<I>,
554
    index: usize,
555
    first: Option<I::Item>,
556
}
557
558
impl<'a, I> Drop for Chunk<'a, I>
559
    where I: Iterator,
560
          I::Item: 'a,
561
{
562
0
    fn drop(&mut self) {
563
0
        self.parent.drop_group(self.index);
564
0
    }
565
}
566
567
impl<'a, I> Iterator for Chunk<'a, I>
568
    where I: Iterator,
569
          I::Item: 'a,
570
{
571
    type Item = I::Item;
572
    #[inline]
573
0
    fn next(&mut self) -> Option<Self::Item> {
574
0
        if let elt @ Some(..) = self.first.take() {
575
0
            return elt;
576
0
        }
577
0
        self.parent.step(self.index)
578
0
    }
579
}