Coverage Report

Created: 2025-08-26 07:04

/src/bson-rust/src/raw/iter.rs
Line
Count
Source (jump to first uncovered line)
1
use std::convert::TryInto;
2
3
use crate::{
4
    oid::ObjectId,
5
    raw::{CStr, Error, Result, MIN_BSON_DOCUMENT_SIZE, MIN_CODE_WITH_SCOPE_SIZE},
6
    spec::{BinarySubtype, ElementType},
7
    Bson,
8
    DateTime,
9
    Decimal128,
10
    RawArray,
11
    RawBinaryRef,
12
    RawBson,
13
    RawDbPointerRef,
14
    RawJavaScriptCodeWithScopeRef,
15
    RawRegexRef,
16
    Timestamp,
17
};
18
19
use super::{
20
    bool_from_slice,
21
    checked_add,
22
    f64_from_slice,
23
    i32_from_slice,
24
    i64_from_slice,
25
    read_len,
26
    read_lenencode,
27
    read_lenencode_bytes,
28
    try_to_str,
29
    RawBsonRef,
30
    RawDocument,
31
};
32
33
/// An iterator over the document's entries.
34
pub struct Iter<'a> {
35
    inner: RawIter<'a>,
36
}
37
38
impl<'a> Iter<'a> {
39
243k
    pub(crate) fn new(doc: &'a RawDocument) -> Self {
40
243k
        Iter {
41
243k
            inner: RawIter::new(doc),
42
243k
        }
43
243k
    }
44
}
45
46
impl<'a> Iterator for Iter<'a> {
47
    type Item = Result<(&'a CStr, RawBsonRef<'a>)>;
48
49
4.43M
    fn next(&mut self) -> Option<Result<(&'a CStr, RawBsonRef<'a>)>> {
50
4.43M
        match self.inner.next() {
51
4.18M
            Some(Ok(elem)) => match elem.value() {
52
217k
                Err(e) => Some(Err(e)),
53
3.96M
                Ok(value) => Some(Ok((elem.key, value))),
54
            },
55
7.72k
            Some(Err(e)) => Some(Err(e)),
56
237k
            None => None,
57
        }
58
4.43M
    }
59
}
60
61
/// An iterator over the document's elements.
62
pub struct RawIter<'a> {
63
    doc: &'a RawDocument,
64
    offset: usize,
65
66
    /// Whether the underlying doc is assumed to be valid or if an error has been encountered.
67
    /// After an error, all subsequent iterations will return None.
68
    valid: bool,
69
}
70
71
impl<'a> RawIter<'a> {
72
485k
    pub(crate) fn new(doc: &'a RawDocument) -> Self {
73
485k
        Self {
74
485k
            doc,
75
485k
            offset: 4,
76
485k
            valid: true,
77
485k
        }
78
485k
    }
79
80
10.1M
    fn verify_enough_bytes(&self, start: usize, num_bytes: usize) -> Result<()> {
81
10.1M
        let end = checked_add(start, num_bytes)?;
82
10.1M
        if self.doc.as_bytes().get(start..end).is_none() {
83
2.45k
            return Err(Error::malformed_bytes(format!(
84
2.45k
                "length exceeds remaining length of buffer: {} vs {}",
85
2.45k
                num_bytes,
86
2.45k
                self.doc.as_bytes().len() - start
87
2.45k
            )));
88
10.1M
        }
89
10.1M
        Ok(())
90
10.1M
    }
91
92
494k
    fn next_document_len(&self, starting_at: usize) -> Result<usize> {
93
494k
        self.verify_enough_bytes(starting_at, MIN_BSON_DOCUMENT_SIZE as usize)?;
94
494k
        let size = i32_from_slice(&self.doc.as_bytes()[starting_at..])? as usize;
95
96
494k
        if size < MIN_BSON_DOCUMENT_SIZE as usize {
97
59
            return Err(Error::malformed_bytes(format!(
98
59
                "document too small: {} bytes",
99
59
                size
100
59
            )));
101
494k
        }
102
494k
103
494k
        self.verify_enough_bytes(starting_at, size)?;
104
105
493k
        if self.doc.as_bytes()[starting_at + size - 1] != 0 {
106
220
            return Err(Error::malformed_bytes("not null terminated"));
107
493k
        }
108
493k
        Ok(size)
109
494k
    }
110
}
111
112
/// A view into a value contained in a [`RawDocument`] or [`RawDocumentBuf`](crate::RawDocumentBuf).
113
/// The underlying bytes of the element are not parsed or validated; call [`RawElement::value`] or
114
/// one of the `TryFrom` implementations to convert the element into a BSON value.
115
#[derive(Clone)]
116
pub struct RawElement<'a> {
117
    key: &'a CStr,
118
    kind: ElementType,
119
    doc: &'a RawDocument,
120
    start_at: usize,
121
    size: usize,
122
}
123
124
impl<'a> TryFrom<RawElement<'a>> for RawBsonRef<'a> {
125
    type Error = Error;
126
127
2.53M
    fn try_from(element: RawElement<'a>) -> Result<Self> {
128
2.53M
        element.value()
129
2.53M
    }
130
}
131
132
impl TryFrom<RawElement<'_>> for RawBson {
133
    type Error = Error;
134
135
0
    fn try_from(element: RawElement<'_>) -> Result<Self> {
136
0
        Ok(element.value()?.into())
137
0
    }
138
}
139
140
impl TryFrom<RawElement<'_>> for Bson {
141
    type Error = Error;
142
143
0
    fn try_from(element: RawElement<'_>) -> Result<Self> {
144
0
        element.value()?.try_into()
145
0
    }
146
}
147
148
impl<'a> RawElement<'a> {
149
    #[cfg(feature = "serde")]
150
17.5k
    pub(crate) fn toplevel(bytes: &'a [u8]) -> Result<Self> {
151
        use crate::raw::cstr;
152
153
17.5k
        let doc = RawDocument::from_bytes(bytes)?;
154
17.3k
        Ok(Self {
155
17.3k
            key: cstr!("TOPLEVEL"),
156
17.3k
            kind: ElementType::EmbeddedDocument,
157
17.3k
            doc,
158
17.3k
            start_at: 0,
159
17.3k
            size: doc.as_bytes().len(),
160
17.3k
        })
161
17.5k
    }
162
163
    /// The size of the element.
164
0
    pub fn size(&self) -> usize {
165
0
        self.size
166
0
    }
167
168
    /// The document key the element corresponds to.
169
1.87M
    pub fn key(&self) -> &'a CStr {
170
1.87M
        self.key
171
1.87M
    }
172
173
    /// The type of the element.
174
0
    pub fn element_type(&self) -> ElementType {
175
0
        self.kind
176
0
    }
177
178
    /// Parses this element into a [`RawBsonRef`] and returns an error if the underlying bytes are
179
    /// invalid.
180
8.91M
    pub fn value(&self) -> Result<RawBsonRef<'a>> {
181
8.91M
        Ok(match self.kind {
182
963k
            ElementType::Null => RawBsonRef::Null,
183
919k
            ElementType::Undefined => RawBsonRef::Undefined,
184
567k
            ElementType::MinKey => RawBsonRef::MinKey,
185
431k
            ElementType::MaxKey => RawBsonRef::MaxKey,
186
227k
            ElementType::ObjectId => RawBsonRef::ObjectId(self.get_oid_at(self.start_at)?),
187
225k
            ElementType::Int32 => RawBsonRef::Int32(i32_from_slice(self.slice())?),
188
152k
            ElementType::Int64 => RawBsonRef::Int64(i64_from_slice(self.slice())?),
189
484k
            ElementType::Double => RawBsonRef::Double(f64_from_slice(self.slice())?),
190
115k
            ElementType::String => RawBsonRef::String(self.read_str()?),
191
            ElementType::EmbeddedDocument => {
192
338k
                RawBsonRef::Document(RawDocument::from_bytes(self.slice())?)
193
            }
194
            ElementType::Array => {
195
172k
                RawBsonRef::Array(RawArray::from_doc(RawDocument::from_bytes(self.slice())?))
196
            }
197
            ElementType::Boolean => RawBsonRef::Boolean(
198
1.12M
                bool_from_slice(self.slice()).map_err(|e| self.malformed_error(e))?,
199
            ),
200
            ElementType::DateTime => {
201
451k
                RawBsonRef::DateTime(DateTime::from_millis(i64_from_slice(self.slice())?))
202
            }
203
            ElementType::Decimal128 => RawBsonRef::Decimal128(Decimal128::from_bytes(
204
156k
                self.slice()
205
156k
                    .try_into()
206
156k
                    .map_err(|e| self.malformed_error(e))?,
207
            )),
208
170k
            ElementType::JavaScriptCode => RawBsonRef::JavaScriptCode(self.read_str()?),
209
112k
            ElementType::Symbol => RawBsonRef::Symbol(self.read_str()?),
210
            ElementType::DbPointer => RawBsonRef::DbPointer(RawDbPointerRef {
211
135k
                namespace: read_lenencode(self.slice())?,
212
121k
                id: self.get_oid_at(self.start_at + (self.size - 12))?,
213
            }),
214
            ElementType::RegularExpression => {
215
921k
                let pattern = self.doc.read_cstring_at(self.start_at)?;
216
                RawBsonRef::RegularExpression(RawRegexRef {
217
921k
                    pattern,
218
921k
                    options: self
219
921k
                        .doc
220
921k
                        .read_cstring_at(self.start_at + pattern.len() + 1)?,
221
                })
222
            }
223
            ElementType::Timestamp => RawBsonRef::Timestamp({
224
166k
                let bytes: [u8; 8] = self.slice()[0..8]
225
166k
                    .try_into()
226
166k
                    .map_err(|e| self.malformed_error(e))?;
227
166k
                Timestamp::from_le_bytes(bytes)
228
            }),
229
            ElementType::Binary => {
230
719k
                let len = self.size.checked_sub(4 + 1).ok_or_else(|| {
231
0
                    self.malformed_error(format!("length exceeds maximum: {}", self.size))
232
719k
                })?;
233
234
719k
                let data_start = self.start_at + 4 + 1;
235
719k
236
719k
                if self.size >= i32::MAX as usize {
237
0
                    return Err(
238
0
                        self.malformed_error(format!("binary length exceeds maximum: {}", len))
239
0
                    );
240
719k
                }
241
719k
242
719k
                let subtype = BinarySubtype::from(self.doc.as_bytes()[self.start_at + 4]);
243
719k
                let data = match subtype {
244
                    BinarySubtype::BinaryOld => {
245
168k
                        if len < 4 {
246
6.05k
                            return Err(self.malformed_error(
247
6.05k
                                "old binary subtype has no inner declared length",
248
6.05k
                            ));
249
162k
                        }
250
162k
                        let oldlength =
251
162k
                            i32_from_slice(&self.doc.as_bytes()[data_start..])? as usize;
252
162k
                        if checked_add(oldlength, 4)? != len {
253
105k
                            return Err(self.malformed_error(
254
105k
                                "old binary subtype has wrong inner declared length",
255
105k
                            ));
256
41.5k
                        }
257
41.5k
                        self.slice_bounds(data_start + 4, len - 4)
258
                    }
259
551k
                    _ => self.slice_bounds(data_start, len),
260
                };
261
593k
                RawBsonRef::Binary(RawBinaryRef {
262
593k
                    subtype,
263
593k
                    bytes: data,
264
593k
                })
265
            }
266
            ElementType::JavaScriptCodeWithScope => {
267
353k
                if self.size < MIN_CODE_WITH_SCOPE_SIZE as usize {
268
16.2k
                    return Err(self.malformed_error("code with scope length too small"));
269
337k
                }
270
337k
271
337k
                let slice = self.slice();
272
337k
                let code = read_lenencode(&slice[4..])?;
273
133k
                let scope_start = 4 + 4 + code.len() + 1;
274
133k
                let scope = RawDocument::from_bytes(&slice[scope_start..])?;
275
276
44.8k
                RawBsonRef::JavaScriptCodeWithScope(RawJavaScriptCodeWithScopeRef { code, scope })
277
            }
278
        })
279
8.91M
    }
280
281
    /// Parses this element into [`RawBson`], replacing any invalid UTF-8 strings with the Unicode
282
    /// replacement character. Returns an error if the underlying bytes are invalid.
283
0
    pub fn value_utf8_lossy(&self) -> Result<RawBson> {
284
0
        match self.value_utf8_lossy_inner()? {
285
0
            Some(v) => Ok(v.into()),
286
0
            None => Ok(self.value()?.into()),
287
        }
288
0
    }
289
290
1.07M
    pub(crate) fn value_utf8_lossy_inner(&self) -> Result<Option<Utf8LossyBson<'a>>> {
291
1.07M
        Ok(Some(match self.kind {
292
18.2k
            ElementType::String => Utf8LossyBson::String(self.read_utf8_lossy()),
293
19.3k
            ElementType::JavaScriptCode => Utf8LossyBson::JavaScriptCode(self.read_utf8_lossy()),
294
            ElementType::JavaScriptCodeWithScope => {
295
7.63k
                if self.size < MIN_CODE_WITH_SCOPE_SIZE as usize {
296
23
                    return Err(self.malformed_error("code with scope length too small"));
297
7.60k
                }
298
7.60k
299
7.60k
                let slice = self.slice();
300
7.60k
                let code = String::from_utf8_lossy(read_lenencode_bytes(&slice[4..])?).into_owned();
301
7.59k
                let scope_start = 4 + 4 + code.len() + 1;
302
7.59k
                if scope_start >= slice.len() {
303
13
                    return Err(self.malformed_error("code with scope length overrun"));
304
7.57k
                }
305
7.57k
                let scope = RawDocument::from_bytes(&slice[scope_start..])?;
306
307
7.50k
                Utf8LossyBson::JavaScriptCodeWithScope(Utf8LossyJavaScriptCodeWithScope {
308
7.50k
                    code,
309
7.50k
                    scope,
310
7.50k
                })
311
            }
312
3.07k
            ElementType::Symbol => Utf8LossyBson::Symbol(self.read_utf8_lossy()),
313
            ElementType::DbPointer => Utf8LossyBson::DbPointer(crate::DbPointer {
314
25.1k
                namespace: String::from_utf8_lossy(read_lenencode_bytes(self.slice())?)
315
25.1k
                    .into_owned(),
316
25.1k
                id: self.get_oid_at(self.start_at + (self.size - 12))?,
317
            }),
318
            ElementType::RegularExpression => {
319
215k
                let pattern =
320
215k
                    String::from_utf8_lossy(self.doc.cstring_bytes_at(self.start_at)?).into_owned();
321
215k
                let pattern_len = pattern.len();
322
215k
                Utf8LossyBson::RegularExpression(crate::Regex {
323
215k
                    pattern: pattern.try_into()?,
324
                    options: String::from_utf8_lossy(
325
215k
                        self.doc.cstring_bytes_at(self.start_at + pattern_len + 1)?,
326
                    )
327
215k
                    .into_owned()
328
215k
                    .try_into()?,
329
                })
330
            }
331
784k
            _ => return Ok(None),
332
        }))
333
1.07M
    }
334
335
673k
    fn malformed_error(&self, e: impl ToString) -> Error {
336
673k
        Error::malformed_bytes(e).with_key(self.key.as_str())
337
673k
    }
<bson::raw::iter::RawElement>::malformed_error::<bson::error::Error>
Line
Count
Source
335
545k
    fn malformed_error(&self, e: impl ToString) -> Error {
336
545k
        Error::malformed_bytes(e).with_key(self.key.as_str())
337
545k
    }
Unexecuted instantiation: <bson::raw::iter::RawElement>::malformed_error::<alloc::string::String>
Unexecuted instantiation: <bson::raw::iter::RawElement>::malformed_error::<core::array::TryFromSliceError>
<bson::raw::iter::RawElement>::malformed_error::<&str>
Line
Count
Source
335
128k
    fn malformed_error(&self, e: impl ToString) -> Error {
336
128k
        Error::malformed_bytes(e).with_key(self.key.as_str())
337
128k
    }
338
339
3.78M
    pub(crate) fn slice(&self) -> &'a [u8] {
340
3.78M
        self.slice_bounds(self.start_at, self.size)
341
3.78M
    }
342
343
4.81M
    fn slice_bounds(&self, start_at: usize, size: usize) -> &'a [u8] {
344
4.81M
        &self.doc.as_bytes()[start_at..(start_at + size)]
345
4.81M
    }
346
347
440k
    fn str_bytes(&self) -> &'a [u8] {
348
440k
        self.slice_bounds(self.start_at + 4, self.size - 4 - 1)
349
440k
    }
350
351
399k
    fn read_str(&self) -> Result<&'a str> {
352
399k
        try_to_str(self.str_bytes())
353
399k
    }
354
355
40.7k
    fn read_utf8_lossy(&self) -> String {
356
40.7k
        String::from_utf8_lossy(self.str_bytes()).into_owned()
357
40.7k
    }
358
359
373k
    fn get_oid_at(&self, start_at: usize) -> Result<ObjectId> {
360
373k
        Ok(ObjectId::from_bytes(
361
373k
            self.doc.as_bytes()[start_at..(start_at + 12)]
362
373k
                .try_into()
363
373k
                .map_err(|e| Error::malformed_bytes(e).with_key(self.key.as_str()))?,
364
        ))
365
373k
    }
366
}
367
368
impl RawIter<'_> {
369
1.08M
    fn get_next_length_at(&self, start_at: usize) -> Result<usize> {
370
1.08M
        let len = i32_from_slice(&self.doc.as_bytes()[start_at..])?;
371
1.08M
        if len < 0 {
372
292
            Err(Error::malformed_bytes("lengths can't be negative"))
373
        } else {
374
1.08M
            Ok(len as usize)
375
        }
376
1.08M
    }
377
378
9.19M
    fn get_next_kvp(&mut self, offset: usize) -> Result<(ElementType, usize)> {
379
9.19M
        let element_type = match ElementType::from(self.doc.as_bytes()[self.offset]) {
380
9.19M
            Some(et) => et,
381
            None => {
382
3.87k
                return Err(Error::malformed_bytes(format!(
383
3.87k
                    "invalid tag: {}",
384
3.87k
                    self.doc.as_bytes()[self.offset]
385
3.87k
                )));
386
            }
387
        };
388
389
9.19M
        let element_size = match element_type {
390
1.12M
            ElementType::Boolean => 1,
391
225k
            ElementType::Int32 => 4,
392
152k
            ElementType::Int64 => 8,
393
484k
            ElementType::Double => 8,
394
452k
            ElementType::DateTime => 8,
395
166k
            ElementType::Timestamp => 8,
396
227k
            ElementType::ObjectId => 12,
397
156k
            ElementType::Decimal128 => 16,
398
963k
            ElementType::Null => 0,
399
919k
            ElementType::Undefined => 0,
400
567k
            ElementType::MinKey => 0,
401
431k
            ElementType::MaxKey => 0,
402
134k
            ElementType::String => read_len(&self.doc.as_bytes()[offset..])?,
403
321k
            ElementType::EmbeddedDocument => self.next_document_len(offset)?,
404
173k
            ElementType::Array => self.next_document_len(offset)?,
405
720k
            ElementType::Binary => self.get_next_length_at(offset)? + 4 + 1,
406
            ElementType::RegularExpression => {
407
1.13M
                let pattern = self.doc.read_cstring_at(offset)?;
408
1.13M
                let options = self.doc.read_cstring_at(offset + pattern.len() + 1)?;
409
1.13M
                pattern.len() + 1 + options.len() + 1
410
            }
411
161k
            ElementType::DbPointer => read_len(&self.doc.as_bytes()[offset..])? + 12,
412
116k
            ElementType::Symbol => read_len(&self.doc.as_bytes()[offset..])?,
413
190k
            ElementType::JavaScriptCode => read_len(&self.doc.as_bytes()[offset..])?,
414
361k
            ElementType::JavaScriptCodeWithScope => self.get_next_length_at(offset)?,
415
        };
416
417
9.18M
        self.verify_enough_bytes(offset, element_size)?;
418
9.18M
        self.offset = offset + element_size;
419
9.18M
420
9.18M
        Ok((element_type, element_size))
421
9.19M
    }
422
}
423
424
impl<'a> Iterator for RawIter<'a> {
425
    type Item = Result<RawElement<'a>>;
426
427
9.60M
    fn next(&mut self) -> Option<Result<RawElement<'a>>> {
428
9.60M
        if !self.valid {
429
7.71k
            return None;
430
9.59M
        } else if self.offset == self.doc.as_bytes().len() - 1 {
431
391k
            if self.doc.as_bytes()[self.offset] == 0 {
432
                // end of document marker
433
391k
                return None;
434
            } else {
435
0
                self.valid = false;
436
0
                return Some(Err(Error::malformed_bytes("document not null terminated")));
437
            }
438
9.20M
        } else if self.offset >= self.doc.as_bytes().len() {
439
6.61k
            self.valid = false;
440
6.61k
            return Some(Err(Error::malformed_bytes("iteration overflowed document")));
441
9.19M
        }
442
443
9.19M
        let key = match self.doc.read_cstring_at(self.offset + 1) {
444
9.19M
            Ok(k) => k,
445
1.77k
            Err(e) => {
446
1.77k
                self.valid = false;
447
1.77k
                return Some(Err(e));
448
            }
449
        };
450
9.19M
        let offset = self.offset + 1 + key.len() + 1; // type specifier + key + \0
451
9.19M
452
9.19M
        Some(match self.get_next_kvp(offset) {
453
9.18M
            Ok((kind, size)) => Ok(RawElement {
454
9.18M
                key,
455
9.18M
                kind,
456
9.18M
                doc: self.doc,
457
9.18M
                start_at: offset,
458
9.18M
                size,
459
9.18M
            }),
460
9.66k
            Err(error) => {
461
9.66k
                self.valid = false;
462
9.66k
                Err(error.with_key(key.as_str()))
463
            }
464
        })
465
9.60M
    }
466
}
467
468
pub(crate) enum Utf8LossyBson<'a> {
469
    String(String),
470
    JavaScriptCode(String),
471
    JavaScriptCodeWithScope(Utf8LossyJavaScriptCodeWithScope<'a>),
472
    Symbol(String),
473
    DbPointer(crate::DbPointer),
474
    RegularExpression(crate::Regex),
475
}
476
477
pub(crate) struct Utf8LossyJavaScriptCodeWithScope<'a> {
478
    pub(crate) code: String,
479
    pub(crate) scope: &'a RawDocument,
480
}
481
482
impl<'a> From<Utf8LossyBson<'a>> for RawBson {
483
0
    fn from(value: Utf8LossyBson<'a>) -> Self {
484
0
        match value {
485
0
            Utf8LossyBson::String(s) => RawBson::String(s),
486
0
            Utf8LossyBson::JavaScriptCode(s) => RawBson::JavaScriptCode(s),
487
            Utf8LossyBson::JavaScriptCodeWithScope(Utf8LossyJavaScriptCodeWithScope {
488
0
                code,
489
0
                scope,
490
0
            }) => RawBson::JavaScriptCodeWithScope(super::RawJavaScriptCodeWithScope {
491
0
                code,
492
0
                scope: scope.to_owned(),
493
0
            }),
494
0
            Utf8LossyBson::Symbol(s) => RawBson::Symbol(s),
495
0
            Utf8LossyBson::DbPointer(p) => RawBson::DbPointer(p),
496
0
            Utf8LossyBson::RegularExpression(r) => RawBson::RegularExpression(r),
497
        }
498
0
    }
499
}