Coverage Report

Created: 2025-10-10 06:03

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/json/src/read.rs
Line
Count
Source
1
use crate::error::{Error, ErrorCode, Result};
2
use alloc::vec::Vec;
3
use core::cmp;
4
use core::mem;
5
use core::ops::Deref;
6
use core::str;
7
8
#[cfg(feature = "std")]
9
use crate::io;
10
#[cfg(feature = "std")]
11
use crate::iter::LineColIterator;
12
13
#[cfg(feature = "raw_value")]
14
use crate::raw::BorrowedRawDeserializer;
15
#[cfg(all(feature = "raw_value", feature = "std"))]
16
use crate::raw::OwnedRawDeserializer;
17
#[cfg(all(feature = "raw_value", feature = "std"))]
18
use alloc::string::String;
19
#[cfg(feature = "raw_value")]
20
use serde::de::Visitor;
21
22
/// Trait used by the deserializer for iterating over input. This is manually
23
/// "specialized" for iterating over `&[u8]`. Once feature(specialization) is
24
/// stable we can use actual specialization.
25
///
26
/// This trait is sealed and cannot be implemented for types outside of
27
/// `serde_json`.
28
pub trait Read<'de>: private::Sealed {
29
    #[doc(hidden)]
30
    fn next(&mut self) -> Result<Option<u8>>;
31
    #[doc(hidden)]
32
    fn peek(&mut self) -> Result<Option<u8>>;
33
34
    /// Only valid after a call to peek(). Discards the peeked byte.
35
    #[doc(hidden)]
36
    fn discard(&mut self);
37
38
    /// Position of the most recent call to next().
39
    ///
40
    /// The most recent call was probably next() and not peek(), but this method
41
    /// should try to return a sensible result if the most recent call was
42
    /// actually peek() because we don't always know.
43
    ///
44
    /// Only called in case of an error, so performance is not important.
45
    #[doc(hidden)]
46
    fn position(&self) -> Position;
47
48
    /// Position of the most recent call to peek().
49
    ///
50
    /// The most recent call was probably peek() and not next(), but this method
51
    /// should try to return a sensible result if the most recent call was
52
    /// actually next() because we don't always know.
53
    ///
54
    /// Only called in case of an error, so performance is not important.
55
    #[doc(hidden)]
56
    fn peek_position(&self) -> Position;
57
58
    /// Offset from the beginning of the input to the next byte that would be
59
    /// returned by next() or peek().
60
    #[doc(hidden)]
61
    fn byte_offset(&self) -> usize;
62
63
    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
64
    /// string until the next quotation mark using the given scratch space if
65
    /// necessary. The scratch space is initially empty.
66
    #[doc(hidden)]
67
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>;
68
69
    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
70
    /// string until the next quotation mark using the given scratch space if
71
    /// necessary. The scratch space is initially empty.
72
    ///
73
    /// This function returns the raw bytes in the string with escape sequences
74
    /// expanded but without performing unicode validation.
75
    #[doc(hidden)]
76
    fn parse_str_raw<'s>(
77
        &'s mut self,
78
        scratch: &'s mut Vec<u8>,
79
    ) -> Result<Reference<'de, 's, [u8]>>;
80
81
    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
82
    /// string until the next quotation mark but discards the data.
83
    #[doc(hidden)]
84
    fn ignore_str(&mut self) -> Result<()>;
85
86
    /// Assumes the previous byte was a hex escape sequence ('\u') in a string.
87
    /// Parses next hexadecimal sequence.
88
    #[doc(hidden)]
89
    fn decode_hex_escape(&mut self) -> Result<u16>;
90
91
    /// Switch raw buffering mode on.
92
    ///
93
    /// This is used when deserializing `RawValue`.
94
    #[cfg(feature = "raw_value")]
95
    #[doc(hidden)]
96
    fn begin_raw_buffering(&mut self);
97
98
    /// Switch raw buffering mode off and provides the raw buffered data to the
99
    /// given visitor.
100
    #[cfg(feature = "raw_value")]
101
    #[doc(hidden)]
102
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
103
    where
104
        V: Visitor<'de>;
105
106
    /// Whether StreamDeserializer::next needs to check the failed flag. True
107
    /// for IoRead, false for StrRead and SliceRead which can track failure by
108
    /// truncating their input slice to avoid the extra check on every next
109
    /// call.
110
    #[doc(hidden)]
111
    const should_early_return_if_failed: bool;
112
113
    /// Mark a persistent failure of StreamDeserializer, either by setting the
114
    /// flag or by truncating the input data.
115
    #[doc(hidden)]
116
    fn set_failed(&mut self, failed: &mut bool);
117
}
118
119
pub struct Position {
120
    pub line: usize,
121
    pub column: usize,
122
}
123
124
pub enum Reference<'b, 'c, T>
125
where
126
    T: ?Sized + 'static,
127
{
128
    Borrowed(&'b T),
129
    Copied(&'c T),
130
}
131
132
impl<'b, 'c, T> Deref for Reference<'b, 'c, T>
133
where
134
    T: ?Sized + 'static,
135
{
136
    type Target = T;
137
138
0
    fn deref(&self) -> &Self::Target {
139
0
        match *self {
140
0
            Reference::Borrowed(b) => b,
141
0
            Reference::Copied(c) => c,
142
        }
143
0
    }
144
}
145
146
/// JSON input source that reads from a std::io input stream.
147
#[cfg(feature = "std")]
148
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
149
pub struct IoRead<R>
150
where
151
    R: io::Read,
152
{
153
    iter: LineColIterator<io::Bytes<R>>,
154
    /// Temporary storage of peeked byte.
155
    ch: Option<u8>,
156
    #[cfg(feature = "raw_value")]
157
    raw_buffer: Option<Vec<u8>>,
158
}
159
160
/// JSON input source that reads from a slice of bytes.
161
//
162
// This is more efficient than other iterators because peek() can be read-only
163
// and we can compute line/col position only if an error happens.
164
pub struct SliceRead<'a> {
165
    slice: &'a [u8],
166
    /// Index of the *next* byte that will be returned by next() or peek().
167
    index: usize,
168
    #[cfg(feature = "raw_value")]
169
    raw_buffering_start_index: usize,
170
}
171
172
/// JSON input source that reads from a UTF-8 string.
173
//
174
// Able to elide UTF-8 checks by assuming that the input is valid UTF-8.
175
pub struct StrRead<'a> {
176
    delegate: SliceRead<'a>,
177
    #[cfg(feature = "raw_value")]
178
    data: &'a str,
179
}
180
181
// Prevent users from implementing the Read trait.
182
mod private {
183
    pub trait Sealed {}
184
}
185
186
//////////////////////////////////////////////////////////////////////////////
187
188
#[cfg(feature = "std")]
189
impl<R> IoRead<R>
190
where
191
    R: io::Read,
192
{
193
    /// Create a JSON input source to read from a std::io input stream.
194
    ///
195
    /// When reading from a source against which short reads are not efficient, such
196
    /// as a [`File`], you will want to apply your own buffering because serde_json
197
    /// will not buffer the input. See [`std::io::BufReader`].
198
    ///
199
    /// [`File`]: std::fs::File
200
    pub fn new(reader: R) -> Self {
201
        IoRead {
202
            iter: LineColIterator::new(reader.bytes()),
203
            ch: None,
204
            #[cfg(feature = "raw_value")]
205
            raw_buffer: None,
206
        }
207
    }
208
}
209
210
#[cfg(feature = "std")]
211
impl<R> private::Sealed for IoRead<R> where R: io::Read {}
212
213
#[cfg(feature = "std")]
214
impl<R> IoRead<R>
215
where
216
    R: io::Read,
217
{
218
    fn parse_str_bytes<'s, T, F>(
219
        &'s mut self,
220
        scratch: &'s mut Vec<u8>,
221
        validate: bool,
222
        result: F,
223
    ) -> Result<T>
224
    where
225
        T: 's,
226
        F: FnOnce(&'s Self, &'s [u8]) -> Result<T>,
227
    {
228
        loop {
229
            let ch = tri!(next_or_eof(self));
230
            if !is_escape(ch, true) {
231
                scratch.push(ch);
232
                continue;
233
            }
234
            match ch {
235
                b'"' => {
236
                    return result(self, scratch);
237
                }
238
                b'\\' => {
239
                    tri!(parse_escape(self, validate, scratch));
240
                }
241
                _ => {
242
                    if validate {
243
                        return error(self, ErrorCode::ControlCharacterWhileParsingString);
244
                    }
245
                    scratch.push(ch);
246
                }
247
            }
248
        }
249
    }
250
}
251
252
#[cfg(feature = "std")]
253
impl<'de, R> Read<'de> for IoRead<R>
254
where
255
    R: io::Read,
256
{
257
    #[inline]
258
    fn next(&mut self) -> Result<Option<u8>> {
259
        match self.ch.take() {
260
            Some(ch) => {
261
                #[cfg(feature = "raw_value")]
262
                {
263
                    if let Some(buf) = &mut self.raw_buffer {
264
                        buf.push(ch);
265
                    }
266
                }
267
                Ok(Some(ch))
268
            }
269
            None => match self.iter.next() {
270
                Some(Err(err)) => Err(Error::io(err)),
271
                Some(Ok(ch)) => {
272
                    #[cfg(feature = "raw_value")]
273
                    {
274
                        if let Some(buf) = &mut self.raw_buffer {
275
                            buf.push(ch);
276
                        }
277
                    }
278
                    Ok(Some(ch))
279
                }
280
                None => Ok(None),
281
            },
282
        }
283
    }
284
285
    #[inline]
286
    fn peek(&mut self) -> Result<Option<u8>> {
287
        match self.ch {
288
            Some(ch) => Ok(Some(ch)),
289
            None => match self.iter.next() {
290
                Some(Err(err)) => Err(Error::io(err)),
291
                Some(Ok(ch)) => {
292
                    self.ch = Some(ch);
293
                    Ok(self.ch)
294
                }
295
                None => Ok(None),
296
            },
297
        }
298
    }
299
300
    #[cfg(not(feature = "raw_value"))]
301
    #[inline]
302
    fn discard(&mut self) {
303
        self.ch = None;
304
    }
305
306
    #[cfg(feature = "raw_value")]
307
    fn discard(&mut self) {
308
        if let Some(ch) = self.ch.take() {
309
            if let Some(buf) = &mut self.raw_buffer {
310
                buf.push(ch);
311
            }
312
        }
313
    }
314
315
    fn position(&self) -> Position {
316
        Position {
317
            line: self.iter.line(),
318
            column: self.iter.col(),
319
        }
320
    }
321
322
    fn peek_position(&self) -> Position {
323
        // The LineColIterator updates its position during peek() so it has the
324
        // right one here.
325
        self.position()
326
    }
327
328
    fn byte_offset(&self) -> usize {
329
        match self.ch {
330
            Some(_) => self.iter.byte_offset() - 1,
331
            None => self.iter.byte_offset(),
332
        }
333
    }
334
335
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
336
        self.parse_str_bytes(scratch, true, as_str)
337
            .map(Reference::Copied)
338
    }
339
340
    fn parse_str_raw<'s>(
341
        &'s mut self,
342
        scratch: &'s mut Vec<u8>,
343
    ) -> Result<Reference<'de, 's, [u8]>> {
344
        self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes))
345
            .map(Reference::Copied)
346
    }
347
348
    fn ignore_str(&mut self) -> Result<()> {
349
        loop {
350
            let ch = tri!(next_or_eof(self));
351
            if !is_escape(ch, true) {
352
                continue;
353
            }
354
            match ch {
355
                b'"' => {
356
                    return Ok(());
357
                }
358
                b'\\' => {
359
                    tri!(ignore_escape(self));
360
                }
361
                _ => {
362
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
363
                }
364
            }
365
        }
366
    }
367
368
    fn decode_hex_escape(&mut self) -> Result<u16> {
369
        let a = tri!(next_or_eof(self));
370
        let b = tri!(next_or_eof(self));
371
        let c = tri!(next_or_eof(self));
372
        let d = tri!(next_or_eof(self));
373
        match decode_four_hex_digits(a, b, c, d) {
374
            Some(val) => Ok(val),
375
            None => error(self, ErrorCode::InvalidEscape),
376
        }
377
    }
378
379
    #[cfg(feature = "raw_value")]
380
    fn begin_raw_buffering(&mut self) {
381
        self.raw_buffer = Some(Vec::new());
382
    }
383
384
    #[cfg(feature = "raw_value")]
385
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
386
    where
387
        V: Visitor<'de>,
388
    {
389
        let raw = self.raw_buffer.take().unwrap();
390
        let raw = match String::from_utf8(raw) {
391
            Ok(raw) => raw,
392
            Err(_) => return error(self, ErrorCode::InvalidUnicodeCodePoint),
393
        };
394
        visitor.visit_map(OwnedRawDeserializer {
395
            raw_value: Some(raw),
396
        })
397
    }
398
399
    const should_early_return_if_failed: bool = true;
400
401
    #[inline]
402
    #[cold]
403
    fn set_failed(&mut self, failed: &mut bool) {
404
        *failed = true;
405
    }
406
}
407
408
//////////////////////////////////////////////////////////////////////////////
409
410
impl<'a> SliceRead<'a> {
411
    /// Create a JSON input source to read from a slice of bytes.
412
6.06k
    pub fn new(slice: &'a [u8]) -> Self {
413
6.06k
        SliceRead {
414
6.06k
            slice,
415
6.06k
            index: 0,
416
6.06k
            #[cfg(feature = "raw_value")]
417
6.06k
            raw_buffering_start_index: 0,
418
6.06k
        }
419
6.06k
    }
420
421
32.0k
    fn position_of_index(&self, i: usize) -> Position {
422
32.0k
        let start_of_line = match memchr::memrchr(b'\n', &self.slice[..i]) {
423
19.7k
            Some(position) => position + 1,
424
12.3k
            None => 0,
425
        };
426
32.0k
        Position {
427
32.0k
            line: 1 + memchr::memchr_iter(b'\n', &self.slice[..start_of_line]).count(),
428
32.0k
            column: i - start_of_line,
429
32.0k
        }
430
32.0k
    }
431
432
595k
    fn skip_to_escape(&mut self, forbid_control_characters: bool) {
433
        // Immediately bail-out on empty strings and consecutive escapes (e.g. \u041b\u0435)
434
595k
        if self.index == self.slice.len()
435
595k
            || is_escape(self.slice[self.index], forbid_control_characters)
436
        {
437
204k
            return;
438
391k
        }
439
391k
        self.index += 1;
440
441
391k
        let rest = &self.slice[self.index..];
442
443
391k
        if !forbid_control_characters {
444
0
            self.index += memchr::memchr2(b'"', b'\\', rest).unwrap_or(rest.len());
445
0
            return;
446
391k
        }
447
448
        // We wish to find the first byte in range 0x00..=0x1F or " or \. Ideally, we'd use
449
        // something akin to memchr3, but the memchr crate does not support this at the moment.
450
        // Therefore, we use a variation on Mycroft's algorithm [1] to provide performance better
451
        // than a naive loop. It runs faster than equivalent two-pass memchr2+SWAR code on
452
        // benchmarks and it's cross-platform, so probably the right fit.
453
        // [1]: https://groups.google.com/forum/#!original/comp.lang.c/2HtQXvg7iKc/xOJeipH6KLMJ
454
455
        #[cfg(fast_arithmetic = "64")]
456
        type Chunk = u64;
457
        #[cfg(fast_arithmetic = "32")]
458
        type Chunk = u32;
459
460
        const STEP: usize = mem::size_of::<Chunk>();
461
        const ONE_BYTES: Chunk = Chunk::MAX / 255; // 0x0101...01
462
463
15.9M
        for chunk in rest.chunks_exact(STEP) {
464
15.9M
            let chars = Chunk::from_le_bytes(chunk.try_into().unwrap());
465
15.9M
            let contains_ctrl = chars.wrapping_sub(ONE_BYTES * 0x20) & !chars;
466
15.9M
            let chars_quote = chars ^ (ONE_BYTES * Chunk::from(b'"'));
467
15.9M
            let contains_quote = chars_quote.wrapping_sub(ONE_BYTES) & !chars_quote;
468
15.9M
            let chars_backslash = chars ^ (ONE_BYTES * Chunk::from(b'\\'));
469
15.9M
            let contains_backslash = chars_backslash.wrapping_sub(ONE_BYTES) & !chars_backslash;
470
15.9M
            let masked = (contains_ctrl | contains_quote | contains_backslash) & (ONE_BYTES << 7);
471
15.9M
            if masked != 0 {
472
                // SAFETY: chunk is in-bounds for slice
473
389k
                self.index = unsafe { chunk.as_ptr().offset_from(self.slice.as_ptr()) } as usize
474
389k
                    + masked.trailing_zeros() as usize / 8;
475
389k
                return;
476
15.5M
            }
477
        }
478
479
1.40k
        self.index += rest.len() / STEP * STEP;
480
1.40k
        self.skip_to_escape_slow();
481
595k
    }
482
483
    #[cold]
484
    #[inline(never)]
485
1.40k
    fn skip_to_escape_slow(&mut self) {
486
3.50k
        while self.index < self.slice.len() && !is_escape(self.slice[self.index], true) {
487
2.10k
            self.index += 1;
488
2.10k
        }
489
1.40k
    }
490
491
    /// The big optimization here over IoRead is that if the string contains no
492
    /// backslash escape sequences, the returned &str is a slice of the raw JSON
493
    /// data so we avoid copying into the scratch space.
494
312k
    fn parse_str_bytes<'s, T, F>(
495
312k
        &'s mut self,
496
312k
        scratch: &'s mut Vec<u8>,
497
312k
        validate: bool,
498
312k
        result: F,
499
312k
    ) -> Result<Reference<'a, 's, T>>
500
312k
    where
501
312k
        T: ?Sized + 's,
502
312k
        F: for<'f> FnOnce(&'s Self, &'f [u8]) -> Result<&'f T>,
503
    {
504
        // Index of the first byte not yet copied into the scratch space.
505
312k
        let mut start = self.index;
506
507
        loop {
508
595k
            self.skip_to_escape(validate);
509
595k
            if self.index == self.slice.len() {
510
842
                return error(self, ErrorCode::EofWhileParsingString);
511
594k
            }
512
594k
            match self.slice[self.index] {
513
                b'"' => {
514
310k
                    if scratch.is_empty() {
515
                        // Fast path: return a slice of the raw JSON without any
516
                        // copying.
517
294k
                        let borrowed = &self.slice[start..self.index];
518
294k
                        self.index += 1;
519
294k
                        return result(self, borrowed).map(Reference::Borrowed);
520
                    } else {
521
16.2k
                        scratch.extend_from_slice(&self.slice[start..self.index]);
522
16.2k
                        self.index += 1;
523
16.2k
                        return result(self, scratch).map(Reference::Copied);
524
                    }
525
                }
526
                b'\\' => {
527
284k
                    scratch.extend_from_slice(&self.slice[start..self.index]);
528
284k
                    self.index += 1;
529
284k
                    tri!(parse_escape(self, validate, scratch));
530
283k
                    start = self.index;
531
                }
532
                _ => {
533
294
                    self.index += 1;
534
294
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
535
                }
536
            }
537
        }
538
312k
    }
Unexecuted instantiation: <serde_json::read::SliceRead>::parse_str_bytes::<[u8], <serde_json::read::SliceRead as serde_json::read::Read>::parse_str_raw::{closure#0}>
<serde_json::read::SliceRead>::parse_str_bytes::<str, serde_json::read::as_str<serde_json::read::SliceRead>>
Line
Count
Source
494
312k
    fn parse_str_bytes<'s, T, F>(
495
312k
        &'s mut self,
496
312k
        scratch: &'s mut Vec<u8>,
497
312k
        validate: bool,
498
312k
        result: F,
499
312k
    ) -> Result<Reference<'a, 's, T>>
500
312k
    where
501
312k
        T: ?Sized + 's,
502
312k
        F: for<'f> FnOnce(&'s Self, &'f [u8]) -> Result<&'f T>,
503
    {
504
        // Index of the first byte not yet copied into the scratch space.
505
312k
        let mut start = self.index;
506
507
        loop {
508
595k
            self.skip_to_escape(validate);
509
595k
            if self.index == self.slice.len() {
510
842
                return error(self, ErrorCode::EofWhileParsingString);
511
594k
            }
512
594k
            match self.slice[self.index] {
513
                b'"' => {
514
310k
                    if scratch.is_empty() {
515
                        // Fast path: return a slice of the raw JSON without any
516
                        // copying.
517
294k
                        let borrowed = &self.slice[start..self.index];
518
294k
                        self.index += 1;
519
294k
                        return result(self, borrowed).map(Reference::Borrowed);
520
                    } else {
521
16.2k
                        scratch.extend_from_slice(&self.slice[start..self.index]);
522
16.2k
                        self.index += 1;
523
16.2k
                        return result(self, scratch).map(Reference::Copied);
524
                    }
525
                }
526
                b'\\' => {
527
284k
                    scratch.extend_from_slice(&self.slice[start..self.index]);
528
284k
                    self.index += 1;
529
284k
                    tri!(parse_escape(self, validate, scratch));
530
283k
                    start = self.index;
531
                }
532
                _ => {
533
294
                    self.index += 1;
534
294
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
535
                }
536
            }
537
        }
538
312k
    }
Unexecuted instantiation: <serde_json::read::SliceRead>::parse_str_bytes::<str, <serde_json::read::StrRead as serde_json::read::Read>::parse_str::{closure#0}>
539
}
540
541
impl<'a> private::Sealed for SliceRead<'a> {}
542
543
impl<'a> Read<'a> for SliceRead<'a> {
544
    #[inline]
545
2.43M
    fn next(&mut self) -> Result<Option<u8>> {
546
        // `Ok(self.slice.get(self.index).map(|ch| { self.index += 1; *ch }))`
547
        // is about 10% slower.
548
2.43M
        Ok(if self.index < self.slice.len() {
549
2.43M
            let ch = self.slice[self.index];
550
2.43M
            self.index += 1;
551
2.43M
            Some(ch)
552
        } else {
553
304
            None
554
        })
555
2.43M
    }
<serde_json::read::SliceRead as serde_json::read::Read>::next
Line
Count
Source
545
2.14M
    fn next(&mut self) -> Result<Option<u8>> {
546
        // `Ok(self.slice.get(self.index).map(|ch| { self.index += 1; *ch }))`
547
        // is about 10% slower.
548
2.14M
        Ok(if self.index < self.slice.len() {
549
2.14M
            let ch = self.slice[self.index];
550
2.14M
            self.index += 1;
551
2.14M
            Some(ch)
552
        } else {
553
157
            None
554
        })
555
2.14M
    }
<serde_json::read::SliceRead as serde_json::read::Read>::next
Line
Count
Source
545
284k
    fn next(&mut self) -> Result<Option<u8>> {
546
        // `Ok(self.slice.get(self.index).map(|ch| { self.index += 1; *ch }))`
547
        // is about 10% slower.
548
284k
        Ok(if self.index < self.slice.len() {
549
284k
            let ch = self.slice[self.index];
550
284k
            self.index += 1;
551
284k
            Some(ch)
552
        } else {
553
147
            None
554
        })
555
284k
    }
556
557
    #[inline]
558
33.4M
    fn peek(&mut self) -> Result<Option<u8>> {
559
        // `Ok(self.slice.get(self.index).map(|ch| *ch))` is about 10% slower
560
        // for some reason.
561
33.4M
        Ok(if self.index < self.slice.len() {
562
33.4M
            Some(self.slice[self.index])
563
        } else {
564
25.0k
            None
565
        })
566
33.4M
    }
<serde_json::read::SliceRead as serde_json::read::Read>::peek
Line
Count
Source
558
33.4M
    fn peek(&mut self) -> Result<Option<u8>> {
559
        // `Ok(self.slice.get(self.index).map(|ch| *ch))` is about 10% slower
560
        // for some reason.
561
33.4M
        Ok(if self.index < self.slice.len() {
562
33.4M
            Some(self.slice[self.index])
563
        } else {
564
24.9k
            None
565
        })
566
33.4M
    }
<serde_json::read::SliceRead as serde_json::read::Read>::peek
Line
Count
Source
558
17.5k
    fn peek(&mut self) -> Result<Option<u8>> {
559
        // `Ok(self.slice.get(self.index).map(|ch| *ch))` is about 10% slower
560
        // for some reason.
561
17.5k
        Ok(if self.index < self.slice.len() {
562
17.4k
            Some(self.slice[self.index])
563
        } else {
564
86
            None
565
        })
566
17.5k
    }
567
568
    #[inline]
569
25.7M
    fn discard(&mut self) {
570
25.7M
        self.index += 1;
571
25.7M
    }
<serde_json::read::SliceRead as serde_json::read::Read>::discard
Line
Count
Source
569
25.7M
    fn discard(&mut self) {
570
25.7M
        self.index += 1;
571
25.7M
    }
<serde_json::read::SliceRead as serde_json::read::Read>::discard
Line
Count
Source
569
17.4k
    fn discard(&mut self) {
570
17.4k
        self.index += 1;
571
17.4k
    }
572
573
2.87k
    fn position(&self) -> Position {
574
2.87k
        self.position_of_index(self.index)
575
2.87k
    }
576
577
29.1k
    fn peek_position(&self) -> Position {
578
        // Cap it at slice.len() just in case the most recent call was next()
579
        // and it returned the last byte.
580
29.1k
        self.position_of_index(cmp::min(self.slice.len(), self.index + 1))
581
29.1k
    }
582
583
0
    fn byte_offset(&self) -> usize {
584
0
        self.index
585
0
    }
586
587
312k
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
588
312k
        self.parse_str_bytes(scratch, true, as_str)
589
312k
    }
590
591
0
    fn parse_str_raw<'s>(
592
0
        &'s mut self,
593
0
        scratch: &'s mut Vec<u8>,
594
0
    ) -> Result<Reference<'a, 's, [u8]>> {
595
0
        self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes))
596
0
    }
597
598
0
    fn ignore_str(&mut self) -> Result<()> {
599
        loop {
600
0
            self.skip_to_escape(true);
601
0
            if self.index == self.slice.len() {
602
0
                return error(self, ErrorCode::EofWhileParsingString);
603
0
            }
604
0
            match self.slice[self.index] {
605
                b'"' => {
606
0
                    self.index += 1;
607
0
                    return Ok(());
608
                }
609
                b'\\' => {
610
0
                    self.index += 1;
611
0
                    tri!(ignore_escape(self));
612
                }
613
                _ => {
614
0
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
615
                }
616
            }
617
        }
618
0
    }
619
620
    #[inline]
621
45.3k
    fn decode_hex_escape(&mut self) -> Result<u16> {
622
45.3k
        match self.slice[self.index..] {
623
45.2k
            [a, b, c, d, ..] => {
624
45.2k
                self.index += 4;
625
45.2k
                match decode_four_hex_digits(a, b, c, d) {
626
45.1k
                    Some(val) => Ok(val),
627
129
                    None => error(self, ErrorCode::InvalidEscape),
628
                }
629
            }
630
            _ => {
631
119
                self.index = self.slice.len();
632
119
                error(self, ErrorCode::EofWhileParsingString)
633
            }
634
        }
635
45.3k
    }
636
637
    #[cfg(feature = "raw_value")]
638
    fn begin_raw_buffering(&mut self) {
639
        self.raw_buffering_start_index = self.index;
640
    }
641
642
    #[cfg(feature = "raw_value")]
643
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
644
    where
645
        V: Visitor<'a>,
646
    {
647
        let raw = &self.slice[self.raw_buffering_start_index..self.index];
648
        let raw = match str::from_utf8(raw) {
649
            Ok(raw) => raw,
650
            Err(_) => return error(self, ErrorCode::InvalidUnicodeCodePoint),
651
        };
652
        visitor.visit_map(BorrowedRawDeserializer {
653
            raw_value: Some(raw),
654
        })
655
    }
656
657
    const should_early_return_if_failed: bool = false;
658
659
    #[inline]
660
    #[cold]
661
    fn set_failed(&mut self, _failed: &mut bool) {
662
        self.slice = &self.slice[..self.index];
663
    }
664
}
665
666
//////////////////////////////////////////////////////////////////////////////
667
668
impl<'a> StrRead<'a> {
669
    /// Create a JSON input source to read from a UTF-8 string.
670
0
    pub fn new(s: &'a str) -> Self {
671
0
        StrRead {
672
0
            delegate: SliceRead::new(s.as_bytes()),
673
0
            #[cfg(feature = "raw_value")]
674
0
            data: s,
675
0
        }
676
0
    }
677
}
678
679
impl<'a> private::Sealed for StrRead<'a> {}
680
681
impl<'a> Read<'a> for StrRead<'a> {
682
    #[inline]
683
0
    fn next(&mut self) -> Result<Option<u8>> {
684
0
        self.delegate.next()
685
0
    }
686
687
    #[inline]
688
0
    fn peek(&mut self) -> Result<Option<u8>> {
689
0
        self.delegate.peek()
690
0
    }
691
692
    #[inline]
693
0
    fn discard(&mut self) {
694
0
        self.delegate.discard();
695
0
    }
696
697
0
    fn position(&self) -> Position {
698
0
        self.delegate.position()
699
0
    }
700
701
0
    fn peek_position(&self) -> Position {
702
0
        self.delegate.peek_position()
703
0
    }
704
705
0
    fn byte_offset(&self) -> usize {
706
0
        self.delegate.byte_offset()
707
0
    }
708
709
0
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
710
0
        self.delegate.parse_str_bytes(scratch, true, |_, bytes| {
711
            // The deserialization input came in as &str with a UTF-8 guarantee,
712
            // and the \u-escapes are checked along the way, so don't need to
713
            // check here.
714
0
            Ok(unsafe { str::from_utf8_unchecked(bytes) })
715
0
        })
716
0
    }
717
718
0
    fn parse_str_raw<'s>(
719
0
        &'s mut self,
720
0
        scratch: &'s mut Vec<u8>,
721
0
    ) -> Result<Reference<'a, 's, [u8]>> {
722
0
        self.delegate.parse_str_raw(scratch)
723
0
    }
724
725
0
    fn ignore_str(&mut self) -> Result<()> {
726
0
        self.delegate.ignore_str()
727
0
    }
728
729
0
    fn decode_hex_escape(&mut self) -> Result<u16> {
730
0
        self.delegate.decode_hex_escape()
731
0
    }
732
733
    #[cfg(feature = "raw_value")]
734
    fn begin_raw_buffering(&mut self) {
735
        self.delegate.begin_raw_buffering();
736
    }
737
738
    #[cfg(feature = "raw_value")]
739
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
740
    where
741
        V: Visitor<'a>,
742
    {
743
        let raw = &self.data[self.delegate.raw_buffering_start_index..self.delegate.index];
744
        visitor.visit_map(BorrowedRawDeserializer {
745
            raw_value: Some(raw),
746
        })
747
    }
748
749
    const should_early_return_if_failed: bool = false;
750
751
    #[inline]
752
    #[cold]
753
    fn set_failed(&mut self, failed: &mut bool) {
754
        self.delegate.set_failed(failed);
755
    }
756
}
757
758
//////////////////////////////////////////////////////////////////////////////
759
760
// A mutable reference to any reader is itself allowed to implement `Read`.
impl<'de, R: Read<'de>> private::Sealed for &mut R {}
761
762
impl<'de, R> Read<'de> for &mut R
763
where
764
    R: Read<'de>,
765
{
766
    fn next(&mut self) -> Result<Option<u8>> {
767
        R::next(self)
768
    }
769
770
    fn peek(&mut self) -> Result<Option<u8>> {
771
        R::peek(self)
772
    }
773
774
    fn discard(&mut self) {
775
        R::discard(self);
776
    }
777
778
    fn position(&self) -> Position {
779
        R::position(self)
780
    }
781
782
    fn peek_position(&self) -> Position {
783
        R::peek_position(self)
784
    }
785
786
    fn byte_offset(&self) -> usize {
787
        R::byte_offset(self)
788
    }
789
790
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
791
        R::parse_str(self, scratch)
792
    }
793
794
    fn parse_str_raw<'s>(
795
        &'s mut self,
796
        scratch: &'s mut Vec<u8>,
797
    ) -> Result<Reference<'de, 's, [u8]>> {
798
        R::parse_str_raw(self, scratch)
799
    }
800
801
    fn ignore_str(&mut self) -> Result<()> {
802
        R::ignore_str(self)
803
    }
804
805
    fn decode_hex_escape(&mut self) -> Result<u16> {
806
        R::decode_hex_escape(self)
807
    }
808
809
    #[cfg(feature = "raw_value")]
810
    fn begin_raw_buffering(&mut self) {
811
        R::begin_raw_buffering(self);
812
    }
813
814
    #[cfg(feature = "raw_value")]
815
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
816
    where
817
        V: Visitor<'de>,
818
    {
819
        R::end_raw_buffering(self, visitor)
820
    }
821
822
    const should_early_return_if_failed: bool = R::should_early_return_if_failed;
823
824
    fn set_failed(&mut self, failed: &mut bool) {
825
        R::set_failed(self, failed);
826
    }
827
}
828
829
//////////////////////////////////////////////////////////////////////////////
830
831
/// Marker for whether StreamDeserializer can implement FusedIterator.
832
pub trait Fused: private::Sealed {}
833
impl<'a> Fused for SliceRead<'a> {}
834
impl<'a> Fused for StrRead<'a> {}
835
836
598k
/// Returns `true` when `ch` is a double quote or a backslash, or, if
/// `including_control_characters` is set, any byte below `0x20`.
fn is_escape(ch: u8, including_control_characters: bool) -> bool {
    match ch {
        b'"' | b'\\' => true,
        _ => including_control_characters && ch < 0x20,
    }
}
839
840
284k
fn next_or_eof<'de, R>(read: &mut R) -> Result<u8>
841
284k
where
842
284k
    R: ?Sized + Read<'de>,
843
{
844
284k
    match tri!(read.next()) {
845
284k
        Some(b) => Ok(b),
846
147
        None => error(read, ErrorCode::EofWhileParsingString),
847
    }
848
284k
}
849
850
17.5k
fn peek_or_eof<'de, R>(read: &mut R) -> Result<u8>
851
17.5k
where
852
17.5k
    R: ?Sized + Read<'de>,
853
{
854
17.5k
    match tri!(read.peek()) {
855
17.4k
        Some(b) => Ok(b),
856
86
        None => error(read, ErrorCode::EofWhileParsingString),
857
    }
858
17.5k
}
859
860
2.07k
fn error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T>
861
2.07k
where
862
2.07k
    R: ?Sized + Read<'de>,
863
{
864
2.07k
    let position = read.position();
865
2.07k
    Err(Error::syntax(reason, position.line, position.column))
866
2.07k
}
Unexecuted instantiation: serde_json::read::error::<serde_json::read::SliceRead, serde_json::read::Reference<[u8]>>
serde_json::read::error::<serde_json::read::SliceRead, serde_json::read::Reference<str>>
Line
Count
Source
860
1.13k
fn error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T>
861
1.13k
where
862
1.13k
    R: ?Sized + Read<'de>,
863
{
864
1.13k
    let position = read.position();
865
1.13k
    Err(Error::syntax(reason, position.line, position.column))
866
1.13k
}
serde_json::read::error::<serde_json::read::SliceRead, &str>
Line
Count
Source
860
135
fn error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T>
861
135
where
862
135
    R: ?Sized + Read<'de>,
863
{
864
135
    let position = read.position();
865
135
    Err(Error::syntax(reason, position.line, position.column))
866
135
}
serde_json::read::error::<serde_json::read::SliceRead, u8>
Line
Count
Source
860
233
fn error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T>
861
233
where
862
233
    R: ?Sized + Read<'de>,
863
{
864
233
    let position = read.position();
865
233
    Err(Error::syntax(reason, position.line, position.column))
866
233
}
serde_json::read::error::<serde_json::read::SliceRead, u16>
Line
Count
Source
860
248
fn error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T>
861
248
where
862
248
    R: ?Sized + Read<'de>,
863
{
864
248
    let position = read.position();
865
248
    Err(Error::syntax(reason, position.line, position.column))
866
248
}
serde_json::read::error::<serde_json::read::SliceRead, ()>
Line
Count
Source
860
327
fn error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T>
861
327
where
862
327
    R: ?Sized + Read<'de>,
863
{
864
327
    let position = read.position();
865
327
    Err(Error::syntax(reason, position.line, position.column))
866
327
}
867
868
310k
fn as_str<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str> {
869
310k
    str::from_utf8(slice).or_else(|_| error(read, ErrorCode::InvalidUnicodeCodePoint))
870
310k
}
871
872
/// Parses a JSON escape sequence and appends it into the scratch space. Assumes
873
/// the previous byte read was a backslash.
874
284k
fn parse_escape<'de, R: Read<'de>>(
875
284k
    read: &mut R,
876
284k
    validate: bool,
877
284k
    scratch: &mut Vec<u8>,
878
284k
) -> Result<()> {
879
284k
    let ch = tri!(next_or_eof(read));
880
881
284k
    match ch {
882
56.3k
        b'"' => scratch.push(b'"'),
883
27.3k
        b'\\' => scratch.push(b'\\'),
884
11.2k
        b'/' => scratch.push(b'/'),
885
12.2k
        b'b' => scratch.push(b'\x08'),
886
42.6k
        b'f' => scratch.push(b'\x0c'),
887
13.0k
        b'n' => scratch.push(b'\n'),
888
46.6k
        b'r' => scratch.push(b'\r'),
889
37.7k
        b't' => scratch.push(b'\t'),
890
36.7k
        b'u' => return parse_unicode_escape(read, validate, scratch),
891
75
        _ => return error(read, ErrorCode::InvalidEscape),
892
    }
893
894
247k
    Ok(())
895
284k
}
896
897
/// Parses a JSON \u escape and appends it into the scratch space. Assumes `\u`
898
/// has just been read.
899
#[cold]
900
36.7k
fn parse_unicode_escape<'de, R: Read<'de>>(
901
36.7k
    read: &mut R,
902
36.7k
    validate: bool,
903
36.7k
    scratch: &mut Vec<u8>,
904
36.7k
) -> Result<()> {
905
36.7k
    let mut n = tri!(read.decode_hex_escape());
906
907
    // Non-BMP characters are encoded as a sequence of two hex escapes,
908
    // representing UTF-16 surrogates. If deserializing a utf-8 string the
909
    // surrogates are required to be paired, whereas deserializing a byte string
910
    // accepts lone surrogates.
911
36.5k
    if validate && n >= 0xDC00 && n <= 0xDFFF {
912
        // XXX: This is actually a trailing surrogate.
913
81
        return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
914
36.4k
    }
915
916
    loop {
917
36.4k
        if n < 0xD800 || n > 0xDBFF {
918
            // Every u16 outside of the surrogate ranges is guaranteed to be a
919
            // legal char.
920
27.6k
            push_wtf8_codepoint(n as u32, scratch);
921
27.6k
            return Ok(());
922
8.81k
        }
923
924
        // n is a leading surrogate, we now expect a trailing surrogate.
925
8.81k
        let n1 = n;
926
927
8.81k
        if tri!(peek_or_eof(read)) == b'\\' {
928
8.72k
            read.discard();
929
8.72k
        } else {
930
46
            return if validate {
931
46
                read.discard();
932
46
                error(read, ErrorCode::UnexpectedEndOfHexEscape)
933
            } else {
934
0
                push_wtf8_codepoint(n1 as u32, scratch);
935
0
                Ok(())
936
            };
937
        }
938
939
8.72k
        if tri!(peek_or_eof(read)) == b'u' {
940
8.64k
            read.discard();
941
8.64k
        } else {
942
40
            return if validate {
943
40
                read.discard();
944
40
                error(read, ErrorCode::UnexpectedEndOfHexEscape)
945
            } else {
946
0
                push_wtf8_codepoint(n1 as u32, scratch);
947
                // The \ prior to this byte started an escape sequence, so we
948
                // need to parse that now. This recursive call does not blow the
949
                // stack on malicious input because the escape is not \u, so it
950
                // will be handled by one of the easy nonrecursive cases.
951
0
                parse_escape(read, validate, scratch)
952
            };
953
        }
954
955
8.64k
        let n2 = tri!(read.decode_hex_escape());
956
957
8.55k
        if n2 < 0xDC00 || n2 > 0xDFFF {
958
85
            if validate {
959
85
                return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
960
0
            }
961
0
            push_wtf8_codepoint(n1 as u32, scratch);
962
            // If n2 is a leading surrogate, we need to restart.
963
0
            n = n2;
964
0
            continue;
965
8.46k
        }
966
967
        // This value is in range U+10000..=U+10FFFF, which is always a valid
968
        // codepoint.
969
8.46k
        let n = ((((n1 - 0xD800) as u32) << 10) | (n2 - 0xDC00) as u32) + 0x1_0000;
970
8.46k
        push_wtf8_codepoint(n, scratch);
971
8.46k
        return Ok(());
972
    }
973
36.7k
}
974
975
/// Appends the WTF-8 encoding of code point `n` to `scratch`. This is a
/// hand-rolled, more efficient counterpart of `String::push`; unlike `char`,
/// `n` may be a surrogate.
///
/// # Panics
///
/// Panics via `unreachable!()` if `n` is above `0x10_FFFF`.
#[inline]
fn push_wtf8_codepoint(n: u32, scratch: &mut Vec<u8>) {
    // ASCII is a single byte and needs no raw writes.
    if n < 0x80 {
        scratch.push(n as u8);
        return;
    }

    // Continuation byte holding the six payload bits found at `n >> shift`.
    let tail = |shift: u32| ((n >> shift) & 0b0011_1111) as u8 | 0b1000_0000;

    scratch.reserve(4);
    let old_len = scratch.len();

    // SAFETY: `reserve(4)` guarantees at least 4 bytes of allocated (possibly
    // uninitialized) capacity past `old_len`, which is where `dst` points.
    // Each reachable branch initializes exactly `written` bytes of that
    // region, with `written <= 4`, before the length is extended over them.
    unsafe {
        let dst = scratch.as_mut_ptr().add(old_len);

        let written = if n <= 0x7FF {
            dst.write(((n >> 6) & 0b0001_1111) as u8 | 0b1100_0000);
            dst.add(1).write(tail(0));
            2
        } else if n <= 0xFFFF {
            dst.write(((n >> 12) & 0b0000_1111) as u8 | 0b1110_0000);
            dst.add(1).write(tail(6));
            dst.add(2).write(tail(0));
            3
        } else if n <= 0x10_FFFF {
            dst.write(((n >> 18) & 0b0000_0111) as u8 | 0b1111_0000);
            dst.add(1).write(tail(12));
            dst.add(2).write(tail(6));
            dst.add(3).write(tail(0));
            4
        } else {
            unreachable!()
        };

        scratch.set_len(old_len + written);
    }
}
1022
1023
/// Parses a JSON escape sequence and discards the value. Assumes the previous
1024
/// byte read was a backslash.
1025
0
fn ignore_escape<'de, R>(read: &mut R) -> Result<()>
1026
0
where
1027
0
    R: ?Sized + Read<'de>,
1028
{
1029
0
    let ch = tri!(next_or_eof(read));
1030
1031
0
    match ch {
1032
0
        b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {}
1033
        b'u' => {
1034
            // At this point we don't care if the codepoint is valid. We just
1035
            // want to consume it. We don't actually know what is valid or not
1036
            // at this point, because that depends on if this string will
1037
            // ultimately be parsed into a string or a byte buffer in the "real"
1038
            // parse.
1039
1040
0
            tri!(read.decode_hex_escape());
1041
        }
1042
        _ => {
1043
0
            return error(read, ErrorCode::InvalidEscape);
1044
        }
1045
    }
1046
1047
0
    Ok(())
1048
0
}
1049
1050
/// Decodes a single ASCII hex digit (either case) to its value `0..=15`, or
/// `None` for any other byte. Only used to build the lookup tables at compile
/// time.
const fn decode_hex_val_slow(val: u8) -> Option<u8> {
    if val >= b'0' && val <= b'9' {
        Some(val - b'0')
    } else if val >= b'A' && val <= b'F' {
        Some(val - b'A' + 10)
    } else if val >= b'a' && val <= b'f' {
        Some(val - b'a' + 10)
    } else {
        None
    }
}

/// Builds a 256-entry table indexed by byte value. Hex digits map to their
/// value shifted left by `shift`; every other byte maps to `-1`.
const fn build_hex_table(shift: usize) -> [i16; 256] {
    // Start with every slot marked invalid, then fill in the hex digits.
    let mut table = [-1; 256];
    let mut byte = 0;
    while byte < 256 {
        if let Some(digit) = decode_hex_val_slow(byte as u8) {
            table[byte] = (digit as i16) << shift;
        }
        byte += 1;
    }
    table
}

// Low-nibble table: hex digit value, or -1 for a non-hex byte.
static HEX0: [i16; 256] = build_hex_table(0);
// High-nibble table: hex digit value shifted left by 4, or -1.
static HEX1: [i16; 256] = build_hex_table(4);

/// Combines four ASCII hex digits, most significant first, into a `u16`.
/// Returns `None` if any of the four bytes is not a hex digit.
fn decode_four_hex_digits(a: u8, b: u8, c: u8, d: u8) -> Option<u16> {
    let high_byte = i32::from(HEX1[usize::from(a)]) | i32::from(HEX0[usize::from(b)]);
    let low_byte = i32::from(HEX1[usize::from(c)]) | i32::from(HEX0[usize::from(d)]);

    // A -1 sentinel from any lookup sets the sign bit of the combined value,
    // so one sign check covers all four digits.
    let combined = (high_byte << 8) | low_byte;
    if combined < 0 {
        None
    } else {
        Some(combined as u16)
    }
}