Coverage Report

Created: 2025-07-12 06:37

/rust/registry/src/index.crates.io-6f17d22bba15001f/serde_json-1.0.140/src/read.rs
Line
Count
Source (jump to first uncovered line)
1
use crate::error::{Error, ErrorCode, Result};
2
use alloc::vec::Vec;
3
use core::cmp;
4
use core::mem;
5
use core::ops::Deref;
6
use core::str;
7
8
#[cfg(feature = "std")]
9
use crate::io;
10
#[cfg(feature = "std")]
11
use crate::iter::LineColIterator;
12
13
#[cfg(feature = "raw_value")]
14
use crate::raw::BorrowedRawDeserializer;
15
#[cfg(all(feature = "raw_value", feature = "std"))]
16
use crate::raw::OwnedRawDeserializer;
17
#[cfg(all(feature = "raw_value", feature = "std"))]
18
use alloc::string::String;
19
#[cfg(feature = "raw_value")]
20
use serde::de::Visitor;
21
22
/// Trait used by the deserializer for iterating over input. This is manually
23
/// "specialized" for iterating over `&[u8]`. Once feature(specialization) is
24
/// stable we can use actual specialization.
25
///
26
/// This trait is sealed and cannot be implemented for types outside of
27
/// `serde_json`.
28
pub trait Read<'de>: private::Sealed {
29
    #[doc(hidden)]
30
    fn next(&mut self) -> Result<Option<u8>>;
31
    #[doc(hidden)]
32
    fn peek(&mut self) -> Result<Option<u8>>;
33
34
    /// Only valid after a call to peek(). Discards the peeked byte.
35
    #[doc(hidden)]
36
    fn discard(&mut self);
37
38
    /// Position of the most recent call to next().
39
    ///
40
    /// The most recent call was probably next() and not peek(), but this method
41
    /// should try to return a sensible result if the most recent call was
42
    /// actually peek() because we don't always know.
43
    ///
44
    /// Only called in case of an error, so performance is not important.
45
    #[doc(hidden)]
46
    fn position(&self) -> Position;
47
48
    /// Position of the most recent call to peek().
49
    ///
50
    /// The most recent call was probably peek() and not next(), but this method
51
    /// should try to return a sensible result if the most recent call was
52
    /// actually next() because we don't always know.
53
    ///
54
    /// Only called in case of an error, so performance is not important.
55
    #[doc(hidden)]
56
    fn peek_position(&self) -> Position;
57
58
    /// Offset from the beginning of the input to the next byte that would be
59
    /// returned by next() or peek().
60
    #[doc(hidden)]
61
    fn byte_offset(&self) -> usize;
62
63
    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
64
    /// string until the next quotation mark using the given scratch space if
65
    /// necessary. The scratch space is initially empty.
66
    #[doc(hidden)]
67
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>>;
68
69
    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
70
    /// string until the next quotation mark using the given scratch space if
71
    /// necessary. The scratch space is initially empty.
72
    ///
73
    /// This function returns the raw bytes in the string with escape sequences
74
    /// expanded but without performing unicode validation.
75
    #[doc(hidden)]
76
    fn parse_str_raw<'s>(
77
        &'s mut self,
78
        scratch: &'s mut Vec<u8>,
79
    ) -> Result<Reference<'de, 's, [u8]>>;
80
81
    /// Assumes the previous byte was a quotation mark. Parses a JSON-escaped
82
    /// string until the next quotation mark but discards the data.
83
    #[doc(hidden)]
84
    fn ignore_str(&mut self) -> Result<()>;
85
86
    /// Assumes the previous byte was a hex escape sequence ('\u') in a string.
87
    /// Parses next hexadecimal sequence.
88
    #[doc(hidden)]
89
    fn decode_hex_escape(&mut self) -> Result<u16>;
90
91
    /// Switch raw buffering mode on.
92
    ///
93
    /// This is used when deserializing `RawValue`.
94
    #[cfg(feature = "raw_value")]
95
    #[doc(hidden)]
96
    fn begin_raw_buffering(&mut self);
97
98
    /// Switch raw buffering mode off and provides the raw buffered data to the
99
    /// given visitor.
100
    #[cfg(feature = "raw_value")]
101
    #[doc(hidden)]
102
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
103
    where
104
        V: Visitor<'de>;
105
106
    /// Whether StreamDeserializer::next needs to check the failed flag. True
107
    /// for IoRead, false for StrRead and SliceRead which can track failure by
108
    /// truncating their input slice to avoid the extra check on every next
109
    /// call.
110
    #[doc(hidden)]
111
    const should_early_return_if_failed: bool;
112
113
    /// Mark a persistent failure of StreamDeserializer, either by setting the
114
    /// flag or by truncating the input data.
115
    #[doc(hidden)]
116
    fn set_failed(&mut self, failed: &mut bool);
117
}
118
119
pub struct Position {
120
    pub line: usize,
121
    pub column: usize,
122
}
123
124
pub enum Reference<'b, 'c, T>
125
where
126
    T: ?Sized + 'static,
127
{
128
    Borrowed(&'b T),
129
    Copied(&'c T),
130
}
131
132
impl<'b, 'c, T> Deref for Reference<'b, 'c, T>
133
where
134
    T: ?Sized + 'static,
135
{
136
    type Target = T;
137
138
0
    fn deref(&self) -> &Self::Target {
139
0
        match *self {
140
0
            Reference::Borrowed(b) => b,
141
0
            Reference::Copied(c) => c,
142
        }
143
0
    }
144
}
145
146
/// JSON input source that reads from a std::io input stream.
147
#[cfg(feature = "std")]
148
#[cfg_attr(docsrs, doc(cfg(feature = "std")))]
149
pub struct IoRead<R>
150
where
151
    R: io::Read,
152
{
153
    iter: LineColIterator<io::Bytes<R>>,
154
    /// Temporary storage of peeked byte.
155
    ch: Option<u8>,
156
    #[cfg(feature = "raw_value")]
157
    raw_buffer: Option<Vec<u8>>,
158
}
159
160
/// JSON input source that reads from a slice of bytes.
161
//
162
// This is more efficient than other iterators because peek() can be read-only
163
// and we can compute line/col position only if an error happens.
164
pub struct SliceRead<'a> {
165
    slice: &'a [u8],
166
    /// Index of the *next* byte that will be returned by next() or peek().
167
    index: usize,
168
    #[cfg(feature = "raw_value")]
169
    raw_buffering_start_index: usize,
170
}
171
172
/// JSON input source that reads from a UTF-8 string.
173
//
174
// Able to elide UTF-8 checks by assuming that the input is valid UTF-8.
175
pub struct StrRead<'a> {
176
    delegate: SliceRead<'a>,
177
    #[cfg(feature = "raw_value")]
178
    data: &'a str,
179
}
180
181
// Prevent users from implementing the Read trait.
182
mod private {
183
    pub trait Sealed {}
184
}
185
186
//////////////////////////////////////////////////////////////////////////////
187
188
#[cfg(feature = "std")]
189
impl<R> IoRead<R>
190
where
191
    R: io::Read,
192
{
193
    /// Create a JSON input source to read from a std::io input stream.
194
    ///
195
    /// When reading from a source against which short reads are not efficient, such
196
    /// as a [`File`], you will want to apply your own buffering because serde_json
197
    /// will not buffer the input. See [`std::io::BufReader`].
198
    ///
199
    /// [`File`]: std::fs::File
200
0
    pub fn new(reader: R) -> Self {
201
0
        IoRead {
202
0
            iter: LineColIterator::new(reader.bytes()),
203
0
            ch: None,
204
0
            #[cfg(feature = "raw_value")]
205
0
            raw_buffer: None,
206
0
        }
207
0
    }
208
}
209
210
#[cfg(feature = "std")]
211
impl<R> private::Sealed for IoRead<R> where R: io::Read {}
212
213
#[cfg(feature = "std")]
214
impl<R> IoRead<R>
215
where
216
    R: io::Read,
217
{
218
0
    fn parse_str_bytes<'s, T, F>(
219
0
        &'s mut self,
220
0
        scratch: &'s mut Vec<u8>,
221
0
        validate: bool,
222
0
        result: F,
223
0
    ) -> Result<T>
224
0
    where
225
0
        T: 's,
226
0
        F: FnOnce(&'s Self, &'s [u8]) -> Result<T>,
227
0
    {
228
        loop {
229
0
            let ch = tri!(next_or_eof(self));
230
0
            if !is_escape(ch, true) {
231
0
                scratch.push(ch);
232
0
                continue;
233
0
            }
234
0
            match ch {
235
                b'"' => {
236
0
                    return result(self, scratch);
237
                }
238
                b'\\' => {
239
0
                    tri!(parse_escape(self, validate, scratch));
240
                }
241
                _ => {
242
0
                    if validate {
243
0
                        return error(self, ErrorCode::ControlCharacterWhileParsingString);
244
0
                    }
245
0
                    scratch.push(ch);
246
                }
247
            }
248
        }
249
0
    }
250
}
251
252
#[cfg(feature = "std")]
253
impl<'de, R> Read<'de> for IoRead<R>
254
where
255
    R: io::Read,
256
{
257
    #[inline]
258
0
    fn next(&mut self) -> Result<Option<u8>> {
259
0
        match self.ch.take() {
260
0
            Some(ch) => {
261
0
                #[cfg(feature = "raw_value")]
262
0
                {
263
0
                    if let Some(buf) = &mut self.raw_buffer {
264
0
                        buf.push(ch);
265
0
                    }
266
0
                }
267
0
                Ok(Some(ch))
268
            }
269
0
            None => match self.iter.next() {
270
0
                Some(Err(err)) => Err(Error::io(err)),
271
0
                Some(Ok(ch)) => {
272
0
                    #[cfg(feature = "raw_value")]
273
0
                    {
274
0
                        if let Some(buf) = &mut self.raw_buffer {
275
0
                            buf.push(ch);
276
0
                        }
277
0
                    }
278
0
                    Ok(Some(ch))
279
                }
280
0
                None => Ok(None),
281
            },
282
        }
283
0
    }
284
285
    #[inline]
286
0
    fn peek(&mut self) -> Result<Option<u8>> {
287
0
        match self.ch {
288
0
            Some(ch) => Ok(Some(ch)),
289
0
            None => match self.iter.next() {
290
0
                Some(Err(err)) => Err(Error::io(err)),
291
0
                Some(Ok(ch)) => {
292
0
                    self.ch = Some(ch);
293
0
                    Ok(self.ch)
294
                }
295
0
                None => Ok(None),
296
            },
297
        }
298
0
    }
299
300
    #[cfg(not(feature = "raw_value"))]
301
    #[inline]
302
0
    fn discard(&mut self) {
303
0
        self.ch = None;
304
0
    }
305
306
    #[cfg(feature = "raw_value")]
307
    fn discard(&mut self) {
308
        if let Some(ch) = self.ch.take() {
309
            if let Some(buf) = &mut self.raw_buffer {
310
                buf.push(ch);
311
            }
312
        }
313
    }
314
315
0
    fn position(&self) -> Position {
316
0
        Position {
317
0
            line: self.iter.line(),
318
0
            column: self.iter.col(),
319
0
        }
320
0
    }
321
322
0
    fn peek_position(&self) -> Position {
323
0
        // The LineColIterator updates its position during peek() so it has the
324
0
        // right one here.
325
0
        self.position()
326
0
    }
327
328
0
    fn byte_offset(&self) -> usize {
329
0
        match self.ch {
330
0
            Some(_) => self.iter.byte_offset() - 1,
331
0
            None => self.iter.byte_offset(),
332
        }
333
0
    }
334
335
0
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
336
0
        self.parse_str_bytes(scratch, true, as_str)
337
0
            .map(Reference::Copied)
338
0
    }
339
340
0
    fn parse_str_raw<'s>(
341
0
        &'s mut self,
342
0
        scratch: &'s mut Vec<u8>,
343
0
    ) -> Result<Reference<'de, 's, [u8]>> {
344
0
        self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes))
345
0
            .map(Reference::Copied)
346
0
    }
347
348
0
    fn ignore_str(&mut self) -> Result<()> {
349
        loop {
350
0
            let ch = tri!(next_or_eof(self));
351
0
            if !is_escape(ch, true) {
352
0
                continue;
353
0
            }
354
0
            match ch {
355
                b'"' => {
356
0
                    return Ok(());
357
                }
358
                b'\\' => {
359
0
                    tri!(ignore_escape(self));
360
                }
361
                _ => {
362
0
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
363
                }
364
            }
365
        }
366
0
    }
367
368
0
    fn decode_hex_escape(&mut self) -> Result<u16> {
369
0
        let a = tri!(next_or_eof(self));
370
0
        let b = tri!(next_or_eof(self));
371
0
        let c = tri!(next_or_eof(self));
372
0
        let d = tri!(next_or_eof(self));
373
0
        match decode_four_hex_digits(a, b, c, d) {
374
0
            Some(val) => Ok(val),
375
0
            None => error(self, ErrorCode::InvalidEscape),
376
        }
377
0
    }
378
379
    #[cfg(feature = "raw_value")]
380
    fn begin_raw_buffering(&mut self) {
381
        self.raw_buffer = Some(Vec::new());
382
    }
383
384
    #[cfg(feature = "raw_value")]
385
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
386
    where
387
        V: Visitor<'de>,
388
    {
389
        let raw = self.raw_buffer.take().unwrap();
390
        let raw = match String::from_utf8(raw) {
391
            Ok(raw) => raw,
392
            Err(_) => return error(self, ErrorCode::InvalidUnicodeCodePoint),
393
        };
394
        visitor.visit_map(OwnedRawDeserializer {
395
            raw_value: Some(raw),
396
        })
397
    }
398
399
    const should_early_return_if_failed: bool = true;
400
401
    #[inline]
402
    #[cold]
403
0
    fn set_failed(&mut self, failed: &mut bool) {
404
0
        *failed = true;
405
0
    }
406
}
407
408
//////////////////////////////////////////////////////////////////////////////
409
410
impl<'a> SliceRead<'a> {
411
    /// Create a JSON input source to read from a slice of bytes.
412
0
    pub fn new(slice: &'a [u8]) -> Self {
413
0
        SliceRead {
414
0
            slice,
415
0
            index: 0,
416
0
            #[cfg(feature = "raw_value")]
417
0
            raw_buffering_start_index: 0,
418
0
        }
419
0
    }
420
421
0
    fn position_of_index(&self, i: usize) -> Position {
422
0
        let start_of_line = match memchr::memrchr(b'\n', &self.slice[..i]) {
423
0
            Some(position) => position + 1,
424
0
            None => 0,
425
        };
426
0
        Position {
427
0
            line: 1 + memchr::memchr_iter(b'\n', &self.slice[..start_of_line]).count(),
428
0
            column: i - start_of_line,
429
0
        }
430
0
    }
431
432
0
    fn skip_to_escape(&mut self, forbid_control_characters: bool) {
433
0
        // Immediately bail-out on empty strings and consecutive escapes (e.g. \u041b\u0435)
434
0
        if self.index == self.slice.len()
435
0
            || is_escape(self.slice[self.index], forbid_control_characters)
436
        {
437
0
            return;
438
0
        }
439
0
        self.index += 1;
440
0
441
0
        let rest = &self.slice[self.index..];
442
0
443
0
        if !forbid_control_characters {
444
0
            self.index += memchr::memchr2(b'"', b'\\', rest).unwrap_or(rest.len());
445
0
            return;
446
0
        }
447
448
        // We wish to find the first byte in range 0x00..=0x1F or " or \. Ideally, we'd use
449
        // something akin to memchr3, but the memchr crate does not support this at the moment.
450
        // Therefore, we use a variation on Mycroft's algorithm [1] to provide performance better
451
        // than a naive loop. It runs faster than equivalent two-pass memchr2+SWAR code on
452
        // benchmarks and it's cross-platform, so probably the right fit.
453
        // [1]: https://groups.google.com/forum/#!original/comp.lang.c/2HtQXvg7iKc/xOJeipH6KLMJ
454
455
        #[cfg(fast_arithmetic = "64")]
456
        type Chunk = u64;
457
        #[cfg(fast_arithmetic = "32")]
458
        type Chunk = u32;
459
460
        const STEP: usize = mem::size_of::<Chunk>();
461
        const ONE_BYTES: Chunk = Chunk::MAX / 255; // 0x0101...01
462
463
0
        for chunk in rest.chunks_exact(STEP) {
464
0
            let chars = Chunk::from_le_bytes(chunk.try_into().unwrap());
465
0
            let contains_ctrl = chars.wrapping_sub(ONE_BYTES * 0x20) & !chars;
466
0
            let chars_quote = chars ^ (ONE_BYTES * Chunk::from(b'"'));
467
0
            let contains_quote = chars_quote.wrapping_sub(ONE_BYTES) & !chars_quote;
468
0
            let chars_backslash = chars ^ (ONE_BYTES * Chunk::from(b'\\'));
469
0
            let contains_backslash = chars_backslash.wrapping_sub(ONE_BYTES) & !chars_backslash;
470
0
            let masked = (contains_ctrl | contains_quote | contains_backslash) & (ONE_BYTES << 7);
471
0
            if masked != 0 {
472
                // SAFETY: chunk is in-bounds for slice
473
0
                self.index = unsafe { chunk.as_ptr().offset_from(self.slice.as_ptr()) } as usize
474
0
                    + masked.trailing_zeros() as usize / 8;
475
0
                return;
476
0
            }
477
        }
478
479
0
        self.index += rest.len() / STEP * STEP;
480
0
        self.skip_to_escape_slow();
481
0
    }
482
483
    #[cold]
484
    #[inline(never)]
485
0
    fn skip_to_escape_slow(&mut self) {
486
0
        while self.index < self.slice.len() && !is_escape(self.slice[self.index], true) {
487
0
            self.index += 1;
488
0
        }
489
0
    }
490
491
    /// The big optimization here over IoRead is that if the string contains no
492
    /// backslash escape sequences, the returned &str is a slice of the raw JSON
493
    /// data so we avoid copying into the scratch space.
494
0
    fn parse_str_bytes<'s, T, F>(
495
0
        &'s mut self,
496
0
        scratch: &'s mut Vec<u8>,
497
0
        validate: bool,
498
0
        result: F,
499
0
    ) -> Result<Reference<'a, 's, T>>
500
0
    where
501
0
        T: ?Sized + 's,
502
0
        F: for<'f> FnOnce(&'s Self, &'f [u8]) -> Result<&'f T>,
503
0
    {
504
0
        // Index of the first byte not yet copied into the scratch space.
505
0
        let mut start = self.index;
506
507
        loop {
508
0
            self.skip_to_escape(validate);
509
0
            if self.index == self.slice.len() {
510
0
                return error(self, ErrorCode::EofWhileParsingString);
511
0
            }
512
0
            match self.slice[self.index] {
513
                b'"' => {
514
0
                    if scratch.is_empty() {
515
                        // Fast path: return a slice of the raw JSON without any
516
                        // copying.
517
0
                        let borrowed = &self.slice[start..self.index];
518
0
                        self.index += 1;
519
0
                        return result(self, borrowed).map(Reference::Borrowed);
520
                    } else {
521
0
                        scratch.extend_from_slice(&self.slice[start..self.index]);
522
0
                        self.index += 1;
523
0
                        return result(self, scratch).map(Reference::Copied);
524
                    }
525
                }
526
                b'\\' => {
527
0
                    scratch.extend_from_slice(&self.slice[start..self.index]);
528
0
                    self.index += 1;
529
0
                    tri!(parse_escape(self, validate, scratch));
530
0
                    start = self.index;
531
                }
532
                _ => {
533
0
                    self.index += 1;
534
0
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
535
                }
536
            }
537
        }
538
0
    }
Unexecuted instantiation: <serde_json::read::SliceRead>::parse_str_bytes::<[u8], <serde_json::read::SliceRead as serde_json::read::Read>::parse_str_raw::{closure#0}>
Unexecuted instantiation: <serde_json::read::SliceRead>::parse_str_bytes::<str, serde_json::read::as_str<serde_json::read::SliceRead>>
Unexecuted instantiation: <serde_json::read::SliceRead>::parse_str_bytes::<str, <serde_json::read::StrRead as serde_json::read::Read>::parse_str::{closure#0}>
539
}
540
541
impl<'a> private::Sealed for SliceRead<'a> {}
542
543
impl<'a> Read<'a> for SliceRead<'a> {
544
    #[inline]
545
0
    fn next(&mut self) -> Result<Option<u8>> {
546
0
        // `Ok(self.slice.get(self.index).map(|ch| { self.index += 1; *ch }))`
547
0
        // is about 10% slower.
548
0
        Ok(if self.index < self.slice.len() {
549
0
            let ch = self.slice[self.index];
550
0
            self.index += 1;
551
0
            Some(ch)
552
        } else {
553
0
            None
554
        })
555
0
    }
556
557
    #[inline]
558
0
    fn peek(&mut self) -> Result<Option<u8>> {
559
0
        // `Ok(self.slice.get(self.index).map(|ch| *ch))` is about 10% slower
560
0
        // for some reason.
561
0
        Ok(if self.index < self.slice.len() {
562
0
            Some(self.slice[self.index])
563
        } else {
564
0
            None
565
        })
566
0
    }
567
568
    #[inline]
569
0
    fn discard(&mut self) {
570
0
        self.index += 1;
571
0
    }
572
573
0
    fn position(&self) -> Position {
574
0
        self.position_of_index(self.index)
575
0
    }
576
577
0
    fn peek_position(&self) -> Position {
578
0
        // Cap it at slice.len() just in case the most recent call was next()
579
0
        // and it returned the last byte.
580
0
        self.position_of_index(cmp::min(self.slice.len(), self.index + 1))
581
0
    }
582
583
0
    fn byte_offset(&self) -> usize {
584
0
        self.index
585
0
    }
586
587
0
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
588
0
        self.parse_str_bytes(scratch, true, as_str)
589
0
    }
590
591
0
    fn parse_str_raw<'s>(
592
0
        &'s mut self,
593
0
        scratch: &'s mut Vec<u8>,
594
0
    ) -> Result<Reference<'a, 's, [u8]>> {
595
0
        self.parse_str_bytes(scratch, false, |_, bytes| Ok(bytes))
596
0
    }
597
598
0
    fn ignore_str(&mut self) -> Result<()> {
599
        loop {
600
0
            self.skip_to_escape(true);
601
0
            if self.index == self.slice.len() {
602
0
                return error(self, ErrorCode::EofWhileParsingString);
603
0
            }
604
0
            match self.slice[self.index] {
605
                b'"' => {
606
0
                    self.index += 1;
607
0
                    return Ok(());
608
                }
609
                b'\\' => {
610
0
                    self.index += 1;
611
0
                    tri!(ignore_escape(self));
612
                }
613
                _ => {
614
0
                    return error(self, ErrorCode::ControlCharacterWhileParsingString);
615
                }
616
            }
617
        }
618
0
    }
619
620
    #[inline]
621
0
    fn decode_hex_escape(&mut self) -> Result<u16> {
622
0
        match self.slice[self.index..] {
623
0
            [a, b, c, d, ..] => {
624
0
                self.index += 4;
625
0
                match decode_four_hex_digits(a, b, c, d) {
626
0
                    Some(val) => Ok(val),
627
0
                    None => error(self, ErrorCode::InvalidEscape),
628
                }
629
            }
630
            _ => {
631
0
                self.index = self.slice.len();
632
0
                error(self, ErrorCode::EofWhileParsingString)
633
            }
634
        }
635
0
    }
636
637
    #[cfg(feature = "raw_value")]
638
    fn begin_raw_buffering(&mut self) {
639
        self.raw_buffering_start_index = self.index;
640
    }
641
642
    #[cfg(feature = "raw_value")]
643
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
644
    where
645
        V: Visitor<'a>,
646
    {
647
        let raw = &self.slice[self.raw_buffering_start_index..self.index];
648
        let raw = match str::from_utf8(raw) {
649
            Ok(raw) => raw,
650
            Err(_) => return error(self, ErrorCode::InvalidUnicodeCodePoint),
651
        };
652
        visitor.visit_map(BorrowedRawDeserializer {
653
            raw_value: Some(raw),
654
        })
655
    }
656
657
    const should_early_return_if_failed: bool = false;
658
659
    #[inline]
660
    #[cold]
661
0
    fn set_failed(&mut self, _failed: &mut bool) {
662
0
        self.slice = &self.slice[..self.index];
663
0
    }
664
}
665
666
//////////////////////////////////////////////////////////////////////////////
667
668
impl<'a> StrRead<'a> {
669
    /// Create a JSON input source to read from a UTF-8 string.
670
0
    pub fn new(s: &'a str) -> Self {
671
0
        StrRead {
672
0
            delegate: SliceRead::new(s.as_bytes()),
673
0
            #[cfg(feature = "raw_value")]
674
0
            data: s,
675
0
        }
676
0
    }
677
}
678
679
impl<'a> private::Sealed for StrRead<'a> {}
680
681
impl<'a> Read<'a> for StrRead<'a> {
682
    #[inline]
683
0
    fn next(&mut self) -> Result<Option<u8>> {
684
0
        self.delegate.next()
685
0
    }
686
687
    #[inline]
688
0
    fn peek(&mut self) -> Result<Option<u8>> {
689
0
        self.delegate.peek()
690
0
    }
691
692
    #[inline]
693
0
    fn discard(&mut self) {
694
0
        self.delegate.discard();
695
0
    }
696
697
0
    fn position(&self) -> Position {
698
0
        self.delegate.position()
699
0
    }
700
701
0
    fn peek_position(&self) -> Position {
702
0
        self.delegate.peek_position()
703
0
    }
704
705
0
    fn byte_offset(&self) -> usize {
706
0
        self.delegate.byte_offset()
707
0
    }
708
709
0
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'a, 's, str>> {
710
0
        self.delegate.parse_str_bytes(scratch, true, |_, bytes| {
711
0
            // The deserialization input came in as &str with a UTF-8 guarantee,
712
0
            // and the \u-escapes are checked along the way, so don't need to
713
0
            // check here.
714
0
            Ok(unsafe { str::from_utf8_unchecked(bytes) })
715
0
        })
716
0
    }
717
718
0
    fn parse_str_raw<'s>(
719
0
        &'s mut self,
720
0
        scratch: &'s mut Vec<u8>,
721
0
    ) -> Result<Reference<'a, 's, [u8]>> {
722
0
        self.delegate.parse_str_raw(scratch)
723
0
    }
724
725
0
    fn ignore_str(&mut self) -> Result<()> {
726
0
        self.delegate.ignore_str()
727
0
    }
728
729
0
    fn decode_hex_escape(&mut self) -> Result<u16> {
730
0
        self.delegate.decode_hex_escape()
731
0
    }
732
733
    #[cfg(feature = "raw_value")]
734
    fn begin_raw_buffering(&mut self) {
735
        self.delegate.begin_raw_buffering();
736
    }
737
738
    #[cfg(feature = "raw_value")]
739
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
740
    where
741
        V: Visitor<'a>,
742
    {
743
        let raw = &self.data[self.delegate.raw_buffering_start_index..self.delegate.index];
744
        visitor.visit_map(BorrowedRawDeserializer {
745
            raw_value: Some(raw),
746
        })
747
    }
748
749
    const should_early_return_if_failed: bool = false;
750
751
    #[inline]
752
    #[cold]
753
0
    fn set_failed(&mut self, failed: &mut bool) {
754
0
        self.delegate.set_failed(failed);
755
0
    }
756
}
757
758
//////////////////////////////////////////////////////////////////////////////
759
760
impl<'de, R> private::Sealed for &mut R where R: Read<'de> {}
761
762
impl<'de, R> Read<'de> for &mut R
763
where
764
    R: Read<'de>,
765
{
766
0
    fn next(&mut self) -> Result<Option<u8>> {
767
0
        R::next(self)
768
0
    }
769
770
0
    fn peek(&mut self) -> Result<Option<u8>> {
771
0
        R::peek(self)
772
0
    }
773
774
0
    fn discard(&mut self) {
775
0
        R::discard(self);
776
0
    }
777
778
0
    fn position(&self) -> Position {
779
0
        R::position(self)
780
0
    }
781
782
0
    fn peek_position(&self) -> Position {
783
0
        R::peek_position(self)
784
0
    }
785
786
0
    fn byte_offset(&self) -> usize {
787
0
        R::byte_offset(self)
788
0
    }
789
790
0
    fn parse_str<'s>(&'s mut self, scratch: &'s mut Vec<u8>) -> Result<Reference<'de, 's, str>> {
791
0
        R::parse_str(self, scratch)
792
0
    }
793
794
0
    fn parse_str_raw<'s>(
795
0
        &'s mut self,
796
0
        scratch: &'s mut Vec<u8>,
797
0
    ) -> Result<Reference<'de, 's, [u8]>> {
798
0
        R::parse_str_raw(self, scratch)
799
0
    }
800
801
0
    fn ignore_str(&mut self) -> Result<()> {
802
0
        R::ignore_str(self)
803
0
    }
804
805
0
    fn decode_hex_escape(&mut self) -> Result<u16> {
806
0
        R::decode_hex_escape(self)
807
0
    }
808
809
    #[cfg(feature = "raw_value")]
810
    fn begin_raw_buffering(&mut self) {
811
        R::begin_raw_buffering(self);
812
    }
813
814
    #[cfg(feature = "raw_value")]
815
    fn end_raw_buffering<V>(&mut self, visitor: V) -> Result<V::Value>
816
    where
817
        V: Visitor<'de>,
818
    {
819
        R::end_raw_buffering(self, visitor)
820
    }
821
822
    const should_early_return_if_failed: bool = R::should_early_return_if_failed;
823
824
0
    fn set_failed(&mut self, failed: &mut bool) {
825
0
        R::set_failed(self, failed);
826
0
    }
827
}
828
829
//////////////////////////////////////////////////////////////////////////////
830
831
/// Marker for whether StreamDeserializer can implement FusedIterator.
832
pub trait Fused: private::Sealed {}
833
impl<'a> Fused for SliceRead<'a> {}
834
impl<'a> Fused for StrRead<'a> {}
835
836
0
fn is_escape(ch: u8, including_control_characters: bool) -> bool {
837
0
    ch == b'"' || ch == b'\\' || (including_control_characters && ch < 0x20)
838
0
}
839
840
0
fn next_or_eof<'de, R>(read: &mut R) -> Result<u8>
841
0
where
842
0
    R: ?Sized + Read<'de>,
843
0
{
844
0
    match tri!(read.next()) {
845
0
        Some(b) => Ok(b),
846
0
        None => error(read, ErrorCode::EofWhileParsingString),
847
    }
848
0
}
849
850
0
fn peek_or_eof<'de, R>(read: &mut R) -> Result<u8>
851
0
where
852
0
    R: ?Sized + Read<'de>,
853
0
{
854
0
    match tri!(read.peek()) {
855
0
        Some(b) => Ok(b),
856
0
        None => error(read, ErrorCode::EofWhileParsingString),
857
    }
858
0
}
859
860
0
fn error<'de, R, T>(read: &R, reason: ErrorCode) -> Result<T>
861
0
where
862
0
    R: ?Sized + Read<'de>,
863
0
{
864
0
    let position = read.position();
865
0
    Err(Error::syntax(reason, position.line, position.column))
866
0
}
Unexecuted instantiation: serde_json::read::error::<serde_json::read::SliceRead, serde_json::read::Reference<[u8]>>
Unexecuted instantiation: serde_json::read::error::<serde_json::read::SliceRead, serde_json::read::Reference<str>>
Unexecuted instantiation: serde_json::read::error::<serde_json::read::SliceRead, &str>
Unexecuted instantiation: serde_json::read::error::<serde_json::read::SliceRead, u8>
Unexecuted instantiation: serde_json::read::error::<serde_json::read::SliceRead, u16>
Unexecuted instantiation: serde_json::read::error::<serde_json::read::SliceRead, ()>
867
868
0
fn as_str<'de, 's, R: Read<'de>>(read: &R, slice: &'s [u8]) -> Result<&'s str> {
869
0
    str::from_utf8(slice).or_else(|_| error(read, ErrorCode::InvalidUnicodeCodePoint))
870
0
}
871
872
/// Parses a JSON escape sequence and appends it into the scratch space. Assumes
873
/// the previous byte read was a backslash.
874
0
fn parse_escape<'de, R: Read<'de>>(
875
0
    read: &mut R,
876
0
    validate: bool,
877
0
    scratch: &mut Vec<u8>,
878
0
) -> Result<()> {
879
0
    let ch = tri!(next_or_eof(read));
880
881
0
    match ch {
882
0
        b'"' => scratch.push(b'"'),
883
0
        b'\\' => scratch.push(b'\\'),
884
0
        b'/' => scratch.push(b'/'),
885
0
        b'b' => scratch.push(b'\x08'),
886
0
        b'f' => scratch.push(b'\x0c'),
887
0
        b'n' => scratch.push(b'\n'),
888
0
        b'r' => scratch.push(b'\r'),
889
0
        b't' => scratch.push(b'\t'),
890
0
        b'u' => return parse_unicode_escape(read, validate, scratch),
891
0
        _ => return error(read, ErrorCode::InvalidEscape),
892
    }
893
894
0
    Ok(())
895
0
}
896
897
/// Parses a JSON \u escape and appends it into the scratch space. Assumes `\u`
898
/// has just been read.
899
#[cold]
900
0
fn parse_unicode_escape<'de, R: Read<'de>>(
901
0
    read: &mut R,
902
0
    validate: bool,
903
0
    scratch: &mut Vec<u8>,
904
0
) -> Result<()> {
905
0
    let mut n = tri!(read.decode_hex_escape());
906
907
    // Non-BMP characters are encoded as a sequence of two hex escapes,
908
    // representing UTF-16 surrogates. If deserializing a utf-8 string the
909
    // surrogates are required to be paired, whereas deserializing a byte string
910
    // accepts lone surrogates.
911
0
    if validate && n >= 0xDC00 && n <= 0xDFFF {
912
        // XXX: This is actually a trailing surrogate.
913
0
        return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
914
0
    }
915
916
    loop {
917
0
        if n < 0xD800 || n > 0xDBFF {
918
            // Every u16 outside of the surrogate ranges is guaranteed to be a
919
            // legal char.
920
0
            push_wtf8_codepoint(n as u32, scratch);
921
0
            return Ok(());
922
0
        }
923
0
924
0
        // n is a leading surrogate, we now expect a trailing surrogate.
925
0
        let n1 = n;
926
927
0
        if tri!(peek_or_eof(read)) == b'\\' {
928
0
            read.discard();
929
0
        } else {
930
0
            return if validate {
931
0
                read.discard();
932
0
                error(read, ErrorCode::UnexpectedEndOfHexEscape)
933
            } else {
934
0
                push_wtf8_codepoint(n1 as u32, scratch);
935
0
                Ok(())
936
            };
937
        }
938
939
0
        if tri!(peek_or_eof(read)) == b'u' {
940
0
            read.discard();
941
0
        } else {
942
0
            return if validate {
943
0
                read.discard();
944
0
                error(read, ErrorCode::UnexpectedEndOfHexEscape)
945
            } else {
946
0
                push_wtf8_codepoint(n1 as u32, scratch);
947
0
                // The \ prior to this byte started an escape sequence, so we
948
0
                // need to parse that now. This recursive call does not blow the
949
0
                // stack on malicious input because the escape is not \u, so it
950
0
                // will be handled by one of the easy nonrecursive cases.
951
0
                parse_escape(read, validate, scratch)
952
            };
953
        }
954
955
0
        let n2 = tri!(read.decode_hex_escape());
956
957
0
        if n2 < 0xDC00 || n2 > 0xDFFF {
958
0
            if validate {
959
0
                return error(read, ErrorCode::LoneLeadingSurrogateInHexEscape);
960
0
            }
961
0
            push_wtf8_codepoint(n1 as u32, scratch);
962
0
            // If n2 is a leading surrogate, we need to restart.
963
0
            n = n2;
964
0
            continue;
965
0
        }
966
0
967
0
        // This value is in range U+10000..=U+10FFFF, which is always a valid
968
0
        // codepoint.
969
0
        let n = ((((n1 - 0xD800) as u32) << 10) | (n2 - 0xDC00) as u32) + 0x1_0000;
970
0
        push_wtf8_codepoint(n, scratch);
971
0
        return Ok(());
972
    }
973
0
}
974
975
/// Appends the WTF-8 encoding of codepoint `n` to `scratch`. Intended as a
/// faster alternative to `String::push`; unlike that method, `n` is allowed
/// to be a surrogate.
///
/// Panics (via `unreachable!`) if `n` is greater than `0x10_FFFF`.
#[inline]
fn push_wtf8_codepoint(n: u32, scratch: &mut Vec<u8>) {
    // ASCII is a single byte and needs no encoding work.
    if n < 0x80 {
        scratch.push(n as u8);
        return;
    }

    // Continuation byte carrying the six bits of `n` that start at `shift`.
    let continuation = |shift: u32| ((n >> shift) & 0b0011_1111) as u8 | 0b1000_0000;

    scratch.reserve(4);

    // SAFETY: The `reserve` call above guarantees at least 4 bytes of
    // allocated but uninitialized capacity past the last initialized byte,
    // which is exactly where `dst` points. Every branch that does not panic
    // writes `width` bytes into that region (the leading bytes in the branch,
    // the final byte just after it) before the length is bumped by `width`,
    // and `width` never exceeds 4.
    unsafe {
        let old_len = scratch.len();
        let dst = scratch.as_mut_ptr().add(old_len);

        // `n < 0x80` already returned above, so the ladder starts at the
        // two-byte range.
        let width = if n <= 0x7FF {
            dst.write(((n >> 6) & 0b0001_1111) as u8 | 0b1100_0000);
            2
        } else if n <= 0xFFFF {
            dst.write(((n >> 12) & 0b0000_1111) as u8 | 0b1110_0000);
            dst.add(1).write(continuation(6));
            3
        } else if n <= 0x10_FFFF {
            dst.write(((n >> 18) & 0b0000_0111) as u8 | 0b1111_0000);
            dst.add(1).write(continuation(12));
            dst.add(2).write(continuation(6));
            4
        } else {
            unreachable!()
        };

        // The last byte is always a continuation byte with the low six bits.
        dst.add(width - 1).write(continuation(0));

        scratch.set_len(old_len + width);
    }
}
1022
1023
/// Parses a JSON escape sequence and discards the value. Assumes the previous
1024
/// byte read was a backslash.
1025
0
fn ignore_escape<'de, R>(read: &mut R) -> Result<()>
1026
0
where
1027
0
    R: ?Sized + Read<'de>,
1028
0
{
1029
0
    let ch = tri!(next_or_eof(read));
1030
1031
0
    match ch {
1032
0
        b'"' | b'\\' | b'/' | b'b' | b'f' | b'n' | b'r' | b't' => {}
1033
        b'u' => {
1034
            // At this point we don't care if the codepoint is valid. We just
1035
            // want to consume it. We don't actually know what is valid or not
1036
            // at this point, because that depends on if this string will
1037
            // ultimately be parsed into a string or a byte buffer in the "real"
1038
            // parse.
1039
1040
0
            tri!(read.decode_hex_escape());
1041
        }
1042
        _ => {
1043
0
            return error(read, ErrorCode::InvalidEscape);
1044
        }
1045
    }
1046
1047
0
    Ok(())
1048
0
}
1049
1050
0
/// Returns the numeric value (0..=15) of the ASCII hex digit `val`, accepting
/// both upper- and lowercase letters, or `None` when `val` is not a hex digit.
const fn decode_hex_val_slow(val: u8) -> Option<u8> {
    if val >= b'0' && val <= b'9' {
        return Some(val - b'0');
    }
    if val >= b'A' && val <= b'F' {
        return Some(val - b'A' + 10);
    }
    if val >= b'a' && val <= b'f' {
        return Some(val - b'a' + 10);
    }
    None
}
1058
1059
0
const fn build_hex_table(shift: usize) -> [i16; 256] {
1060
0
    let mut table = [0; 256];
1061
0
    let mut ch = 0;
1062
0
    while ch < 256 {
1063
0
        table[ch] = match decode_hex_val_slow(ch as u8) {
1064
0
            Some(val) => (val as i16) << shift,
1065
0
            None => -1,
1066
        };
1067
0
        ch += 1;
1068
    }
1069
0
    table
1070
0
}
1071
1072
static HEX0: [i16; 256] = build_hex_table(0);
1073
static HEX1: [i16; 256] = build_hex_table(4);
1074
1075
0
fn decode_four_hex_digits(a: u8, b: u8, c: u8, d: u8) -> Option<u16> {
1076
0
    let a = HEX1[a as usize] as i32;
1077
0
    let b = HEX0[b as usize] as i32;
1078
0
    let c = HEX1[c as usize] as i32;
1079
0
    let d = HEX0[d as usize] as i32;
1080
0
1081
0
    let codepoint = ((a | b) << 8) | c | d;
1082
0
1083
0
    // A single sign bit check.
1084
0
    if codepoint >= 0 {
1085
0
        Some(codepoint as u16)
1086
    } else {
1087
0
        None
1088
    }
1089
0
}