Coverage Report

Created: 2026-01-10 06:41

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/http/src/uri/path.rs
Line
Count
Source
1
use std::convert::TryFrom;
2
use std::str::FromStr;
3
use std::{cmp, fmt, hash, str};
4
5
use bytes::Bytes;
6
7
use super::{ErrorKind, InvalidUri};
8
use crate::byte_str::ByteStr;
9
10
/// Validation result for path and query parsing.
11
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
12
enum PathAndQueryError {
13
    InvalidPathChar,
14
    InvalidQueryChar,
15
    FragmentNotAllowed,
16
}
17
18
/// Represents the path component of a URI
19
#[derive(Clone)]
20
pub struct PathAndQuery {
21
    pub(super) data: ByteStr,
22
    pub(super) query: u16,
23
}
24
25
const NONE: u16 = u16::MAX;
26
27
impl PathAndQuery {
28
    // Not public while `bytes` is unstable.
29
342
    pub(super) fn from_shared(mut src: Bytes) -> Result<Self, InvalidUri> {
30
342
        let mut query = NONE;
31
342
        let mut fragment = None;
32
33
342
        let mut is_maybe_not_utf8 = false;
34
35
        // block for iterator borrow
36
        {
37
342
            let mut iter = src.as_ref().iter().enumerate();
38
39
            // path ...
40
936k
            for (i, &b) in &mut iter {
41
                // See https://url.spec.whatwg.org/#path-state
42
936k
                match b {
43
                    b'?' => {
44
124
                        debug_assert_eq!(query, NONE);
45
124
                        query = i as u16;
46
124
                        break;
47
                    }
48
                    b'#' => {
49
30
                        fragment = Some(i);
50
30
                        break;
51
                    }
52
53
                    // This is the range of bytes that don't need to be
54
                    // percent-encoded in the path. If it should have been
55
                    // percent-encoded, then error.
56
                    #[rustfmt::skip]
57
                    0x21 |
58
934k
                    0x24..=0x3B |
59
                    0x3D |
60
933k
                    0x40..=0x5F |
61
932k
                    0x61..=0x7A |
62
                    0x7C |
63
855k
                    0x7E => {}
64
65
                    // potentially utf8, might not, should check
66
79.8k
                    0x7F..=0xFF => {
67
79.8k
                        is_maybe_not_utf8 = true;
68
79.8k
                    }
69
70
                    // These are code points that are supposed to be
71
                    // percent-encoded in the path but there are clients
72
                    // out there sending them as is and httparse accepts
73
                    // to parse those requests, so they are allowed here
74
                    // for parity.
75
                    //
76
                    // For reference, those are code points that are used
77
                    // to send requests with JSON directly embedded in
78
                    // the URI path. Yes, those things happen for real.
79
                    #[rustfmt::skip]
80
                    b'"' |
81
748
                    b'{' | b'}' => {}
82
83
50
                    _ => return Err(ErrorKind::InvalidUriChar.into()),
84
                }
85
            }
86
87
            // query ...
88
292
            if query != NONE {
89
76.0k
                for (i, &b) in iter {
90
76.0k
                    match b {
91
                        // While queries *should* be percent-encoded, most
92
                        // bytes are actually allowed...
93
                        // See https://url.spec.whatwg.org/#query-state
94
                        //
95
                        // Allowed: 0x21 / 0x24 - 0x3B / 0x3D / 0x3F - 0x7E
96
                        #[rustfmt::skip]
97
                        0x21 |
98
55.6k
                        0x24..=0x3B |
99
                        0x3D |
100
67.0k
                        0x3F..=0x7E => {}
101
102
8.92k
                        0x7F..=0xFF => {
103
8.92k
                            is_maybe_not_utf8 = true;
104
8.92k
                        }
105
106
                        b'#' => {
107
16
                            fragment = Some(i);
108
16
                            break;
109
                        }
110
111
41
                        _ => return Err(ErrorKind::InvalidUriChar.into()),
112
                    }
113
                }
114
168
            }
115
        }
116
117
251
        if let Some(i) = fragment {
118
46
            src.truncate(i);
119
205
        }
120
121
251
        let data = if is_maybe_not_utf8 {
122
68
            ByteStr::from_utf8(src).map_err(|_| ErrorKind::InvalidUriChar)?
123
        } else {
124
183
            unsafe { ByteStr::from_utf8_unchecked(src) }
125
        };
126
127
184
        Ok(PathAndQuery { data, query })
128
342
    }
129
130
    /// Convert a `PathAndQuery` from a static string.
131
    ///
132
    /// This function will not perform any copying, however the string is
133
    /// checked to ensure that it is valid.
134
    ///
135
    /// # Panics
136
    ///
137
    /// This function panics if the argument is an invalid path and query.
138
    ///
139
    /// # Examples
140
    ///
141
    /// ```
142
    /// # use http::uri::*;
143
    /// let v = PathAndQuery::from_static("/hello?world");
144
    ///
145
    /// assert_eq!(v.path(), "/hello");
146
    /// assert_eq!(v.query(), Some("world"));
147
    /// ```
148
    #[inline]
149
    pub const fn from_static(src: &'static str) -> Self {
150
        match validate_path_and_query_bytes(src.as_bytes()) {
151
            Ok(query) => PathAndQuery {
152
                data: ByteStr::from_static(src),
153
                query,
154
            },
155
            Err(_) => panic!("static str is not valid path"),
156
        }
157
    }
158
159
    /// Attempt to convert a `Bytes` buffer to a `PathAndQuery`.
160
    ///
161
    /// This will try to prevent a copy if the type passed is the type used
162
    /// internally, and will copy the data if it is not.
163
    pub fn from_maybe_shared<T>(src: T) -> Result<Self, InvalidUri>
164
    where
165
        T: AsRef<[u8]> + 'static,
166
    {
167
        if_downcast_into!(T, Bytes, src, {
168
            return PathAndQuery::from_shared(src);
169
        });
170
171
        PathAndQuery::try_from(src.as_ref())
172
    }
173
174
1.07k
    pub(super) fn empty() -> Self {
175
1.07k
        PathAndQuery {
176
1.07k
            data: ByteStr::new(),
177
1.07k
            query: NONE,
178
1.07k
        }
179
1.07k
    }
180
181
6.50k
    pub(super) fn slash() -> Self {
182
6.50k
        PathAndQuery {
183
6.50k
            data: ByteStr::from_static("/"),
184
6.50k
            query: NONE,
185
6.50k
        }
186
6.50k
    }
187
188
15
    pub(super) fn star() -> Self {
189
15
        PathAndQuery {
190
15
            data: ByteStr::from_static("*"),
191
15
            query: NONE,
192
15
        }
193
15
    }
194
195
    /// Returns the path component
196
    ///
197
    /// The path component is **case sensitive**.
198
    ///
199
    /// ```notrust
200
    /// abc://username:password@example.com:123/path/data?key=value&key2=value2#fragid1
201
    ///                                        |--------|
202
    ///                                             |
203
    ///                                           path
204
    /// ```
205
    ///
206
    /// If the URI is `*` then the path component is equal to `*`.
207
    ///
208
    /// # Examples
209
    ///
210
    /// ```
211
    /// # use http::uri::*;
212
    ///
213
    /// let path_and_query: PathAndQuery = "/hello/world".parse().unwrap();
214
    ///
215
    /// assert_eq!(path_and_query.path(), "/hello/world");
216
    /// ```
217
    #[inline]
218
0
    pub fn path(&self) -> &str {
219
0
        let ret = if self.query == NONE {
220
0
            &self.data[..]
221
        } else {
222
0
            &self.data[..self.query as usize]
223
        };
224
225
0
        if ret.is_empty() {
226
0
            return "/";
227
0
        }
228
229
0
        ret
230
0
    }
231
232
    /// Returns the query string component
233
    ///
234
    /// The query component contains non-hierarchical data that, along with data
235
    /// in the path component, serves to identify a resource within the scope of
236
    /// the URI's scheme and naming authority (if any). The query component is
237
    /// indicated by the first question mark ("?") character and terminated by a
238
    /// number sign ("#") character or by the end of the URI.
239
    ///
240
    /// ```notrust
241
    /// abc://username:password@example.com:123/path/data?key=value&key2=value2#fragid1
242
    ///                                                   |-------------------|
243
    ///                                                             |
244
    ///                                                           query
245
    /// ```
246
    ///
247
    /// # Examples
248
    ///
249
    /// With a query string component
250
    ///
251
    /// ```
252
    /// # use http::uri::*;
253
    /// let path_and_query: PathAndQuery = "/hello/world?key=value&foo=bar".parse().unwrap();
254
    ///
255
    /// assert_eq!(path_and_query.query(), Some("key=value&foo=bar"));
256
    /// ```
257
    ///
258
    /// Without a query string component
259
    ///
260
    /// ```
261
    /// # use http::uri::*;
262
    /// let path_and_query: PathAndQuery = "/hello/world".parse().unwrap();
263
    ///
264
    /// assert!(path_and_query.query().is_none());
265
    /// ```
266
    #[inline]
267
0
    pub fn query(&self) -> Option<&str> {
268
0
        if self.query == NONE {
269
0
            None
270
        } else {
271
0
            let i = self.query + 1;
272
0
            Some(&self.data[i as usize..])
273
        }
274
0
    }
275
276
    /// Returns the path and query as a string component.
277
    ///
278
    /// # Examples
279
    ///
280
    /// With a query string component
281
    ///
282
    /// ```
283
    /// # use http::uri::*;
284
    /// let path_and_query: PathAndQuery = "/hello/world?key=value&foo=bar".parse().unwrap();
285
    ///
286
    /// assert_eq!(path_and_query.as_str(), "/hello/world?key=value&foo=bar");
287
    /// ```
288
    ///
289
    /// Without a query string component
290
    ///
291
    /// ```
292
    /// # use http::uri::*;
293
    /// let path_and_query: PathAndQuery = "/hello/world".parse().unwrap();
294
    ///
295
    /// assert_eq!(path_and_query.as_str(), "/hello/world");
296
    /// ```
297
    #[inline]
298
    pub fn as_str(&self) -> &str {
299
        let ret = &self.data[..];
300
        if ret.is_empty() {
301
            return "/";
302
        }
303
        ret
304
    }
305
}
306
307
impl<'a> TryFrom<&'a [u8]> for PathAndQuery {
308
    type Error = InvalidUri;
309
    #[inline]
310
    fn try_from(s: &'a [u8]) -> Result<Self, Self::Error> {
311
        PathAndQuery::from_shared(Bytes::copy_from_slice(s))
312
    }
313
}
314
315
impl<'a> TryFrom<&'a str> for PathAndQuery {
316
    type Error = InvalidUri;
317
    #[inline]
318
    fn try_from(s: &'a str) -> Result<Self, Self::Error> {
319
        TryFrom::try_from(s.as_bytes())
320
    }
321
}
322
323
impl TryFrom<Vec<u8>> for PathAndQuery {
324
    type Error = InvalidUri;
325
    #[inline]
326
    fn try_from(vec: Vec<u8>) -> Result<Self, Self::Error> {
327
        PathAndQuery::from_shared(vec.into())
328
    }
329
}
330
331
impl TryFrom<String> for PathAndQuery {
332
    type Error = InvalidUri;
333
    #[inline]
334
    fn try_from(s: String) -> Result<Self, Self::Error> {
335
        PathAndQuery::from_shared(s.into())
336
    }
337
}
338
339
impl TryFrom<&String> for PathAndQuery {
340
    type Error = InvalidUri;
341
    #[inline]
342
    fn try_from(s: &String) -> Result<Self, Self::Error> {
343
        TryFrom::try_from(s.as_bytes())
344
    }
345
}
346
347
impl FromStr for PathAndQuery {
348
    type Err = InvalidUri;
349
    #[inline]
350
    fn from_str(s: &str) -> Result<Self, InvalidUri> {
351
        TryFrom::try_from(s)
352
    }
353
}
354
355
impl fmt::Debug for PathAndQuery {
356
0
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
357
0
        fmt::Display::fmt(self, f)
358
0
    }
359
}
360
361
impl fmt::Display for PathAndQuery {
362
0
    fn fmt(&self, fmt: &mut fmt::Formatter<'_>) -> fmt::Result {
363
0
        if !self.data.is_empty() {
364
0
            match self.data.as_bytes()[0] {
365
0
                b'/' | b'*' => write!(fmt, "{}", &self.data[..]),
366
0
                _ => write!(fmt, "/{}", &self.data[..]),
367
            }
368
        } else {
369
0
            write!(fmt, "/")
370
        }
371
0
    }
372
}
373
374
impl hash::Hash for PathAndQuery {
375
    fn hash<H: hash::Hasher>(&self, state: &mut H) {
376
        self.data.hash(state);
377
    }
378
}
379
380
// ===== PartialEq / PartialOrd =====
381
382
impl PartialEq for PathAndQuery {
383
    #[inline]
384
    fn eq(&self, other: &PathAndQuery) -> bool {
385
        self.data == other.data
386
    }
387
}
388
389
impl Eq for PathAndQuery {}
390
391
impl PartialEq<str> for PathAndQuery {
392
    #[inline]
393
    fn eq(&self, other: &str) -> bool {
394
        self.as_str() == other
395
    }
396
}
397
398
impl<'a> PartialEq<PathAndQuery> for &'a str {
399
    #[inline]
400
    fn eq(&self, other: &PathAndQuery) -> bool {
401
        self == &other.as_str()
402
    }
403
}
404
405
impl<'a> PartialEq<&'a str> for PathAndQuery {
406
    #[inline]
407
    fn eq(&self, other: &&'a str) -> bool {
408
        self.as_str() == *other
409
    }
410
}
411
412
impl PartialEq<PathAndQuery> for str {
413
    #[inline]
414
    fn eq(&self, other: &PathAndQuery) -> bool {
415
        self == other.as_str()
416
    }
417
}
418
419
impl PartialEq<String> for PathAndQuery {
420
    #[inline]
421
    fn eq(&self, other: &String) -> bool {
422
        self.as_str() == other.as_str()
423
    }
424
}
425
426
impl PartialEq<PathAndQuery> for String {
427
    #[inline]
428
    fn eq(&self, other: &PathAndQuery) -> bool {
429
        self.as_str() == other.as_str()
430
    }
431
}
432
433
impl PartialOrd for PathAndQuery {
434
    #[inline]
435
    fn partial_cmp(&self, other: &PathAndQuery) -> Option<cmp::Ordering> {
436
        self.as_str().partial_cmp(other.as_str())
437
    }
438
}
439
440
impl PartialOrd<str> for PathAndQuery {
441
    #[inline]
442
    fn partial_cmp(&self, other: &str) -> Option<cmp::Ordering> {
443
        self.as_str().partial_cmp(other)
444
    }
445
}
446
447
impl PartialOrd<PathAndQuery> for str {
448
    #[inline]
449
    fn partial_cmp(&self, other: &PathAndQuery) -> Option<cmp::Ordering> {
450
        self.partial_cmp(other.as_str())
451
    }
452
}
453
454
impl<'a> PartialOrd<&'a str> for PathAndQuery {
455
    #[inline]
456
    fn partial_cmp(&self, other: &&'a str) -> Option<cmp::Ordering> {
457
        self.as_str().partial_cmp(*other)
458
    }
459
}
460
461
impl<'a> PartialOrd<PathAndQuery> for &'a str {
462
    #[inline]
463
    fn partial_cmp(&self, other: &PathAndQuery) -> Option<cmp::Ordering> {
464
        self.partial_cmp(&other.as_str())
465
    }
466
}
467
468
impl PartialOrd<String> for PathAndQuery {
469
    #[inline]
470
    fn partial_cmp(&self, other: &String) -> Option<cmp::Ordering> {
471
        self.as_str().partial_cmp(other.as_str())
472
    }
473
}
474
475
impl PartialOrd<PathAndQuery> for String {
476
    #[inline]
477
    fn partial_cmp(&self, other: &PathAndQuery) -> Option<cmp::Ordering> {
478
        self.as_str().partial_cmp(other.as_str())
479
    }
480
}
481
482
/// Shared validation logic for path and query bytes.
483
/// Returns the query position (or NONE), or an error.
484
0
const fn validate_path_and_query_bytes(bytes: &[u8]) -> Result<u16, PathAndQueryError> {
485
0
    let mut query: u16 = NONE;
486
0
    let mut i: usize = 0;
487
488
    // path ...
489
0
    while i < bytes.len() {
490
0
        let b = bytes[i];
491
0
        if b == b'?' {
492
0
            query = i as u16;
493
0
            i += 1;
494
0
            break;
495
0
        } else if b == b'#' {
496
0
            return Err(PathAndQueryError::FragmentNotAllowed);
497
        } else {
498
0
            let allowed = b == 0x21
499
0
                || (b >= 0x24 && b <= 0x3B)
500
0
                || b == 0x3D
501
0
                || (b >= 0x40 && b <= 0x5F)
502
0
                || (b >= 0x61 && b <= 0x7A)
503
0
                || b == 0x7C
504
0
                || b == 0x7E
505
0
                || b == b'"'
506
0
                || b == b'{'
507
0
                || b == b'}'
508
0
                || (b >= 0x7F);
509
510
0
            if !allowed {
511
0
                return Err(PathAndQueryError::InvalidPathChar);
512
0
            }
513
        }
514
0
        i += 1;
515
    }
516
517
    // query ...
518
0
    if query != NONE {
519
0
        while i < bytes.len() {
520
0
            let b = bytes[i];
521
0
            if b == b'#' {
522
0
                return Err(PathAndQueryError::FragmentNotAllowed);
523
0
            }
524
525
0
            let allowed = b == 0x21
526
0
                || (b >= 0x24 && b <= 0x3B)
527
0
                || b == 0x3D
528
0
                || (b >= 0x3F && b <= 0x7E)
529
0
                || (b >= 0x7F);
530
531
0
            if !allowed {
532
0
                return Err(PathAndQueryError::InvalidQueryChar);
533
0
            }
534
535
0
            i += 1;
536
        }
537
0
    }
538
539
0
    Ok(query)
540
0
}
541
542
#[cfg(test)]
543
mod tests {
544
    use super::*;
545
546
    #[test]
547
    fn equal_to_self_of_same_path() {
548
        let p1: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
549
        let p2: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
550
        assert_eq!(p1, p2);
551
        assert_eq!(p2, p1);
552
    }
553
554
    #[test]
555
    fn not_equal_to_self_of_different_path() {
556
        let p1: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
557
        let p2: PathAndQuery = "/world&foo=bar".parse().unwrap();
558
        assert_ne!(p1, p2);
559
        assert_ne!(p2, p1);
560
    }
561
562
    #[test]
563
    fn equates_with_a_str() {
564
        let path_and_query: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
565
        assert_eq!(&path_and_query, "/hello/world&foo=bar");
566
        assert_eq!("/hello/world&foo=bar", &path_and_query);
567
        assert_eq!(path_and_query, "/hello/world&foo=bar");
568
        assert_eq!("/hello/world&foo=bar", path_and_query);
569
    }
570
571
    #[test]
572
    fn not_equal_with_a_str_of_a_different_path() {
573
        let path_and_query: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
574
        // as a reference
575
        assert_ne!(&path_and_query, "/hello&foo=bar");
576
        assert_ne!("/hello&foo=bar", &path_and_query);
577
        // without reference
578
        assert_ne!(path_and_query, "/hello&foo=bar");
579
        assert_ne!("/hello&foo=bar", path_and_query);
580
    }
581
582
    #[test]
583
    fn equates_with_a_string() {
584
        let path_and_query: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
585
        assert_eq!(path_and_query, "/hello/world&foo=bar".to_string());
586
        assert_eq!("/hello/world&foo=bar".to_string(), path_and_query);
587
    }
588
589
    #[test]
590
    fn not_equal_with_a_string_of_a_different_path() {
591
        let path_and_query: PathAndQuery = "/hello/world&foo=bar".parse().unwrap();
592
        assert_ne!(path_and_query, "/hello&foo=bar".to_string());
593
        assert_ne!("/hello&foo=bar".to_string(), path_and_query);
594
    }
595
596
    #[test]
597
    fn compares_to_self() {
598
        let p1: PathAndQuery = "/a/world&foo=bar".parse().unwrap();
599
        let p2: PathAndQuery = "/b/world&foo=bar".parse().unwrap();
600
        assert!(p1 < p2);
601
        assert!(p2 > p1);
602
    }
603
604
    #[test]
605
    fn compares_with_a_str() {
606
        let path_and_query: PathAndQuery = "/b/world&foo=bar".parse().unwrap();
607
        // by ref
608
        assert!(&path_and_query < "/c/world&foo=bar");
609
        assert!("/c/world&foo=bar" > &path_and_query);
610
        assert!(&path_and_query > "/a/world&foo=bar");
611
        assert!("/a/world&foo=bar" < &path_and_query);
612
613
        // by val
614
        assert!(path_and_query < "/c/world&foo=bar");
615
        assert!("/c/world&foo=bar" > path_and_query);
616
        assert!(path_and_query > "/a/world&foo=bar");
617
        assert!("/a/world&foo=bar" < path_and_query);
618
    }
619
620
    #[test]
621
    fn compares_with_a_string() {
622
        let path_and_query: PathAndQuery = "/b/world&foo=bar".parse().unwrap();
623
        assert!(path_and_query < "/c/world&foo=bar".to_string());
624
        assert!("/c/world&foo=bar".to_string() > path_and_query);
625
        assert!(path_and_query > "/a/world&foo=bar".to_string());
626
        assert!("/a/world&foo=bar".to_string() < path_and_query);
627
    }
628
629
    #[test]
630
    fn ignores_valid_percent_encodings() {
631
        assert_eq!("/a%20b", pq("/a%20b?r=1").path());
632
        assert_eq!("qr=%31", pq("/a/b?qr=%31").query().unwrap());
633
    }
634
635
    #[test]
636
    fn ignores_invalid_percent_encodings() {
637
        assert_eq!("/a%%b", pq("/a%%b?r=1").path());
638
        assert_eq!("/aaa%", pq("/aaa%").path());
639
        assert_eq!("/aaa%", pq("/aaa%?r=1").path());
640
        assert_eq!("/aa%2", pq("/aa%2").path());
641
        assert_eq!("/aa%2", pq("/aa%2?r=1").path());
642
        assert_eq!("qr=%3", pq("/a/b?qr=%3").query().unwrap());
643
    }
644
645
    #[test]
646
    fn allow_utf8_in_path() {
647
        assert_eq!("/🍕", pq("/🍕").path());
648
    }
649
650
    #[test]
651
    fn allow_utf8_in_query() {
652
        assert_eq!(Some("pizza=🍕"), pq("/test?pizza=🍕").query());
653
    }
654
655
    #[test]
656
    fn rejects_invalid_utf8_in_path() {
657
        PathAndQuery::try_from(&[b'/', 0xFF][..]).expect_err("reject invalid utf8");
658
    }
659
660
    #[test]
661
    fn rejects_invalid_utf8_in_query() {
662
        PathAndQuery::try_from(&[b'/', b'a', b'?', 0xFF][..]).expect_err("reject invalid utf8");
663
    }
664
665
    #[test]
666
    fn json_is_fine() {
667
        assert_eq!(
668
            r#"/{"bread":"baguette"}"#,
669
            pq(r#"/{"bread":"baguette"}"#).path()
670
        );
671
    }
672
673
    fn pq(s: &str) -> PathAndQuery {
674
        s.parse().expect(&format!("parsing {}", s))
675
    }
676
}