/rust/registry/src/index.crates.io-1949cf8c6b5b557f/iri-string-0.7.9/src/normalize.rs
Line | Count | Source |
1 | | //! Normalization. |
2 | | //! |
3 | | //! # IRI normalization (and resolution) can fail |
4 | | //! |
5 | | //! Though this is not explicitly stated in RFC 3986, IRI normalization can fail. |
6 | | //! For example, `foo:.///bar`, `foo:./..//bar`, and `foo:/..//bar` are all |
7 | | //! normalized to `foo://bar` as a string. However, an IRI without authority (note |
8 | | //! that this is different from "with empty authority") cannot have a path |
9 | | //! starting with `//`, since it is ambiguous and can be interpreted as an IRI |
10 | | //! with authority. So, `foo://bar` is decomposed as scheme `foo`, authority |
11 | | //! `bar`, and empty path. The expected result is the combination of scheme |
12 | | //! `foo`, no authority, and path `//bar` (though this is not possible to |
13 | | //! serialize), so the algorithm fails as it cannot return the intended result. |
14 | | //! |
15 | | //! IRI resolution can also fail since it (conditionally) invokes normalization |
16 | | //! during the resolution process. For example, resolving a reference `.///bar` |
17 | | //! or `/..//bar` against the base `foo:` fails. |
18 | | //! |
19 | | //! Thus, IRI resolution can fail for some abnormal cases. |
20 | | //! |
21 | | //! Note that this kind of failure can happen only when the base IRI has no |
22 | | //! authority and empty path. This would be rare in the wild, since many people |
23 | | //! would use an IRI with authority part, such as `http://`. |
24 | | //! |
25 | | //! If you are handling `scheme://`-style URIs and IRIs, don't worry about the |
26 | | //! failure. Currently no cases are known to fail when at least one of the base |
27 | | //! IRI or the relative IRI contains authorities. |
28 | | //! |
29 | | //! To know what will happen on resolution failure, see the module documentation |
30 | | //! for [`resolve`][`crate::resolve`]. |
31 | | //! |
32 | | //! ## Examples |
33 | | //! |
34 | | //! ### Normalization failure |
35 | | //! |
36 | | //! ``` |
37 | | //! # #[cfg(feature = "alloc")] { |
38 | | //! use iri_string::normalize::Error; |
39 | | //! use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; |
40 | | //! |
41 | | //! let base = IriAbsoluteStr::new("foo:.///bar")?; |
42 | | //! assert!( |
43 | | //! base.normalize().ensure_rfc3986_normalizable().is_err(), |
44 | | //! "this normalization should fail without WHATWG URL Standard serialization" |
45 | | //! ); |
46 | | //! # } |
47 | | //! # Ok::<_, iri_string::validate::Error>(()) |
48 | | //! ``` |
49 | | //! |
50 | | //! ### Resolution failure |
51 | | //! |
52 | | //! ``` |
53 | | //! # #[cfg(feature = "alloc")] { |
54 | | //! use iri_string::types::{IriAbsoluteStr, IriReferenceStr}; |
55 | | //! |
56 | | //! let base = IriAbsoluteStr::new("scheme:")?; |
57 | | //! { |
58 | | //! let reference = IriReferenceStr::new(".///bar")?; |
59 | | //! let result = reference.resolve_against(base) |
60 | | //! .ensure_rfc3986_normalizable(); |
61 | | //! assert!(result.is_err()); |
62 | | //! } |
63 | | //! |
64 | | //! { |
65 | | //! let reference2 = IriReferenceStr::new("/..//bar")?; |
66 | | //! // Resulting string will be `scheme://bar`, but `bar` should be a path |
67 | | //! // segment, not a host. So, the semantically correct target IRI cannot |
68 | | //! // be represented. |
69 | | //! let result2 = reference2.resolve_against(base) |
70 | | //! .ensure_rfc3986_normalizable(); |
71 | | //! assert!(result2.is_err()); |
72 | | //! } |
73 | | //! # } |
74 | | //! # Ok::<_, iri_string::validate::Error>(()) |
75 | | //! ``` |
76 | | |
77 | | mod error; |
78 | | mod path; |
79 | | mod pct_case; |
80 | | |
81 | | use core::fmt::{self, Display as _, Write as _}; |
82 | | use core::marker::PhantomData; |
83 | | |
84 | | #[cfg(feature = "alloc")] |
85 | | use alloc::collections::TryReserveError; |
86 | | |
87 | | use crate::components::{RiReferenceComponents, Splitter}; |
88 | | #[cfg(feature = "alloc")] |
89 | | use crate::format::{ToDedicatedString, ToStringFallible}; |
90 | | use crate::parser::str::rfind_split_hole; |
91 | | use crate::parser::trusted::is_ascii_only_host; |
92 | | use crate::spec::Spec; |
93 | | use crate::types::{RiAbsoluteStr, RiReferenceStr, RiStr}; |
94 | | #[cfg(feature = "alloc")] |
95 | | use crate::types::{RiAbsoluteString, RiString}; |
96 | | |
97 | | pub use self::error::Error; |
98 | | pub(crate) use self::path::{Path, PathCharacteristic, PathToNormalize}; |
99 | | pub(crate) use self::pct_case::{ |
100 | | is_pct_case_normalized, NormalizedAsciiOnlyHost, PctCaseNormalized, |
101 | | }; |
102 | | |
103 | | /// Normalization algorithm. |
104 | | #[derive(Debug, Clone, Copy, PartialEq, Eq)] |
105 | | pub(crate) enum NormalizationMode { |
106 | | /// No normalization. |
107 | | None, |
108 | | /// Default normalization mode. |
109 | | /// |
110 | | /// Applies RFC 3986 normalization whenever possible. When not possible, |
111 | | /// applies serialization algorithm defined in WHATWG URL standard. |
112 | | Default, |
113 | | /// WHATWG-like normalization mode. |
114 | | /// |
115 | | /// Preserves relative path as is (modulo case/pct normalization) when the |
116 | | /// authority component is absent. |
117 | | PreserveAuthoritylessRelativePath, |
118 | | } |
119 | | |
120 | | impl NormalizationMode { |
121 | | /// Returns true if case normalization and percent-encoding normalization should be applied. |
122 | | /// |
123 | | /// Note that even when this option is `true`, plain US-ASCII characters |
124 | | /// won't be automatically lowered. Users should apply case normalization |
125 | | /// for US-ASCII only `host` component by themselves. |
126 | | #[inline] |
127 | | #[must_use] |
128 | 0 | fn case_pct_normalization(self) -> bool { |
129 | 0 | match self { |
130 | 0 | Self::None => false, |
131 | 0 | Self::Default | Self::PreserveAuthoritylessRelativePath => true, |
132 | | } |
133 | 0 | } Unexecuted instantiation: <iri_string::normalize::NormalizationMode>::case_pct_normalization Unexecuted instantiation: <iri_string::normalize::NormalizationMode>::case_pct_normalization |
134 | | } |
135 | | |
136 | | /// Normalizedness check algorithm. |
137 | | #[derive(Debug, Clone, Copy, PartialEq, Eq)] |
138 | | pub(crate) enum NormalizednessCheckMode { |
139 | | /// Default algorithm (corresponding to [`NormalizationMode::Default`]). |
140 | | Default, |
141 | | /// Strict RFC 3986 normalization. |
142 | | Rfc3986, |
143 | | /// WHATWG-like normalization algorithm (corresponding to |
144 | | /// [`NormalizationMode::PreserveAuthoritylessRelativePath`]). |
145 | | PreserveAuthoritylessRelativePath, |
146 | | } |
147 | | |
148 | | /// Normalization operation. |
149 | | #[derive(Debug, Clone, Copy, PartialEq, Eq)] |
150 | | pub(crate) struct NormalizationOp { |
151 | | /// Normalization mode. |
152 | | pub(crate) mode: NormalizationMode, |
153 | | } |
154 | | |
155 | | /// Spec-agnostic IRI normalization/resolution input. |
156 | | #[derive(Debug, Clone, Copy)] |
157 | | pub(crate) struct NormalizationInput<'a> { |
158 | | /// Target scheme. |
159 | | scheme: &'a str, |
160 | | /// Target authority. |
161 | | authority: Option<&'a str>, |
162 | | /// Target path without dot-removal. |
163 | | path: Path<'a>, |
164 | | /// Target query. |
165 | | query: Option<&'a str>, |
166 | | /// Target fragment. |
167 | | fragment: Option<&'a str>, |
168 | | /// Normalization type. |
169 | | op: NormalizationOp, |
170 | | } |
171 | | |
172 | | impl<'a> NormalizationInput<'a> { |
173 | | /// Creates a `NormalizationInput` from IRIs to resolve. |
174 | | #[inline] |
175 | | #[must_use] |
176 | 0 | pub(crate) fn with_resolution_params<S: Spec>( |
177 | 0 | base_components: &RiReferenceComponents<'a, S>, |
178 | 0 | reference: &'a RiReferenceStr<S>, |
179 | 0 | ) -> Self { |
180 | 0 | let r = RiReferenceComponents::from(reference); |
181 | | |
182 | 0 | Self::create_normalization_input( |
183 | 0 | r.iri.as_str(), |
184 | 0 | &r.splitter, |
185 | 0 | base_components.iri.as_str(), |
186 | 0 | &base_components.splitter, |
187 | | ) |
188 | 0 | } Unexecuted instantiation: <iri_string::normalize::NormalizationInput>::with_resolution_params::<iri_string::spec::UriSpec> Unexecuted instantiation: <iri_string::normalize::NormalizationInput>::with_resolution_params::<_> |
189 | | |
190 | | /// Creates a `NormalizationInput` from components to resolve an IRI. |
191 | | #[must_use] |
192 | 0 | fn create_normalization_input( |
193 | 0 | r_iri: &'a str, |
194 | 0 | r: &Splitter, |
195 | 0 | b_iri: &'a str, |
196 | 0 | b: &Splitter, |
197 | 0 | ) -> Self { |
198 | | /// The toplevel component the reference has. |
199 | | #[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] |
200 | | enum RefToplevel { |
201 | | /// Scheme. |
202 | | Scheme, |
203 | | /// Authority. |
204 | | Authority, |
205 | | /// Path. |
206 | | Path, |
207 | | /// Query. |
208 | | Query, |
209 | | /// Reference is empty or has only fragment. |
210 | | None, |
211 | | } |
212 | | |
213 | | impl RefToplevel { |
214 | | /// Choose a component from either of the reference or the base, |
215 | | /// based on the toplevel component of the reference. |
216 | | #[inline] |
217 | | #[must_use] |
218 | 0 | fn choose_then<T, F, G>(self, component: RefToplevel, reference: F, base: G) -> T |
219 | 0 | where |
220 | 0 | F: FnOnce() -> T, |
221 | 0 | G: FnOnce() -> T, |
222 | | { |
223 | 0 | if self <= component { |
224 | 0 | reference() |
225 | | } else { |
226 | 0 | base() |
227 | | } |
228 | 0 | } Unexecuted instantiation: <<iri_string::normalize::NormalizationInput>::create_normalization_input::RefToplevel>::choose_then::<core::option::Option<&str>, <iri_string::normalize::NormalizationInput>::create_normalization_input::{closure#3}, <iri_string::normalize::NormalizationInput>::create_normalization_input::{closure#4}>Unexecuted instantiation: <<iri_string::normalize::NormalizationInput>::create_normalization_input::RefToplevel>::choose_then::<core::option::Option<&str>, <iri_string::normalize::NormalizationInput>::create_normalization_input::{closure#1}, <iri_string::normalize::NormalizationInput>::create_normalization_input::{closure#2}> |
229 | | } |
230 | | |
231 | 0 | let ref_toplevel = if r.has_scheme() { |
232 | 0 | RefToplevel::Scheme |
233 | 0 | } else if r.has_authority() { |
234 | 0 | RefToplevel::Authority |
235 | 0 | } else if !r.is_path_empty(r_iri.len()) { |
236 | 0 | RefToplevel::Path |
237 | 0 | } else if r.has_query() { |
238 | 0 | RefToplevel::Query |
239 | | } else { |
240 | 0 | RefToplevel::None |
241 | | }; |
242 | | |
243 | 0 | let path = match ref_toplevel { |
244 | | RefToplevel::Scheme | RefToplevel::Authority => { |
245 | 0 | Path::NeedsProcessing(PathToNormalize::from_single_path(r.path_str(r_iri))) |
246 | | } |
247 | | RefToplevel::Path => { |
248 | 0 | let r_path = r.path_str(r_iri); |
249 | 0 | if r_path.starts_with('/') { |
250 | 0 | Path::NeedsProcessing(PathToNormalize::from_single_path(r_path)) |
251 | | } else { |
252 | | // About this branch, see |
253 | | // <https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.3>. |
254 | | // |
255 | | // > o If the base URI has a defined authority component and an empty |
256 | | // > path, then return a string consisting of "/" concatenated with the |
257 | | // > reference's path; otherwise, |
258 | 0 | let b_path = b.path_str(b_iri); |
259 | 0 | let b_path = if b.has_authority() && b_path.is_empty() { |
260 | 0 | "/" |
261 | | } else { |
262 | 0 | b_path |
263 | | }; |
264 | 0 | Path::NeedsProcessing(PathToNormalize::from_paths_to_be_resolved( |
265 | 0 | b_path, r_path, |
266 | 0 | )) |
267 | | } |
268 | | } |
269 | 0 | RefToplevel::Query | RefToplevel::None => Path::Done(b.path_str(b_iri)), |
270 | | }; |
271 | | |
272 | | Self { |
273 | 0 | scheme: r.scheme_str(r_iri).unwrap_or_else(|| { |
274 | 0 | b.scheme_str(b_iri) |
275 | 0 | .expect("[validity] non-relative IRI must have a scheme") |
276 | 0 | }), |
277 | 0 | authority: ref_toplevel.choose_then( |
278 | 0 | RefToplevel::Authority, |
279 | 0 | || r.authority_str(r_iri), |
280 | 0 | || b.authority_str(b_iri), |
281 | | ), |
282 | 0 | path, |
283 | 0 | query: ref_toplevel.choose_then( |
284 | 0 | RefToplevel::Query, |
285 | 0 | || r.query_str(r_iri), |
286 | 0 | || b.query_str(b_iri), |
287 | | ), |
288 | 0 | fragment: r.fragment_str(r_iri), |
289 | 0 | op: NormalizationOp { |
290 | 0 | mode: NormalizationMode::None, |
291 | 0 | }, |
292 | | } |
293 | 0 | } |
294 | | } |
295 | | |
296 | | impl<'a, S: Spec> From<&'a RiStr<S>> for NormalizationInput<'a> { |
297 | 0 | fn from(iri: &'a RiStr<S>) -> Self { |
298 | 0 | let components = RiReferenceComponents::<S>::from(iri.as_ref()); |
299 | 0 | let (scheme, authority, path, query, fragment) = components.to_major(); |
300 | 0 | let scheme = scheme.expect("[validity] `absolute IRI must have `scheme`"); |
301 | 0 | let path = Path::NeedsProcessing(PathToNormalize::from_single_path(path)); |
302 | | |
303 | 0 | NormalizationInput { |
304 | 0 | scheme, |
305 | 0 | authority, |
306 | 0 | path, |
307 | 0 | query, |
308 | 0 | fragment, |
309 | 0 | op: NormalizationOp { |
310 | 0 | mode: NormalizationMode::None, |
311 | 0 | }, |
312 | 0 | } |
313 | 0 | } |
314 | | } |
315 | | |
316 | | #[cfg(feature = "alloc")] |
317 | | impl<'a, S: Spec> From<&'a RiString<S>> for NormalizationInput<'a> { |
318 | | #[inline] |
319 | 0 | fn from(iri: &'a RiString<S>) -> Self { |
320 | 0 | Self::from(iri.as_slice()) |
321 | 0 | } |
322 | | } |
323 | | |
324 | | impl<'a, S: Spec> From<&'a RiAbsoluteStr<S>> for NormalizationInput<'a> { |
325 | 0 | fn from(iri: &'a RiAbsoluteStr<S>) -> Self { |
326 | 0 | let components = RiReferenceComponents::<S>::from(iri.as_ref()); |
327 | 0 | let (scheme, authority, path, query, fragment) = components.to_major(); |
328 | 0 | let scheme = scheme.expect("[validity] `absolute IRI must have `scheme`"); |
329 | 0 | let path = Path::NeedsProcessing(PathToNormalize::from_single_path(path)); |
330 | | |
331 | 0 | NormalizationInput { |
332 | 0 | scheme, |
333 | 0 | authority, |
334 | 0 | path, |
335 | 0 | query, |
336 | 0 | fragment, |
337 | 0 | op: NormalizationOp { |
338 | 0 | mode: NormalizationMode::None, |
339 | 0 | }, |
340 | 0 | } |
341 | 0 | } |
342 | | } |
343 | | |
344 | | #[cfg(feature = "alloc")] |
345 | | impl<'a, S: Spec> From<&'a RiAbsoluteString<S>> for NormalizationInput<'a> { |
346 | | #[inline] |
347 | 0 | fn from(iri: &'a RiAbsoluteString<S>) -> Self { |
348 | 0 | Self::from(iri.as_slice()) |
349 | 0 | } |
350 | | } |
351 | | |
352 | | impl NormalizationInput<'_> { |
353 | | /// Checks if the path is normalizable by RFC 3986 algorithm. |
354 | | /// |
355 | | /// Returns `Ok(())` when normalizable, returns `Err(_)` if not. |
356 | 0 | pub(crate) fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> { |
357 | 0 | if self.authority.is_some() { |
358 | 0 | return Ok(()); |
359 | 0 | } |
360 | 0 | match self.path { |
361 | 0 | Path::Done(_) => Ok(()), |
362 | 0 | Path::NeedsProcessing(path) => path.ensure_rfc3986_normalizable_with_authority_absent(), |
363 | | } |
364 | 0 | } |
365 | | } |
366 | | |
367 | | /// Writable as a normalized IRI. |
368 | | /// |
369 | | /// Note that this implicitly applies the serialization rule defined by the WHATWG |
370 | | /// URL Standard (to handle normalization impossible by RFC 3986) because `Display` |
371 | | /// should not fail for reasons other than backend I/O failure. If you want the |
372 | | /// normalization to fail in such cases, check if the path starts with `/./`. |
373 | | /// When the normalization succeeds by RFC 3986 algorithm, the path never starts |
374 | | /// with `/./`. |
375 | | struct NormalizedInner<'a, S> { |
376 | | /// Spec-agnostic normalization input. |
377 | | input: NormalizationInput<'a>, |
378 | | /// Spec. |
379 | | _spec: PhantomData<fn() -> S>, |
380 | | } |
381 | | |
382 | | impl<S: Spec> fmt::Debug for NormalizedInner<'_, S> { |
383 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
384 | 0 | f.debug_struct("Normalized") |
385 | 0 | .field("input", &self.input) |
386 | 0 | .finish() |
387 | 0 | } |
388 | | } |
389 | | |
390 | | impl<'a, S: Spec> NormalizedInner<'a, S> { |
391 | | /// Creates a new `Normalized` object from the given input. |
392 | | #[inline] |
393 | | #[must_use] |
394 | 0 | fn from_input(input: NormalizationInput<'a>) -> Self { |
395 | 0 | Self { |
396 | 0 | input, |
397 | 0 | _spec: PhantomData, |
398 | 0 | } |
399 | 0 | } Unexecuted instantiation: <iri_string::normalize::NormalizedInner<iri_string::spec::UriSpec>>::from_input Unexecuted instantiation: <iri_string::normalize::NormalizedInner<_>>::from_input |
400 | | } |
401 | | |
402 | | impl<S: Spec> fmt::Display for NormalizedInner<'_, S> { |
403 | | #[inline] |
404 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
405 | | // Write the scheme. |
406 | 0 | if self.input.op.mode.case_pct_normalization() { |
407 | 0 | normalize_scheme(f, self.input.scheme)?; |
408 | | } else { |
409 | 0 | f.write_str(self.input.scheme)?; |
410 | | } |
411 | 0 | f.write_str(":")?; |
412 | | |
413 | | // Write the authority if available. |
414 | 0 | if let Some(authority) = self.input.authority { |
415 | 0 | f.write_str("//")?; |
416 | 0 | if self.input.op.mode.case_pct_normalization() { |
417 | 0 | normalize_authority::<S>(f, authority)?; |
418 | | } else { |
419 | | // No case/pct normalization. |
420 | 0 | f.write_str(authority)?; |
421 | | } |
422 | 0 | } |
423 | | |
424 | | // Process and write the path. |
425 | 0 | match self.input.path { |
426 | 0 | Path::Done(s) => { |
427 | 0 | if self.input.op.mode.case_pct_normalization() { |
428 | | // Normalize the path. |
429 | 0 | PathToNormalize::from_single_path(s).fmt_write_normalize::<S, _>( |
430 | 0 | f, |
431 | 0 | self.input.op, |
432 | 0 | self.input.authority.is_some(), |
433 | 0 | )? |
434 | | } else { |
435 | | // No normalization. |
436 | 0 | f.write_str(s)? |
437 | | } |
438 | | } |
439 | 0 | Path::NeedsProcessing(path) => { |
440 | 0 | path.fmt_write_normalize::<S, _>(f, self.input.op, self.input.authority.is_some())? |
441 | | } |
442 | | } |
443 | | |
444 | | // Write the query if available. |
445 | 0 | if let Some(query) = self.input.query { |
446 | 0 | f.write_char('?')?; |
447 | 0 | if self.input.op.mode.case_pct_normalization() { |
448 | 0 | normalize_query::<S>(f, query)?; |
449 | | } else { |
450 | 0 | f.write_str(query)?; |
451 | | } |
452 | 0 | } |
453 | | |
454 | | // Write the fragment if available. |
455 | 0 | if let Some(fragment) = self.input.fragment { |
456 | 0 | f.write_char('#')?; |
457 | 0 | if self.input.op.mode.case_pct_normalization() { |
458 | 0 | normalize_fragment::<S>(f, fragment)?; |
459 | | } else { |
460 | 0 | f.write_str(fragment)?; |
461 | | } |
462 | 0 | } |
463 | | |
464 | 0 | Ok(()) |
465 | 0 | } Unexecuted instantiation: <iri_string::normalize::NormalizedInner<iri_string::spec::UriSpec> as core::fmt::Display>::fmt Unexecuted instantiation: <iri_string::normalize::NormalizedInner<_> as core::fmt::Display>::fmt |
466 | | } |
467 | | |
468 | | /// Writes the normalized scheme. |
469 | 0 | pub(crate) fn normalize_scheme(f: &mut fmt::Formatter<'_>, scheme: &str) -> fmt::Result { |
470 | | // Apply case normalization. |
471 | | // |
472 | | // > namely, that the scheme and US-ASCII only host are case |
473 | | // > insensitive and therefore should be normalized to lowercase. |
474 | | // > |
475 | | // > --- <https://datatracker.ietf.org/doc/html/rfc3987#section-5.3.2.1>. |
476 | | // |
477 | | // Note that `scheme` consists of only ASCII characters and contains |
478 | | // no percent-encoded characters. |
479 | 0 | scheme |
480 | 0 | .chars() |
481 | 0 | .map(|c| c.to_ascii_lowercase()) |
482 | 0 | .try_for_each(|c| f.write_char(c)) |
483 | 0 | } |
484 | | |
485 | | /// Writes the normalized authority. |
486 | 0 | fn normalize_authority<S: Spec>(f: &mut fmt::Formatter<'_>, authority: &str) -> fmt::Result { |
487 | 0 | let host_port = match rfind_split_hole(authority, b'@') { |
488 | 0 | Some((userinfo, host_port)) => { |
489 | | // Don't lowercase `userinfo` even if it is ASCII only. `userinfo` |
490 | | // is not a part of `host`. |
491 | 0 | PctCaseNormalized::<S>::new(userinfo).fmt(f)?; |
492 | 0 | f.write_char('@')?; |
493 | 0 | host_port |
494 | | } |
495 | 0 | None => authority, |
496 | | }; |
497 | 0 | normalize_host_port::<S>(f, host_port) |
498 | 0 | } Unexecuted instantiation: iri_string::normalize::normalize_authority::<iri_string::spec::UriSpec> Unexecuted instantiation: iri_string::normalize::normalize_authority::<_> |
499 | | |
500 | | /// Writes the normalized host and port. |
501 | 0 | pub(crate) fn normalize_host_port<S: Spec>( |
502 | 0 | f: &mut fmt::Formatter<'_>, |
503 | 0 | host_port: &str, |
504 | 0 | ) -> fmt::Result { |
505 | | // If the suffix is a colon, it is a delimiter between the host and empty |
506 | | // port. An empty port should be removed during normalization (see RFC 3986 |
507 | | // section 3.2.3), so strip it. |
508 | | // |
509 | | // > URI producers and normalizers should omit the port component and its |
510 | | // > ":" delimiter if port is empty or if its value would be the same as |
511 | | // > that of the scheme's default. |
512 | | // > |
513 | | // > --- [RFC 3986 section 3.2.3. Port](https://www.rfc-editor.org/rfc/rfc3986.html#section-3.2.3) |
514 | 0 | let host_port = host_port.strip_suffix(':').unwrap_or(host_port); |
515 | | |
516 | | // Apply case normalization and percent-encoding normalization to `host`. |
517 | | // Optional `":" port` part only consists of an ASCII colon and ASCII |
518 | | // digits, so this won't affect the test result. |
519 | 0 | if is_ascii_only_host(host_port) { |
520 | | // If the host is ASCII characters only, make plain alphabets lower case. |
521 | 0 | NormalizedAsciiOnlyHost::new(host_port).fmt(f) |
522 | | } else { |
523 | 0 | PctCaseNormalized::<S>::new(host_port).fmt(f) |
524 | | } |
525 | 0 | } Unexecuted instantiation: iri_string::normalize::normalize_host_port::<iri_string::spec::UriSpec> Unexecuted instantiation: iri_string::normalize::normalize_host_port::<_> |
526 | | |
527 | | /// Writes the normalized query without the '?' prefix. |
528 | 0 | pub(crate) fn normalize_query<S: Spec>(f: &mut fmt::Formatter<'_>, query: &str) -> fmt::Result { |
529 | | // Apply percent-encoding normalization. |
530 | 0 | PctCaseNormalized::<S>::new(query).fmt(f) |
531 | 0 | } Unexecuted instantiation: iri_string::normalize::normalize_query::<iri_string::spec::UriSpec> Unexecuted instantiation: iri_string::normalize::normalize_query::<_> |
532 | | |
533 | | /// Writes the normalized fragment without the '#' prefix. |
534 | 0 | pub(crate) fn normalize_fragment<S: Spec>( |
535 | 0 | f: &mut fmt::Formatter<'_>, |
536 | 0 | fragment: &str, |
537 | 0 | ) -> fmt::Result { |
538 | | // Apply percent-encoding normalization. |
539 | 0 | PctCaseNormalized::<S>::new(fragment).fmt(f) |
540 | 0 | } Unexecuted instantiation: iri_string::normalize::normalize_fragment::<iri_string::spec::UriSpec> Unexecuted instantiation: iri_string::normalize::normalize_fragment::<_> |
541 | | |
542 | | /// Normalized OR resolved IRI. |
543 | | /// |
544 | | /// Resolved IRI can be represented by this type. In that case, the result might |
545 | | /// not be normalized. If you want the IRI resolution result to be normalized, |
546 | | /// use [`enable_normalization`][`Self::enable_normalization`] method. |
547 | | /// |
548 | | /// [`Display`]: `core::fmt::Display` |
549 | | pub struct Normalized<'a, T: ?Sized> { |
550 | | /// Spec-agnostic normalization input. |
551 | | input: NormalizationInput<'a>, |
552 | | /// Expected result type. |
553 | | _ty_str: PhantomData<fn() -> T>, |
554 | | } |
555 | | |
556 | | impl<T: ?Sized> fmt::Debug for Normalized<'_, T> { |
557 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
558 | 0 | f.debug_struct("Normalized") |
559 | 0 | .field("input", &self.input) |
560 | 0 | .finish() |
561 | 0 | } |
562 | | } |
563 | | |
564 | | impl<'a, T: ?Sized> Normalized<'a, T> { |
565 | | /// Creates a new `Normalized` object from the given input. |
566 | | #[inline] |
567 | | #[must_use] |
568 | 0 | pub(crate) fn from_input(input: NormalizationInput<'a>) -> Self { |
569 | 0 | Self { |
570 | 0 | input, |
571 | 0 | _ty_str: PhantomData, |
572 | 0 | } |
573 | 0 | } Unexecuted instantiation: <iri_string::normalize::Normalized<iri_string::types::generic::normal::RiStr<iri_string::spec::UriSpec>>>::from_input Unexecuted instantiation: <iri_string::normalize::Normalized<_>>::from_input |
574 | | |
575 | | /// Enables the normalization. |
576 | | /// |
577 | | /// This lets the normalizer apply the case normalization, percent-encoding |
578 | | /// normalization, and dot segments removal. |
579 | | #[inline] |
580 | 0 | pub fn enable_normalization(&mut self) { |
581 | 0 | self.input.op.mode = NormalizationMode::Default; |
582 | 0 | } |
583 | | |
584 | | /// Enables the normalization that preserves the relative path under some conditions. |
585 | | /// |
586 | | /// Note that this normalization algorithm is not compatible with RFC 3986 |
587 | | /// algorithm for some inputs. |
588 | | /// |
589 | | /// See [`RiStr::normalize_but_preserve_authorityless_relative_path()`] |
590 | | /// for detail. |
591 | | #[inline] |
592 | 0 | pub fn enable_normalization_preserving_authorityless_relative_path(&mut self) { |
593 | 0 | self.input.op.mode = NormalizationMode::PreserveAuthoritylessRelativePath; |
594 | 0 | } |
595 | | |
596 | | /// Returns `Self` with normalization enabled. |
597 | | #[inline] |
598 | | #[must_use] |
599 | 0 | pub fn and_normalize(mut self) -> Self { |
600 | 0 | self.enable_normalization(); |
601 | 0 | self |
602 | 0 | } |
603 | | |
604 | | /// Returns `Self` with special normalization enabled. |
605 | | /// |
606 | | /// Note that this normalization algorithm is not compatible with RFC 3986 |
607 | | /// algorithm for some inputs. |
608 | | /// |
609 | | /// See [`RiStr::normalize_but_preserve_authorityless_relative_path()`] |
610 | | /// for detail. |
611 | | #[inline] |
612 | | #[must_use] |
613 | 0 | pub fn and_normalize_but_preserve_authorityless_relative_path(mut self) -> Self { |
614 | 0 | self.enable_normalization_preserving_authorityless_relative_path(); |
615 | 0 | self |
616 | 0 | } |
617 | | |
618 | | /// Checks if the path is normalizable by RFC 3986 algorithm. |
619 | | /// |
620 | | /// Returns `Ok(())` when normalizable, returns `Err(_)` if not. |
621 | | #[inline] |
622 | 0 | pub fn ensure_rfc3986_normalizable(&self) -> Result<(), Error> { |
623 | 0 | self.input.ensure_rfc3986_normalizable() |
624 | 0 | } |
625 | | } |
626 | | |
627 | | impl<S: Spec> fmt::Display for Normalized<'_, RiStr<S>> { |
628 | | #[inline] |
629 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
630 | 0 | NormalizedInner::<S>::from_input(self.input).fmt(f) |
631 | 0 | } Unexecuted instantiation: <iri_string::normalize::Normalized<iri_string::types::generic::normal::RiStr<iri_string::spec::UriSpec>> as core::fmt::Display>::fmt Unexecuted instantiation: <iri_string::normalize::Normalized<iri_string::types::generic::normal::RiStr<_>> as core::fmt::Display>::fmt |
632 | | } |
633 | | |
634 | | impl<S: Spec> fmt::Display for Normalized<'_, RiAbsoluteStr<S>> { |
635 | | #[inline] |
636 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
637 | 0 | NormalizedInner::<S>::from_input(self.input).fmt(f) |
638 | 0 | } |
639 | | } |
640 | | |
641 | | #[cfg(feature = "alloc")] |
642 | | impl<S: Spec> ToDedicatedString for Normalized<'_, RiStr<S>> { |
643 | | type Target = RiString<S>; |
644 | | |
645 | 0 | fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> { |
646 | 0 | let s = self.try_to_string()?; |
647 | 0 | Ok(TryFrom::try_from(s).expect("[validity] the normalization result must be a valid IRI")) |
648 | 0 | } |
649 | | } |
650 | | |
651 | | #[cfg(feature = "alloc")] |
652 | | impl<S: Spec> From<Normalized<'_, RiStr<S>>> for RiString<S> { |
653 | | #[inline] |
654 | 0 | fn from(v: Normalized<'_, RiStr<S>>) -> Self { |
655 | 0 | v.to_dedicated_string() |
656 | 0 | } |
657 | | } |
658 | | |
659 | | #[cfg(feature = "alloc")] |
660 | | impl<S: Spec> From<&Normalized<'_, RiStr<S>>> for RiString<S> { |
661 | | #[inline] |
662 | 0 | fn from(v: &Normalized<'_, RiStr<S>>) -> Self { |
663 | 0 | v.to_dedicated_string() |
664 | 0 | } |
665 | | } |
666 | | |
667 | | #[cfg(feature = "alloc")] |
668 | | impl<S: Spec> ToDedicatedString for Normalized<'_, RiAbsoluteStr<S>> { |
669 | | type Target = RiAbsoluteString<S>; |
670 | | |
671 | 0 | fn try_to_dedicated_string(&self) -> Result<Self::Target, TryReserveError> { |
672 | 0 | let s = self.try_to_string()?; |
673 | 0 | Ok(TryFrom::try_from(s).expect("[validity] the normalization result must be a valid IRI")) |
674 | 0 | } |
675 | | } |
676 | | |
677 | | #[cfg(feature = "alloc")] |
678 | | impl<S: Spec> From<Normalized<'_, RiAbsoluteStr<S>>> for RiAbsoluteString<S> { |
679 | | #[inline] |
680 | 0 | fn from(v: Normalized<'_, RiAbsoluteStr<S>>) -> Self { |
681 | 0 | v.to_dedicated_string() |
682 | 0 | } |
683 | | } |
684 | | |
685 | | #[cfg(feature = "alloc")] |
686 | | impl<S: Spec> From<&Normalized<'_, RiAbsoluteStr<S>>> for RiAbsoluteString<S> { |
687 | | #[inline] |
688 | 0 | fn from(v: &Normalized<'_, RiAbsoluteStr<S>>) -> Self { |
689 | 0 | v.to_dedicated_string() |
690 | 0 | } |
691 | | } |