/src/librsvg/rsvg/src/url_resolver.rs
Line | Count | Source |
1 | | //! Determine which URLs are allowed for loading. |
2 | | |
3 | | use std::fmt; |
4 | | use std::ops::Deref; |
5 | | use url::Url; |
6 | | |
7 | | use crate::error::AllowedUrlError; |
8 | | |
9 | | /// Decides which URLs are allowed to be loaded. |
10 | | /// |
11 | | /// Currently only contains the base URL. |
12 | | /// |
13 | | /// The plan is to add: |
14 | | /// base_only: Only allow to load content from the same base URL. By default |
15 | | // this restriction is enabled and requires to provide base_url. |
16 | | /// include_xml: Allows to use xi:include with XML. Enabled by default. |
17 | | /// include_text: Allows to use xi:include with text. Enabled by default. |
18 | | /// local_only: Only allow to load content from the local filesystem. |
19 | | /// Enabled by default. |
20 | | #[derive(Clone)] |
21 | | pub struct UrlResolver { |
22 | | /// Base URL; all relative references will be resolved with respect to this. |
23 | | pub base_url: Option<Url>, |
24 | | } |
25 | | |
26 | | impl UrlResolver { |
27 | | /// Creates a `UrlResolver` with defaults, and sets the `base_url`. |
28 | 1.36k | pub fn new(base_url: Option<Url>) -> Self { |
29 | 1.36k | UrlResolver { base_url } |
30 | 1.36k | } |
31 | | |
32 | | /// Decides which URLs are allowed to be loaded based on the presence of a base URL. |
33 | | /// |
34 | | /// This function implements the policy described in "Security and locations of |
35 | | /// referenced files" in the [crate |
36 | | /// documentation](index.html#security-and-locations-of-referenced-files). |
37 | 20.9k | pub fn resolve_href(&self, href: &str) -> Result<AllowedUrl, AllowedUrlError> { |
38 | 20.9k | let url = Url::options() |
39 | 20.9k | .base_url(self.base_url.as_ref()) |
40 | 20.9k | .parse(href) |
41 | 20.9k | .map_err(AllowedUrlError::UrlParseError)?; |
42 | | |
43 | | // Allow loads of data: from any location |
44 | 15.0k | if url.scheme() == "data" { |
45 | 12.0k | return Ok(AllowedUrl(url)); |
46 | 2.98k | } |
47 | | |
48 | | // Queries are not allowed. |
49 | 2.98k | if url.query().is_some() { |
50 | 81 | return Err(AllowedUrlError::NoQueriesAllowed); |
51 | 2.90k | } |
52 | | |
53 | | // Fragment identifiers are not allowed. They should have been stripped |
54 | | // upstream, by NodeId. |
55 | 2.90k | if url.fragment().is_some() { |
56 | 249 | return Err(AllowedUrlError::NoFragmentIdentifierAllowed); |
57 | 2.65k | } |
58 | | |
59 | | // All other sources require a base url |
60 | 2.65k | if self.base_url.is_none() { |
61 | 2.65k | return Err(AllowedUrlError::BaseRequired); |
62 | 0 | } |
63 | | |
64 | 0 | let base_url = self.base_url.as_ref().unwrap(); |
65 | | |
66 | | // Deny loads from differing URI schemes |
67 | 0 | if url.scheme() != base_url.scheme() { |
68 | 0 | return Err(AllowedUrlError::DifferentUriSchemes); |
69 | 0 | } |
70 | | |
71 | | // resource: is allowed to load anything from other resources |
72 | 0 | if url.scheme() == "resource" { |
73 | 0 | return Ok(AllowedUrl(url)); |
74 | 0 | } |
75 | | |
76 | | // Non-file: isn't allowed to load anything |
77 | 0 | if url.scheme() != "file" { |
78 | 0 | return Err(AllowedUrlError::DisallowedScheme); |
79 | 0 | } |
80 | | |
81 | | // The rest of this function assumes file: URLs; guard against |
82 | | // incorrect refactoring. |
83 | 0 | assert!(url.scheme() == "file"); |
84 | 0 | assert!(base_url.scheme() == "file"); |
85 | | |
86 | | // Hostnames are not allowed. The URL crate rejects them on Unix; but on Windows |
87 | | // they are allowed. In that case, reject them here. |
88 | 0 | if url.host().is_some() { |
89 | 0 | return Err(AllowedUrlError::NoHostAllowed); |
90 | 0 | } |
91 | | |
92 | | // If we have a base_uri of "file:///foo/bar.svg", and resolve an href of ".", |
93 | | // Url.parse() will give us "file:///foo/". We don't want that, so check |
94 | | // if the last path segment is empty - it will not be empty for a normal file. |
95 | | |
96 | 0 | if let Some(mut segments) = url.path_segments() { |
97 | 0 | if segments |
98 | 0 | .next_back() |
99 | 0 | .expect("URL path segments always contain at last 1 element") |
100 | 0 | .is_empty() |
101 | | { |
102 | 0 | return Err(AllowedUrlError::NotSiblingOrChildOfBaseFile); |
103 | 0 | } |
104 | | } else { |
105 | 0 | unreachable!("the file: URL cannot have an empty path"); |
106 | | } |
107 | | |
108 | | // We have two file: URIs. Now canonicalize them (remove .. and symlinks, etc.) |
109 | | // and see if the directories match |
110 | | |
111 | 0 | let url_path = url |
112 | 0 | .to_file_path() |
113 | 0 | .map_err(|_| AllowedUrlError::InvalidPathInUrl)?; |
114 | 0 | let base_path = base_url |
115 | 0 | .to_file_path() |
116 | 0 | .map_err(|_| AllowedUrlError::InvalidPathInBaseUrl)?; |
117 | | |
118 | 0 | let base_parent = base_path.parent(); |
119 | 0 | if base_parent.is_none() { |
120 | 0 | return Err(AllowedUrlError::BaseIsRoot); |
121 | 0 | } |
122 | | |
123 | 0 | let base_parent = base_parent.unwrap(); |
124 | | |
125 | 0 | let path_canon = url_path |
126 | 0 | .canonicalize() |
127 | 0 | .map_err(|_| AllowedUrlError::CanonicalizationError)?; |
128 | 0 | let parent_canon = base_parent |
129 | 0 | .canonicalize() |
130 | 0 | .map_err(|_| AllowedUrlError::CanonicalizationError)?; |
131 | | |
132 | 0 | if path_canon.starts_with(parent_canon) { |
133 | | // Finally, convert the canonicalized path back to a URL. |
134 | 0 | let path_to_url = Url::from_file_path(path_canon).unwrap(); |
135 | 0 | Ok(AllowedUrl(path_to_url)) |
136 | | } else { |
137 | 0 | Err(AllowedUrlError::NotSiblingOrChildOfBaseFile) |
138 | | } |
139 | 20.9k | } |
140 | | } |
141 | | |
142 | | /// Wrapper for URLs which are allowed to be loaded |
143 | | /// |
144 | | /// SVG files can reference other files (PNG/JPEG images, other SVGs, |
145 | | /// CSS files, etc.). This object is constructed by checking whether |
146 | | /// a specified `href` (a possibly-relative filename, for example) |
147 | | /// should be allowed to be loaded, given the base URL of the SVG |
148 | | /// being loaded. |
149 | | #[derive(Debug, Clone, PartialEq, Eq, Hash)] |
150 | | pub struct AllowedUrl(Url); |
151 | | |
152 | | impl Deref for AllowedUrl { |
153 | | type Target = Url; |
154 | | |
155 | 8.34k | fn deref(&self) -> &Url { |
156 | 8.34k | &self.0 |
157 | 8.34k | } |
158 | | } |
159 | | |
160 | | impl fmt::Display for AllowedUrl { |
161 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
162 | 0 | self.0.fmt(f) |
163 | 0 | } |
164 | | } |
165 | | |
#[cfg(test)]
mod tests {
    use super::*;

    use std::path::PathBuf;

    /// Builds a `file:` URI string for the absolute path `p`.
    ///
    /// On Windows a `c:` drive is inserted so that the URI maps to a valid path.
    fn make_file_uri(p: &str) -> String {
        if cfg!(windows) {
            format!("file:///c:{}", p)
        } else {
            format!("file://{}", p)
        }
    }

    /// Turns the path of a test fixture into an absolute `file:` URL.
    ///
    /// The file must exist, since the path is canonicalized first.
    fn url_from_test_fixtures(filename_relative_to_librsvg_srcdir: &str) -> Url {
        let path = PathBuf::from(filename_relative_to_librsvg_srcdir);
        let absolute = path
            .canonicalize()
            .expect("files from test fixtures are supposed to canonicalize");
        Url::from_file_path(absolute).unwrap()
    }

    /// Asserts that `href` is rejected, whatever the specific error is.
    ///
    /// The unexpected result is only printed if the assertion fails.
    #[track_caller]
    fn assert_disallowed(url_resolver: &UrlResolver, href: &str) {
        let result = url_resolver.resolve_href(href);
        assert!(
            result.is_err(),
            "expected {:?} to be rejected, but got {:?}",
            href,
            result
        );
    }

    #[test]
    fn disallows_relative_file_with_no_base_file() {
        let url_resolver = UrlResolver::new(None);
        assert!(matches!(
            url_resolver.resolve_href("foo.svg"),
            Err(AllowedUrlError::UrlParseError(
                url::ParseError::RelativeUrlWithoutBase
            ))
        ));
    }

    #[test]
    fn disallows_different_schemes() {
        let url_resolver = UrlResolver::new(Some(
            Url::parse("http://example.com/malicious.svg").unwrap(),
        ));
        assert!(matches!(
            url_resolver.resolve_href("file:///etc/passwd"),
            Err(AllowedUrlError::DifferentUriSchemes)
        ));
    }

    #[test]
    fn disallows_base_is_root() {
        let url_resolver = UrlResolver::new(Some(Url::parse(&make_file_uri("/")).unwrap()));
        assert!(matches!(
            url_resolver.resolve_href("foo.svg"),
            Err(AllowedUrlError::BaseIsRoot)
        ));
    }

    #[test]
    fn disallows_non_file_scheme() {
        let url_resolver = UrlResolver::new(Some(Url::parse("http://foo.bar/baz.svg").unwrap()));
        assert!(matches!(
            url_resolver.resolve_href("foo.svg"),
            Err(AllowedUrlError::DisallowedScheme)
        ));
    }

    #[test]
    fn disallows_hostname_in_base_url() {
        let base_url = Url::parse("file://www.example.com/some/file.svg").unwrap();
        let url_resolver = UrlResolver::new(Some(base_url));
        assert_disallowed(&url_resolver, "bar/foo.svg");
    }

    #[test]
    fn disallows_hostname_in_href_no_base_url() {
        let url_resolver = UrlResolver::new(None);
        assert_disallowed(&url_resolver, "file://www.example.com/foo.svg");
    }

    #[test]
    fn disallows_hostname_in_href_with_base_url() {
        let base_url = Url::parse("file:///foo/bar.svg").unwrap();
        let url_resolver = UrlResolver::new(Some(base_url));
        assert_disallowed(&url_resolver, "file://www.example.com/foo.svg");
    }

    #[test]
    fn disallows_unc_path() {
        let base_url = Url::parse("file:///foo/bar.svg").unwrap();
        let url_resolver = UrlResolver::new(Some(base_url));
        assert_disallowed(&url_resolver, "//server/share_name/foo/bar.svg");
    }

    #[test]
    fn allows_data_url_with_no_base_file() {
        let url_resolver = UrlResolver::new(None);
        let resolved = url_resolver
            .resolve_href("data:image/jpeg;base64,xxyyzz")
            .unwrap();
        assert_eq!(resolved.as_str(), "data:image/jpeg;base64,xxyyzz");
    }

    #[test]
    fn allows_relative() {
        let base_url = url_from_test_fixtures("tests/fixtures/loading/bar.svg");
        let url_resolver = UrlResolver::new(Some(base_url));

        let resolved = url_resolver.resolve_href("foo.svg").unwrap();
        let resolved_str = resolved.as_str();
        assert!(resolved_str.ends_with("/loading/foo.svg"));
    }

    #[test]
    fn allows_sibling() {
        let url_resolver = UrlResolver::new(Some(url_from_test_fixtures(
            "tests/fixtures/loading/bar.svg",
        )));
        let resolved = url_resolver
            .resolve_href(url_from_test_fixtures("tests/fixtures/loading/foo.svg").as_str())
            .unwrap();

        let resolved_str = resolved.as_str();
        assert!(resolved_str.ends_with("/loading/foo.svg"));
    }

    #[test]
    fn allows_child_of_sibling() {
        let url_resolver = UrlResolver::new(Some(url_from_test_fixtures(
            "tests/fixtures/loading/bar.svg",
        )));
        let resolved = url_resolver
            .resolve_href(url_from_test_fixtures("tests/fixtures/loading/subdir/baz.svg").as_str())
            .unwrap();

        let resolved_str = resolved.as_str();
        assert!(resolved_str.ends_with("/loading/subdir/baz.svg"));
    }

    // Ignore on Windows since we test for /etc/passwd
    #[cfg(unix)]
    #[test]
    fn disallows_non_sibling() {
        let url_resolver = UrlResolver::new(Some(url_from_test_fixtures(
            "tests/fixtures/loading/bar.svg",
        )));
        assert!(matches!(
            url_resolver.resolve_href(&make_file_uri("/etc/passwd")),
            Err(AllowedUrlError::NotSiblingOrChildOfBaseFile)
        ));
    }

    #[test]
    fn disallows_queries() {
        let url_resolver = UrlResolver::new(Some(
            Url::parse(&make_file_uri("/example/bar.svg")).unwrap(),
        ));
        assert!(matches!(
            url_resolver.resolve_href(".?../../../../../../../../../../etc/passwd"),
            Err(AllowedUrlError::NoQueriesAllowed)
        ));
    }

    #[test]
    fn disallows_weird_relative_uris() {
        let url_resolver = UrlResolver::new(Some(
            Url::parse(&make_file_uri("/example/bar.svg")).unwrap(),
        ));

        let weird_hrefs = [
            ".@../../../../../../../../../../etc/passwd",
            ".$../../../../../../../../../../etc/passwd",
            ".%../../../../../../../../../../etc/passwd",
            ".*../../../../../../../../../../etc/passwd",
            "~/../../../../../../../../../../etc/passwd",
        ];

        for href in weird_hrefs.iter() {
            assert_disallowed(&url_resolver, href);
        }
    }

    #[test]
    fn disallows_dot_sibling() {
        let url_resolver = UrlResolver::new(Some(
            Url::parse(&make_file_uri("/example/bar.svg")).unwrap(),
        ));

        assert!(matches!(
            url_resolver.resolve_href("."),
            Err(AllowedUrlError::NotSiblingOrChildOfBaseFile)
        ));
        assert!(matches!(
            url_resolver.resolve_href(".#../../../../../../../../../../etc/passwd"),
            Err(AllowedUrlError::NoFragmentIdentifierAllowed)
        ));
    }

    #[test]
    fn disallows_fragment() {
        // UrlResolver::resolve_href() explicitly disallows fragment identifiers.
        // This is because they should have been stripped before calling that function,
        // by NodeId or the Iri machinery.
        let url_resolver =
            UrlResolver::new(Some(Url::parse("https://example.com/foo.svg").unwrap()));

        assert!(matches!(
            url_resolver.resolve_href("bar.svg#fragment"),
            Err(AllowedUrlError::NoFragmentIdentifierAllowed)
        ));
    }

    #[cfg(windows)]
    #[test]
    fn invalid_url_from_test_suite() {
        // A base URL is required for Url to panic; with `UrlResolver::new(None)`
        // it does not.
        let resolver =
            UrlResolver::new(Some(Url::parse("file:///c:/foo.svg").expect("initial url")));

        // Resolving "file://invalid.css" against a base URL panics inside Url, so
        // use a less problematic case, hopefully.  This test only checks that the
        // call returns; either outcome is accepted.
        match resolver.resolve_href("file://") {
            Ok(_) => println!("yay!"),
            Err(e) => println!("err: {}", e),
        }
    }
}