/rust/registry/src/index.crates.io-1949cf8c6b5b557f/ammonia-4.1.2/src/lib.rs
Line | Count | Source |
1 | | // Copyright (C) Michael Howell and others |
2 | | // this library is released under the same terms as Rust itself. |
3 | | |
4 | | #![deny(unsafe_code)] |
5 | | #![deny(missing_docs)] |
6 | | |
7 | | //! Ammonia is a whitelist-based HTML sanitization library. It is designed to |
8 | | //! prevent cross-site scripting, layout breaking, and clickjacking caused |
9 | | //! by untrusted user-provided HTML being mixed into a larger web page. |
10 | | //! |
11 | | //! Ammonia uses [html5ever] to parse and serialize document fragments the same way browsers do, |
12 | | //! so it is extremely resilient to syntactic obfuscation. |
13 | | //! |
14 | | //! Ammonia parses its input exactly according to the HTML5 specification; |
15 | | //! it will not linkify bare URLs, insert line or paragraph breaks, or convert `(C)` into ©. |
16 | | //! If you want that, use a markup processor before running the sanitizer, like [pulldown-cmark]. |
17 | | //! |
18 | | //! # Examples |
19 | | //! |
20 | | //! ``` |
21 | | //! let result = ammonia::clean( |
22 | | //! "<b><img src='' onerror=alert('hax')>I'm not trying to XSS you</b>" |
23 | | //! ); |
24 | | //! assert_eq!(result, "<b><img src=\"\">I'm not trying to XSS you</b>"); |
25 | | //! ``` |
26 | | //! |
27 | | //! [html5ever]: https://github.com/servo/html5ever "The HTML parser in Servo" |
28 | | //! [pulldown-cmark]: https://github.com/google/pulldown-cmark "CommonMark parser" |
29 | | |
30 | | #[cfg(ammonia_unstable)] |
31 | | pub mod rcdom; |
32 | | |
33 | | #[cfg(not(ammonia_unstable))] |
34 | | mod rcdom; |
35 | | |
36 | | mod style; |
37 | | |
38 | | use html5ever::interface::Attribute; |
39 | | use html5ever::serialize::{serialize, SerializeOpts}; |
40 | | use html5ever::tree_builder::{NodeOrText, TreeSink}; |
41 | | use html5ever::{driver as html, local_name, ns, QualName}; |
42 | | use maplit::{hashmap, hashset}; |
43 | | use std::sync::LazyLock; |
44 | | use rcdom::{Handle, NodeData, RcDom, SerializableHandle}; |
45 | | use std::borrow::{Borrow, Cow}; |
46 | | use std::cell::Cell; |
47 | | use std::cmp::max; |
48 | | use std::collections::{HashMap, HashSet}; |
49 | | use std::fmt::{self, Display}; |
50 | | use std::io; |
51 | | use std::iter::IntoIterator as IntoIter; |
52 | | use std::mem; |
53 | | use std::rc::Rc; |
54 | | use std::str::FromStr; |
55 | | use tendril::stream::TendrilSink; |
56 | | use tendril::StrTendril; |
57 | | use tendril::{format_tendril, ByteTendril}; |
58 | | pub use url::Url; |
59 | | |
60 | | use html5ever::buffer_queue::BufferQueue; |
61 | | use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer}; |
62 | | pub use url; |
63 | | |
// Shared sanitizer holding the default `Builder` configuration. It is wrapped in a `LazyLock`,
// so `Builder::default` only runs on first access; the free function `clean` reads it.
64 | | static AMMONIA: LazyLock<Builder<'static>> = LazyLock::new(Builder::default); |
65 | | |
66 | | /// Clean HTML with a conservative set of defaults. |
67 | | /// |
68 | | /// * [tags](struct.Builder.html#defaults) |
69 | | /// * [`script` and `style` have their contents stripped](struct.Builder.html#defaults-1) |
70 | | /// * [attributes on specific tags](struct.Builder.html#defaults-2) |
71 | | /// * [attributes on all tags](struct.Builder.html#defaults-6) |
72 | | /// * [url schemes](struct.Builder.html#defaults-7) |
73 | | /// * [relative URLs are passed through, unchanged, by default](struct.Builder.html#defaults-8) |
74 | | /// * [links are marked `noopener noreferrer` by default](struct.Builder.html#defaults-9) |
75 | | /// * all `class=""` settings are blocked by default |
76 | | /// * comments are stripped by default |
77 | | /// * no generic attribute prefixes are turned on by default |
78 | | /// * no specific tag-attribute-value settings are configured by default |
79 | | /// |
80 | | /// [opener]: https://mathiasbynens.github.io/rel-noopener/ |
81 | | /// [referrer]: https://en.wikipedia.org/wiki/HTTP_referer |
82 | | /// |
83 | | /// # Examples |
84 | | /// |
85 | | /// assert_eq!(ammonia::clean("XSS<script>attack</script>"), "XSS") |
86 | 0 | pub fn clean(src: &str) -> String { |
87 | 0 | AMMONIA.clean(src).to_string() |
88 | 0 | } |
89 | | |
/// Turn an arbitrary string into unformatted HTML.
///
/// This function is roughly equivalent to PHP's `htmlspecialchars` and `htmlentities`.
/// It is as strict as possible, encoding every character that has special meaning to the
/// HTML parser as a character reference. All other characters are copied through verbatim.
///
/// # Warnings
///
/// This function cannot be used to package strings into a `<script>` or `<style>` tag;
/// you need a JavaScript or CSS escaper to do that.
///
/// ```
/// // DO NOT DO THIS
/// # use ammonia::clean_text;
/// let untrusted = "Robert\"); abuse();//";
/// let html = format!("<script>invoke(\"{}\")</script>", clean_text(untrusted));
/// ```
///
/// `<textarea>` tags will strip the first newline, if present, even if that newline is encoded.
/// If you want to build an editor that works the way most folks expect them to, you should put a
/// newline at the beginning of the tag, like this:
///
/// ```
/// # use ammonia::{Builder, clean_text};
/// let untrusted = "\n\nhi!";
/// let mut b = Builder::new();
/// b.add_tags(&["textarea"]);
/// // This is the bad version
/// // The user put two newlines at the beginning, but the first one was removed
/// let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted))).to_string();
/// assert_eq!("<textarea>\nhi!</textarea>", sanitized);
/// // This is a good version
/// // The user put two newlines at the beginning, and we add a third one,
/// // so the result still has two
/// let sanitized = b.clean(&format!("<textarea>\n{}</textarea>", clean_text(untrusted))).to_string();
/// assert_eq!("<textarea>\n\nhi!</textarea>", sanitized);
/// // This version is also often considered good
/// // For many applications, leading and trailing whitespace is probably unwanted
/// let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted.trim()))).to_string();
/// assert_eq!("<textarea>hi!</textarea>", sanitized);
/// ```
///
/// It also does not make user text safe for HTML attribute microsyntaxes such as `class` or `id`.
/// Only use this function for places where HTML accepts unrestricted text such as `title` attributes
/// and paragraph contents.
pub fn clean_text(src: &str) -> String {
    // Escaping never shrinks the text, so the input length is a lower bound on the output size.
    let mut ret_val = String::with_capacity(max(4, src.len()));
    for c in src.chars() {
        let replacement = match c {
            // this character, when confronted, will start a tag
            '<' => "&lt;",
            // in an unquoted attribute, will end the attribute value
            '>' => "&gt;",
            // in an attribute surrounded by double quotes, this character will end the attribute value
            '\"' => "&quot;",
            // in an attribute surrounded by single quotes, this character will end the attribute value
            '\'' => "&apos;",
            // in HTML5, returns a bogus parse error in an unquoted attribute, while in SGML/HTML, it will end an attribute value surrounded by backquotes
            '`' => "&grave;",
            // in an unquoted attribute, this character will end the attribute
            '/' => "&#47;",
            // starts an entity reference
            '&' => "&amp;",
            // if at the beginning of an unquoted attribute, will get ignored
            '=' => "&#61;",
            // will end an unquoted attribute
            ' ' => "&#32;",
            '\t' => "&#9;",
            '\n' => "&#10;",
            '\x0c' => "&#12;",
            '\r' => "&#13;",
            // a spec-compliant browser will perform this replacement anyway, but the middleware might not
            '\0' => "&#65533;",
            // ALL OTHER CHARACTERS ARE PASSED THROUGH VERBATIM
            _ => {
                ret_val.push(c);
                continue;
            }
        };
        ret_val.push_str(replacement);
    }
    ret_val
}
169 | | |
170 | | /// Determine if a given string contains HTML |
171 | | /// |
172 | | /// This function is parses the full string into HTML and checks if the input contained any |
173 | | /// HTML syntax. |
174 | | /// |
175 | | /// # Note |
176 | | /// This function will return positively for strings that contain invalid HTML syntax like |
177 | | /// `<g>` and even `Vec::<u8>::new()`. |
178 | 0 | pub fn is_html(input: &str) -> bool { |
179 | 0 | let santok = SanitizationTokenizer::new(); |
180 | 0 | let mut chunk = ByteTendril::new(); |
181 | 0 | chunk.push_slice(input.as_bytes()); |
182 | 0 | let mut input = BufferQueue::default(); |
183 | 0 | input.push_back(chunk.try_reinterpret().unwrap()); |
184 | | |
185 | 0 | let tok = Tokenizer::new(santok, Default::default()); |
186 | 0 | let _ = tok.feed(&mut input); |
187 | 0 | tok.end(); |
188 | 0 | tok.sink.was_sanitized.get() |
189 | 0 | } |
190 | | |
/// Sink state used by `is_html`: a single flag that is flipped when markup is observed.
#[derive(Clone)]
struct SanitizationTokenizer {
    /// `true` once a token other than plain text, EOF, or a parse error has been seen.
    was_sanitized: Cell<bool>,
}

impl SanitizationTokenizer {
    /// Create a sink whose flag starts out cleared.
    pub fn new() -> SanitizationTokenizer {
        let was_sanitized = Cell::new(false);
        SanitizationTokenizer { was_sanitized }
    }
}
203 | | |
204 | | impl TokenSink for SanitizationTokenizer { |
205 | | type Handle = (); |
206 | 0 | fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> { |
207 | 0 | match token { |
208 | 0 | Token::CharacterTokens(_) | Token::EOFToken | Token::ParseError(_) => {} |
209 | 0 | _ => { |
210 | 0 | self.was_sanitized.set(true); |
211 | 0 | } |
212 | | } |
213 | 0 | TokenSinkResult::Continue |
214 | 0 | } |
215 | 0 | fn end(&self) {} |
216 | | } |
217 | | |
218 | | /// An HTML sanitizer. |
219 | | /// |
220 | | /// Given a fragment of HTML, Ammonia will parse it according to the HTML5 |
221 | | /// parsing algorithm and sanitize any disallowed tags or attributes. This |
222 | | /// algorithm also takes care of things like unclosed and (some) misnested |
223 | | /// tags. |
224 | | /// |
225 | | /// # Examples |
226 | | /// |
227 | | /// use ammonia::{Builder, UrlRelative}; |
228 | | /// |
229 | | /// let a = Builder::default() |
230 | | /// .link_rel(None) |
231 | | /// .url_relative(UrlRelative::PassThrough) |
232 | | /// .clean("<a href=/>test") |
233 | | /// .to_string(); |
234 | | /// assert_eq!( |
235 | | /// a, |
236 | | /// "<a href=\"/\">test</a>"); |
237 | | /// |
238 | | /// # Panics |
239 | | /// |
240 | | /// Running [`clean`] or [`clean_from_reader`] may cause a panic if the builder is |
241 | | /// configured with any of these (contradictory) settings: |
242 | | /// |
243 | | /// * The `rel` attribute is added to [`generic_attributes`] or the |
244 | | /// [`tag_attributes`] for the `<a>` tag, and [`link_rel`] is not set to `None`. |
245 | | /// |
246 | | /// For example, this is going to panic, since [`link_rel`] is set to |
247 | | /// `Some("noopener noreferrer")` by default, |
248 | | /// and it makes no sense to simultaneously say that the user is allowed to |
249 | | /// set their own `rel` attribute while saying that every link shall be set to |
250 | | /// a particular value: |
251 | | /// |
252 | | /// ```should_panic |
253 | | /// use ammonia::Builder; |
254 | | /// use maplit::hashset; |
255 | | /// |
256 | | /// # fn main() { |
257 | | /// Builder::default() |
258 | | /// .generic_attributes(hashset!["rel"]) |
259 | | /// .clean(""); |
260 | | /// # } |
261 | | /// ``` |
262 | | /// |
263 | | /// This, however, is perfectly valid: |
264 | | /// |
265 | | /// ``` |
266 | | /// use ammonia::Builder; |
267 | | /// use maplit::hashset; |
268 | | /// |
269 | | /// # fn main() { |
270 | | /// Builder::default() |
271 | | /// .generic_attributes(hashset!["rel"]) |
272 | | /// .link_rel(None) |
273 | | /// .clean(""); |
274 | | /// # } |
275 | | /// ``` |
276 | | /// |
277 | | /// * The `class` attribute is in [`allowed_classes`] and is in the |
278 | | /// corresponding [`tag_attributes`] or in [`generic_attributes`]. |
279 | | /// |
280 | | /// This is done both to line up with the treatment of `rel`, |
281 | | /// and to prevent people from accidentally allowing arbitrary |
282 | | /// classes on a particular element. |
283 | | /// |
284 | | /// This will panic: |
285 | | /// |
286 | | /// ```should_panic |
287 | | /// use ammonia::Builder; |
288 | | /// use maplit::{hashmap, hashset}; |
289 | | /// |
290 | | /// # fn main() { |
291 | | /// Builder::default() |
292 | | /// .generic_attributes(hashset!["class"]) |
293 | | /// .allowed_classes(hashmap!["span" => hashset!["hidden"]]) |
294 | | /// .clean(""); |
295 | | /// # } |
296 | | /// ``` |
297 | | /// |
298 | | /// This, however, is perfectly valid: |
299 | | /// |
300 | | /// ``` |
301 | | /// use ammonia::Builder; |
302 | | /// use maplit::{hashmap, hashset}; |
303 | | /// |
304 | | /// # fn main() { |
305 | | /// Builder::default() |
306 | | /// .allowed_classes(hashmap!["span" => hashset!["hidden"]]) |
307 | | /// .clean(""); |
308 | | /// # } |
309 | | /// ``` |
310 | | /// |
311 | | /// * A tag is in either [`tags`] or [`tag_attributes`] while also |
312 | | /// being in [`clean_content_tags`]. |
313 | | /// |
314 | | /// Both [`tags`] and [`tag_attributes`] are whitelists but |
315 | | /// [`clean_content_tags`] is a blacklist, so it doesn't make sense |
316 | | /// to have the same tag in both. |
317 | | /// |
318 | | /// For example, this will panic, since the `aside` tag is in |
319 | | /// [`tags`] by default: |
320 | | /// |
321 | | /// ```should_panic |
322 | | /// use ammonia::Builder; |
323 | | /// use maplit::hashset; |
324 | | /// |
325 | | /// # fn main() { |
326 | | /// Builder::default() |
327 | | /// .clean_content_tags(hashset!["aside"]) |
328 | | /// .clean(""); |
329 | | /// # } |
330 | | /// ``` |
331 | | /// |
332 | | /// This, however, is valid: |
333 | | /// |
334 | | /// ``` |
335 | | /// use ammonia::Builder; |
336 | | /// use maplit::hashset; |
337 | | /// |
338 | | /// # fn main() { |
339 | | /// Builder::default() |
340 | | /// .rm_tags(&["aside"]) |
341 | | /// .clean_content_tags(hashset!["aside"]) |
342 | | /// .clean(""); |
343 | | /// # } |
344 | | /// ``` |
345 | | /// |
346 | | /// [`clean`]: #method.clean |
347 | | /// [`clean_from_reader`]: #method.clean_from_reader |
348 | | /// [`generic_attributes`]: #method.generic_attributes |
349 | | /// [`tag_attributes`]: #method.tag_attributes |
350 | | /// [`generic_attributes`]: #method.generic_attributes |
351 | | /// [`link_rel`]: #method.link_rel |
352 | | /// [`allowed_classes`]: #method.allowed_classes |
353 | | /// [`id_prefix`]: #method.id_prefix |
354 | | /// [`tags`]: #method.tags |
355 | | /// [`clean_content_tags`]: #method.clean_content_tags |
356 | | #[derive(Debug)] |
357 | | pub struct Builder<'a> { |
// Element names that are allowed; replaced wholesale by `tags`, edited by `add_tags`/`rm_tags`.
358 | | tags: HashSet<&'a str>, |
// Element names whose contents are removed from the output entirely (default: script, style).
359 | | clean_content_tags: HashSet<&'a str>, |
// Map from tag name to the attribute names allowed on that tag.
360 | | tag_attributes: HashMap<&'a str, HashSet<&'a str>>, |
// Map from tag name to attribute name to the set of allowed values for that attribute.
361 | | tag_attribute_values: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>, |
// Map from tag name to attribute name to a single value; empty by default.
// NOTE(review): presumably values forced onto matching tags; confirm against the setter.
362 | | set_tag_attribute_values: HashMap<&'a str, HashMap<&'a str, &'a str>>, |
// Attribute names allowed on every tag (default: lang, title).
363 | | generic_attributes: HashSet<&'a str>, |
// URL schemes in the default allow-list (http, https, mailto, ...).
// NOTE(review): presumably checked against URL-valued attributes; confirm.
364 | | url_schemes: HashSet<&'a str>, |
// Policy for relative URLs; the default is `UrlRelative::PassThrough`.
365 | | url_relative: UrlRelative<'a>, |
// Optional user-supplied `AttributeFilter`; `None` by default.
// NOTE(review): when and how it is invoked is not visible in this part of the file.
366 | | attribute_filter: Option<Box<dyn AttributeFilter>>, |
// Value for the `rel` attribute on links (default "noopener noreferrer"); `None` disables it.
367 | | link_rel: Option<&'a str>, |
// Map from tag name to the class names allowed on that tag; empty by default.
368 | | allowed_classes: HashMap<&'a str, HashSet<&'a str>>, |
// Whether comments are stripped; `true` by default.
369 | | strip_comments: bool, |
// `None` by default. NOTE(review): presumably a prefix applied to `id` values; confirm.
370 | | id_prefix: Option<&'a str>, |
// Optional set of generic attribute prefixes; none are turned on by default.
371 | | generic_attribute_prefixes: Option<HashSet<&'a str>>, |
// `None` by default. NOTE(review): presumably an allow-list of CSS properties for `style`
// attributes, handled by the `style` module; confirm.
372 | | style_properties: Option<HashSet<&'a str>>, |
373 | | } |
374 | | |
375 | | impl<'a> Default for Builder<'a> { |
376 | 0 | fn default() -> Self { |
377 | | #[rustfmt::skip] |
378 | 0 | let tags = hashset![ |
379 | 0 | "a", "abbr", "acronym", "area", "article", "aside", "b", "bdi", |
380 | 0 | "bdo", "blockquote", "br", "caption", "center", "cite", "code", |
381 | 0 | "col", "colgroup", "data", "dd", "del", "details", "dfn", "div", |
382 | 0 | "dl", "dt", "em", "figcaption", "figure", "footer", "h1", "h2", |
383 | 0 | "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "i", "img", |
384 | 0 | "ins", "kbd", "li", "map", "mark", "nav", "ol", "p", "pre", |
385 | 0 | "q", "rp", "rt", "rtc", "ruby", "s", "samp", "small", "span", |
386 | 0 | "strike", "strong", "sub", "summary", "sup", "table", "tbody", |
387 | 0 | "td", "th", "thead", "time", "tr", "tt", "u", "ul", "var", "wbr" |
388 | | ]; |
389 | 0 | let clean_content_tags = hashset!["script", "style"]; |
390 | 0 | let generic_attributes = hashset!["lang", "title"]; |
391 | 0 | let tag_attributes = hashmap![ |
392 | 0 | "a" => hashset![ |
393 | 0 | "href", "hreflang" |
394 | | ], |
395 | 0 | "bdo" => hashset![ |
396 | | "dir" |
397 | | ], |
398 | 0 | "blockquote" => hashset![ |
399 | | "cite" |
400 | | ], |
401 | 0 | "col" => hashset![ |
402 | 0 | "align", "char", "charoff", "span" |
403 | | ], |
404 | 0 | "colgroup" => hashset![ |
405 | 0 | "align", "char", "charoff", "span" |
406 | | ], |
407 | 0 | "del" => hashset![ |
408 | 0 | "cite", "datetime" |
409 | | ], |
410 | 0 | "hr" => hashset![ |
411 | 0 | "align", "size", "width" |
412 | | ], |
413 | 0 | "img" => hashset![ |
414 | 0 | "align", "alt", "height", "src", "width" |
415 | | ], |
416 | 0 | "ins" => hashset![ |
417 | 0 | "cite", "datetime" |
418 | | ], |
419 | 0 | "ol" => hashset![ |
420 | | "start" |
421 | | ], |
422 | 0 | "q" => hashset![ |
423 | | "cite" |
424 | | ], |
425 | 0 | "table" => hashset![ |
426 | 0 | "align", "char", "charoff", "summary" |
427 | | ], |
428 | 0 | "tbody" => hashset![ |
429 | 0 | "align", "char", "charoff" |
430 | | ], |
431 | 0 | "td" => hashset![ |
432 | 0 | "align", "char", "charoff", "colspan", "headers", "rowspan" |
433 | | ], |
434 | 0 | "tfoot" => hashset![ |
435 | 0 | "align", "char", "charoff" |
436 | | ], |
437 | 0 | "th" => hashset![ |
438 | 0 | "align", "char", "charoff", "colspan", "headers", "rowspan", "scope" |
439 | | ], |
440 | 0 | "thead" => hashset![ |
441 | 0 | "align", "char", "charoff" |
442 | | ], |
443 | 0 | "tr" => hashset![ |
444 | 0 | "align", "char", "charoff" |
445 | | ], |
446 | | ]; |
447 | 0 | let tag_attribute_values = hashmap![]; |
448 | 0 | let set_tag_attribute_values = hashmap![]; |
449 | 0 | let url_schemes = hashset![ |
450 | | "bitcoin", |
451 | 0 | "ftp", |
452 | 0 | "ftps", |
453 | 0 | "geo", |
454 | 0 | "http", |
455 | 0 | "https", |
456 | 0 | "im", |
457 | 0 | "irc", |
458 | 0 | "ircs", |
459 | 0 | "magnet", |
460 | 0 | "mailto", |
461 | 0 | "mms", |
462 | 0 | "mx", |
463 | 0 | "news", |
464 | 0 | "nntp", |
465 | 0 | "openpgp4fpr", |
466 | 0 | "sip", |
467 | 0 | "sms", |
468 | 0 | "smsto", |
469 | 0 | "ssh", |
470 | 0 | "tel", |
471 | 0 | "url", |
472 | 0 | "webcal", |
473 | 0 | "wtai", |
474 | 0 | "xmpp" |
475 | | ]; |
476 | 0 | let allowed_classes = hashmap![]; |
477 | | |
478 | 0 | Builder { |
479 | 0 | tags, |
480 | 0 | clean_content_tags, |
481 | 0 | tag_attributes, |
482 | 0 | tag_attribute_values, |
483 | 0 | set_tag_attribute_values, |
484 | 0 | generic_attributes, |
485 | 0 | url_schemes, |
486 | 0 | url_relative: UrlRelative::PassThrough, |
487 | 0 | attribute_filter: None, |
488 | 0 | link_rel: Some("noopener noreferrer"), |
489 | 0 | allowed_classes, |
490 | 0 | strip_comments: true, |
491 | 0 | id_prefix: None, |
492 | 0 | generic_attribute_prefixes: None, |
493 | 0 | style_properties: None, |
494 | 0 | } |
495 | 0 | } |
496 | | } |
497 | | |
498 | | impl<'a> Builder<'a> { |
499 | | /// Sets the tags that are allowed. |
500 | | /// |
501 | | /// # Examples |
502 | | /// |
503 | | /// use ammonia::Builder; |
504 | | /// use maplit::hashset; |
505 | | /// |
506 | | /// # fn main() { |
507 | | /// let tags = hashset!["my-tag"]; |
508 | | /// let a = Builder::new() |
509 | | /// .tags(tags) |
510 | | /// .clean("<my-tag>") |
511 | | /// .to_string(); |
512 | | /// assert_eq!(a, "<my-tag></my-tag>"); |
513 | | /// # } |
514 | | /// |
515 | | /// # Defaults |
516 | | /// |
517 | | /// ```notest |
518 | | /// a, abbr, acronym, area, article, aside, b, bdi, |
519 | | /// bdo, blockquote, br, caption, center, cite, code, |
520 | | /// col, colgroup, data, dd, del, details, dfn, div, |
521 | | /// dl, dt, em, figcaption, figure, footer, h1, h2, |
522 | | /// h3, h4, h5, h6, header, hgroup, hr, i, img, |
523 | | /// ins, kbd, li, map, mark, nav, ol, p, pre, |
524 | | /// q, rp, rt, rtc, ruby, s, samp, small, span, |
525 | | /// strike, strong, sub, summary, sup, table, tbody, |
526 | | /// td, th, thead, time, tr, tt, u, ul, var, wbr |
527 | | /// ``` |
528 | 0 | pub fn tags(&mut self, value: HashSet<&'a str>) -> &mut Self { |
529 | 0 | self.tags = value; |
530 | 0 | self |
531 | 0 | } |
532 | | |
533 | | /// Add additonal whitelisted tags without overwriting old ones. |
534 | | /// |
535 | | /// Does nothing if the tag is already there. |
536 | | /// |
537 | | /// # Examples |
538 | | /// |
539 | | /// let a = ammonia::Builder::default() |
540 | | /// .add_tags(&["my-tag"]) |
541 | | /// .clean("<my-tag>test</my-tag> <span>mess</span>").to_string(); |
542 | | /// assert_eq!("<my-tag>test</my-tag> <span>mess</span>", a); |
543 | 0 | pub fn add_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>( |
544 | 0 | &mut self, |
545 | 0 | it: I, |
546 | 0 | ) -> &mut Self { |
547 | 0 | self.tags.extend(it.into_iter().map(Borrow::borrow)); |
548 | 0 | self |
549 | 0 | } |
550 | | |
551 | | /// Remove already-whitelisted tags. |
552 | | /// |
553 | | /// Does nothing if the tags is already gone. |
554 | | /// |
555 | | /// # Examples |
556 | | /// |
557 | | /// let a = ammonia::Builder::default() |
558 | | /// .rm_tags(&["span"]) |
559 | | /// .clean("<span></span>").to_string(); |
560 | | /// assert_eq!("", a); |
561 | 0 | pub fn rm_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>( |
562 | 0 | &mut self, |
563 | 0 | it: I, |
564 | 0 | ) -> &mut Self { |
565 | 0 | for i in it { |
566 | 0 | self.tags.remove(i.borrow()); |
567 | 0 | } |
568 | 0 | self |
569 | 0 | } |
570 | | |
571 | | /// Returns a copy of the set of whitelisted tags. |
572 | | /// |
573 | | /// # Examples |
574 | | /// |
575 | | /// use maplit::hashset; |
576 | | /// |
577 | | /// let tags = hashset!["my-tag-1", "my-tag-2"]; |
578 | | /// |
579 | | /// let mut b = ammonia::Builder::default(); |
580 | | /// b.tags(Clone::clone(&tags)); |
581 | | /// assert_eq!(tags, b.clone_tags()); |
582 | 0 | pub fn clone_tags(&self) -> HashSet<&'a str> { |
583 | 0 | self.tags.clone() |
584 | 0 | } |
585 | | |
586 | | /// Sets the tags whose contents will be completely removed from the output. |
587 | | /// |
588 | | /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause |
589 | | /// a panic. |
590 | | /// |
591 | | /// # Examples |
592 | | /// |
593 | | /// use ammonia::Builder; |
594 | | /// use maplit::hashset; |
595 | | /// |
596 | | /// # fn main() { |
597 | | /// let tag_blacklist = hashset!["script", "style"]; |
598 | | /// let a = Builder::new() |
599 | | /// .clean_content_tags(tag_blacklist) |
600 | | /// .clean("<script>alert('hello')</script><style>a { background: #fff }</style>") |
601 | | /// .to_string(); |
602 | | /// assert_eq!(a, ""); |
603 | | /// # } |
604 | | /// |
605 | | /// # Defaults |
606 | | /// |
607 | | /// ```notest |
608 | | /// script, style |
609 | | /// ``` |
610 | 0 | pub fn clean_content_tags(&mut self, value: HashSet<&'a str>) -> &mut Self { |
611 | 0 | self.clean_content_tags = value; |
612 | 0 | self |
613 | 0 | } |
614 | | |
615 | | /// Add additonal blacklisted clean-content tags without overwriting old ones. |
616 | | /// |
617 | | /// Does nothing if the tag is already there. |
618 | | /// |
619 | | /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause |
620 | | /// a panic. |
621 | | /// |
622 | | /// # Examples |
623 | | /// |
624 | | /// let a = ammonia::Builder::default() |
625 | | /// .add_clean_content_tags(&["my-tag"]) |
626 | | /// .clean("<my-tag>test</my-tag><span>mess</span>").to_string(); |
627 | | /// assert_eq!("<span>mess</span>", a); |
628 | 0 | pub fn add_clean_content_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>( |
629 | 0 | &mut self, |
630 | 0 | it: I, |
631 | 0 | ) -> &mut Self { |
632 | 0 | self.clean_content_tags |
633 | 0 | .extend(it.into_iter().map(Borrow::borrow)); |
634 | 0 | self |
635 | 0 | } |
636 | | |
637 | | /// Remove already-blacklisted clean-content tags. |
638 | | /// |
639 | | /// Does nothing if the tags aren't blacklisted. |
640 | | /// |
641 | | /// # Examples |
642 | | /// use ammonia::Builder; |
643 | | /// use maplit::hashset; |
644 | | /// |
645 | | /// # fn main() { |
646 | | /// let tag_blacklist = hashset!["script"]; |
647 | | /// let a = ammonia::Builder::default() |
648 | | /// .clean_content_tags(tag_blacklist) |
649 | | /// .rm_clean_content_tags(&["script"]) |
650 | | /// .clean("<script>XSS</script>").to_string(); |
651 | | /// assert_eq!("XSS", a); |
652 | | /// # } |
653 | 0 | pub fn rm_clean_content_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>( |
654 | 0 | &mut self, |
655 | 0 | it: I, |
656 | 0 | ) -> &mut Self { |
657 | 0 | for i in it { |
658 | 0 | self.clean_content_tags.remove(i.borrow()); |
659 | 0 | } |
660 | 0 | self |
661 | 0 | } |
662 | | |
663 | | /// Returns a copy of the set of blacklisted clean-content tags. |
664 | | /// |
665 | | /// # Examples |
666 | | /// # use maplit::hashset; |
667 | | /// |
668 | | /// let tags = hashset!["my-tag-1", "my-tag-2"]; |
669 | | /// |
670 | | /// let mut b = ammonia::Builder::default(); |
671 | | /// b.clean_content_tags(Clone::clone(&tags)); |
672 | | /// assert_eq!(tags, b.clone_clean_content_tags()); |
673 | 0 | pub fn clone_clean_content_tags(&self) -> HashSet<&'a str> { |
674 | 0 | self.clean_content_tags.clone() |
675 | 0 | } |
676 | | |
677 | | /// Sets the HTML attributes that are allowed on specific tags. |
678 | | /// |
679 | | /// The value is structured as a map from tag names to a set of attribute names. |
680 | | /// |
681 | | /// If a tag is not itself whitelisted, adding entries to this map will do nothing. |
682 | | /// |
683 | | /// # Examples |
684 | | /// |
685 | | /// use ammonia::Builder; |
686 | | /// use maplit::{hashmap, hashset}; |
687 | | /// |
688 | | /// # fn main() { |
689 | | /// let tags = hashset!["my-tag"]; |
690 | | /// let tag_attributes = hashmap![ |
691 | | /// "my-tag" => hashset!["val"] |
692 | | /// ]; |
693 | | /// let a = Builder::new().tags(tags).tag_attributes(tag_attributes) |
694 | | /// .clean("<my-tag val=1>") |
695 | | /// .to_string(); |
696 | | /// assert_eq!(a, "<my-tag val=\"1\"></my-tag>"); |
697 | | /// # } |
698 | | /// |
699 | | /// # Defaults |
700 | | /// |
701 | | /// ```notest |
702 | | /// a => |
703 | | /// href, hreflang |
704 | | /// bdo => |
705 | | /// dir |
706 | | /// blockquote => |
707 | | /// cite |
708 | | /// col => |
709 | | /// align, char, charoff, span |
710 | | /// colgroup => |
711 | | /// align, char, charoff, span |
712 | | /// del => |
713 | | /// cite, datetime |
714 | | /// hr => |
715 | | /// align, size, width |
716 | | /// img => |
717 | | /// align, alt, height, src, width |
718 | | /// ins => |
719 | | /// cite, datetime |
720 | | /// ol => |
721 | | /// start |
722 | | /// q => |
723 | | /// cite |
724 | | /// table => |
725 | | /// align, char, charoff, summary |
726 | | /// tbody => |
727 | | /// align, char, charoff |
728 | | /// td => |
729 | | /// align, char, charoff, colspan, headers, rowspan |
730 | | /// tfoot => |
731 | | /// align, char, charoff |
732 | | /// th => |
733 | | /// align, char, charoff, colspan, headers, rowspan, scope |
734 | | /// thead => |
735 | | /// align, char, charoff |
736 | | /// tr => |
737 | | /// align, char, charoff |
738 | | /// ``` |
739 | 0 | pub fn tag_attributes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self { |
740 | 0 | self.tag_attributes = value; |
741 | 0 | self |
742 | 0 | } |
743 | | |
744 | | /// Add additonal whitelisted tag-specific attributes without overwriting old ones. |
745 | | /// |
746 | | /// # Examples |
747 | | /// |
748 | | /// let a = ammonia::Builder::default() |
749 | | /// .add_tags(&["my-tag"]) |
750 | | /// .add_tag_attributes("my-tag", &["my-attr"]) |
751 | | /// .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string(); |
752 | | /// assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a); |
753 | 0 | pub fn add_tag_attributes< |
754 | 0 | T: 'a + ?Sized + Borrow<str>, |
755 | 0 | U: 'a + ?Sized + Borrow<str>, |
756 | 0 | I: IntoIter<Item = &'a T>, |
757 | 0 | >( |
758 | 0 | &mut self, |
759 | 0 | tag: &'a U, |
760 | 0 | it: I, |
761 | 0 | ) -> &mut Self { |
762 | 0 | self.tag_attributes |
763 | 0 | .entry(tag.borrow()) |
764 | 0 | .or_default() |
765 | 0 | .extend(it.into_iter().map(Borrow::borrow)); |
766 | 0 | self |
767 | 0 | } |
768 | | |
769 | | /// Remove already-whitelisted tag-specific attributes. |
770 | | /// |
771 | | /// Does nothing if the attribute is already gone. |
772 | | /// |
773 | | /// # Examples |
774 | | /// |
775 | | /// let a = ammonia::Builder::default() |
776 | | /// .rm_tag_attributes("a", &["href"]) |
777 | | /// .clean("<a href=\"/\"></a>").to_string(); |
778 | | /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a); |
779 | 0 | pub fn rm_tag_attributes< |
780 | 0 | 'b, |
781 | 0 | 'c, |
782 | 0 | T: 'b + ?Sized + Borrow<str>, |
783 | 0 | U: 'c + ?Sized + Borrow<str>, |
784 | 0 | I: IntoIter<Item = &'b T>, |
785 | 0 | >( |
786 | 0 | &mut self, |
787 | 0 | tag: &'c U, |
788 | 0 | it: I, |
789 | 0 | ) -> &mut Self { |
790 | 0 | if let Some(tag) = self.tag_attributes.get_mut(tag.borrow()) { |
791 | 0 | for i in it { |
792 | 0 | tag.remove(i.borrow()); |
793 | 0 | } |
794 | 0 | } |
795 | 0 | self |
796 | 0 | } |
797 | | |
798 | | /// Returns a copy of the set of whitelisted tag-specific attributes. |
799 | | /// |
800 | | /// # Examples |
801 | | /// use maplit::{hashmap, hashset}; |
802 | | /// |
803 | | /// let tag_attributes = hashmap![ |
804 | | /// "my-tag" => hashset!["my-attr-1", "my-attr-2"] |
805 | | /// ]; |
806 | | /// |
807 | | /// let mut b = ammonia::Builder::default(); |
808 | | /// b.tag_attributes(Clone::clone(&tag_attributes)); |
809 | | /// assert_eq!(tag_attributes, b.clone_tag_attributes()); |
810 | 0 | pub fn clone_tag_attributes(&self) -> HashMap<&'a str, HashSet<&'a str>> { |
811 | 0 | self.tag_attributes.clone() |
812 | 0 | } |
813 | | |
814 | | /// Sets the values of HTML attributes that are allowed on specific tags. |
815 | | /// |
816 | | /// The value is structured as a map from tag names to a map from attribute names to a set of |
817 | | /// attribute values. |
818 | | /// |
819 | | /// If a tag is not itself whitelisted, adding entries to this map will do nothing. |
820 | | /// |
821 | | /// # Examples |
822 | | /// |
823 | | /// use ammonia::Builder; |
824 | | /// use maplit::{hashmap, hashset}; |
825 | | /// |
826 | | /// # fn main() { |
827 | | /// let tags = hashset!["my-tag"]; |
828 | | /// let tag_attribute_values = hashmap![ |
829 | | /// "my-tag" => hashmap![ |
830 | | /// "my-attr" => hashset!["val"], |
831 | | /// ], |
832 | | /// ]; |
833 | | /// let a = Builder::new().tags(tags).tag_attribute_values(tag_attribute_values) |
834 | | /// .clean("<my-tag my-attr=val>") |
835 | | /// .to_string(); |
836 | | /// assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>"); |
837 | | /// # } |
838 | | /// |
839 | | /// # Defaults |
840 | | /// |
841 | | /// None. |
842 | 0 | pub fn tag_attribute_values( |
843 | 0 | &mut self, |
844 | 0 | value: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>, |
845 | 0 | ) -> &mut Self { |
846 | 0 | self.tag_attribute_values = value; |
847 | 0 | self |
848 | 0 | } |
849 | | |
850 | | /// Add additonal whitelisted tag-specific attribute values without overwriting old ones. |
851 | | /// |
852 | | /// # Examples |
853 | | /// |
854 | | /// let a = ammonia::Builder::default() |
855 | | /// .add_tags(&["my-tag"]) |
856 | | /// .add_tag_attribute_values("my-tag", "my-attr", &[""]) |
857 | | /// .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string(); |
858 | | /// assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a); |
859 | 0 | pub fn add_tag_attribute_values< |
860 | 0 | T: 'a + ?Sized + Borrow<str>, |
861 | 0 | U: 'a + ?Sized + Borrow<str>, |
862 | 0 | V: 'a + ?Sized + Borrow<str>, |
863 | 0 | I: IntoIter<Item = &'a T>, |
864 | 0 | >( |
865 | 0 | &mut self, |
866 | 0 | tag: &'a U, |
867 | 0 | attribute: &'a V, |
868 | 0 | it: I, |
869 | 0 | ) -> &mut Self { |
870 | 0 | self.tag_attribute_values |
871 | 0 | .entry(tag.borrow()) |
872 | 0 | .or_default() |
873 | 0 | .entry(attribute.borrow()) |
874 | 0 | .or_default() |
875 | 0 | .extend(it.into_iter().map(Borrow::borrow)); |
876 | | |
877 | 0 | self |
878 | 0 | } |
879 | | |
880 | | /// Remove already-whitelisted tag-specific attribute values. |
881 | | /// |
882 | | /// Does nothing if the attribute or the value is already gone. |
883 | | /// |
884 | | /// # Examples |
885 | | /// |
886 | | /// let a = ammonia::Builder::default() |
887 | | /// .rm_tag_attributes("a", &["href"]) |
888 | | /// .add_tag_attribute_values("a", "href", &["/"]) |
889 | | /// .rm_tag_attribute_values("a", "href", &["/"]) |
890 | | /// .clean("<a href=\"/\"></a>").to_string(); |
891 | | /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a); |
892 | 0 | pub fn rm_tag_attribute_values< |
893 | 0 | 'b, |
894 | 0 | 'c, |
895 | 0 | T: 'b + ?Sized + Borrow<str>, |
896 | 0 | U: 'c + ?Sized + Borrow<str>, |
897 | 0 | V: 'c + ?Sized + Borrow<str>, |
898 | 0 | I: IntoIter<Item = &'b T>, |
899 | 0 | >( |
900 | 0 | &mut self, |
901 | 0 | tag: &'c U, |
902 | 0 | attribute: &'c V, |
903 | 0 | it: I, |
904 | 0 | ) -> &mut Self { |
905 | 0 | if let Some(attrs) = self |
906 | 0 | .tag_attribute_values |
907 | 0 | .get_mut(tag.borrow()) |
908 | 0 | .and_then(|map| map.get_mut(attribute.borrow())) |
909 | | { |
910 | 0 | for i in it { |
911 | 0 | attrs.remove(i.borrow()); |
912 | 0 | } |
913 | 0 | } |
914 | 0 | self |
915 | 0 | } |
916 | | |
917 | | /// Returns a copy of the set of whitelisted tag-specific attribute values. |
918 | | /// |
919 | | /// # Examples |
920 | | /// |
921 | | /// use maplit::{hashmap, hashset}; |
922 | | /// |
923 | | /// let attribute_values = hashmap![ |
924 | | /// "my-attr-1" => hashset!["foo"], |
925 | | /// "my-attr-2" => hashset!["baz", "bar"], |
926 | | /// ]; |
927 | | /// let tag_attribute_values = hashmap![ |
928 | | /// "my-tag" => attribute_values |
929 | | /// ]; |
930 | | /// |
931 | | /// let mut b = ammonia::Builder::default(); |
932 | | /// b.tag_attribute_values(Clone::clone(&tag_attribute_values)); |
933 | | /// assert_eq!(tag_attribute_values, b.clone_tag_attribute_values()); |
934 | 0 | pub fn clone_tag_attribute_values( |
935 | 0 | &self, |
936 | 0 | ) -> HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>> { |
937 | 0 | self.tag_attribute_values.clone() |
938 | 0 | } |
939 | | |
940 | | /// Sets the values of HTML attributes that are to be set on specific tags. |
941 | | /// |
942 | | /// The value is structured as a map from tag names to a map from attribute names to an |
943 | | /// attribute value. |
944 | | /// |
945 | | /// If a tag is not itself whitelisted, adding entries to this map will do nothing. |
946 | | /// |
947 | | /// # Examples |
948 | | /// |
949 | | /// use ammonia::Builder; |
950 | | /// use maplit::{hashmap, hashset}; |
951 | | /// |
952 | | /// # fn main() { |
953 | | /// let tags = hashset!["my-tag"]; |
954 | | /// let set_tag_attribute_values = hashmap![ |
955 | | /// "my-tag" => hashmap![ |
956 | | /// "my-attr" => "val", |
957 | | /// ], |
958 | | /// ]; |
959 | | /// let a = Builder::new().tags(tags).set_tag_attribute_values(set_tag_attribute_values) |
960 | | /// .clean("<my-tag>") |
961 | | /// .to_string(); |
962 | | /// assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>"); |
963 | | /// # } |
964 | | /// |
965 | | /// # Defaults |
966 | | /// |
967 | | /// None. |
968 | 0 | pub fn set_tag_attribute_values( |
969 | 0 | &mut self, |
970 | 0 | value: HashMap<&'a str, HashMap<&'a str, &'a str>>, |
971 | 0 | ) -> &mut Self { |
972 | 0 | self.set_tag_attribute_values = value; |
973 | 0 | self |
974 | 0 | } |
975 | | |
976 | | /// Add an attribute value to set on a specific element. |
977 | | /// |
978 | | /// # Examples |
979 | | /// |
980 | | /// let a = ammonia::Builder::default() |
981 | | /// .add_tags(&["my-tag"]) |
982 | | /// .set_tag_attribute_value("my-tag", "my-attr", "val") |
983 | | /// .clean("<my-tag>test</my-tag> <span>mess</span>").to_string(); |
984 | | /// assert_eq!("<my-tag my-attr=\"val\">test</my-tag> <span>mess</span>", a); |
985 | 0 | pub fn set_tag_attribute_value< |
986 | 0 | T: 'a + ?Sized + Borrow<str>, |
987 | 0 | A: 'a + ?Sized + Borrow<str>, |
988 | 0 | V: 'a + ?Sized + Borrow<str>, |
989 | 0 | >( |
990 | 0 | &mut self, |
991 | 0 | tag: &'a T, |
992 | 0 | attribute: &'a A, |
993 | 0 | value: &'a V, |
994 | 0 | ) -> &mut Self { |
995 | 0 | self.set_tag_attribute_values |
996 | 0 | .entry(tag.borrow()) |
997 | 0 | .or_default() |
998 | 0 | .insert(attribute.borrow(), value.borrow()); |
999 | 0 | self |
1000 | 0 | } |
1001 | | |
1002 | | /// Remove existing tag-specific attribute values to be set. |
1003 | | /// |
1004 | | /// Does nothing if the attribute is already gone. |
1005 | | /// |
1006 | | /// # Examples |
1007 | | /// |
1008 | | /// let a = ammonia::Builder::default() |
1009 | | /// // this does nothing, since no value is set for this tag attribute yet |
1010 | | /// .rm_set_tag_attribute_value("a", "target") |
1011 | | /// .set_tag_attribute_value("a", "target", "_blank") |
1012 | | /// .rm_set_tag_attribute_value("a", "target") |
1013 | | /// .clean("<a href=\"/\"></a>").to_string(); |
1014 | | /// assert_eq!("<a href=\"/\" rel=\"noopener noreferrer\"></a>", a); |
1015 | 0 | pub fn rm_set_tag_attribute_value< |
1016 | 0 | T: 'a + ?Sized + Borrow<str>, |
1017 | 0 | A: 'a + ?Sized + Borrow<str>, |
1018 | 0 | >( |
1019 | 0 | &mut self, |
1020 | 0 | tag: &'a T, |
1021 | 0 | attribute: &'a A, |
1022 | 0 | ) -> &mut Self { |
1023 | 0 | if let Some(attributes) = self.set_tag_attribute_values.get_mut(tag.borrow()) { |
1024 | 0 | attributes.remove(attribute.borrow()); |
1025 | 0 | } |
1026 | 0 | self |
1027 | 0 | } |
1028 | | |
1029 | | /// Returns the value that will be set for the attribute on the element, if any. |
1030 | | /// |
1031 | | /// # Examples |
1032 | | /// |
1033 | | /// let mut b = ammonia::Builder::default(); |
1034 | | /// b.set_tag_attribute_value("a", "target", "_blank"); |
1035 | | /// let value = b.get_set_tag_attribute_value("a", "target"); |
1036 | | /// assert_eq!(value, Some("_blank")); |
1037 | 0 | pub fn get_set_tag_attribute_value< |
1038 | 0 | T: 'a + ?Sized + Borrow<str>, |
1039 | 0 | A: 'a + ?Sized + Borrow<str>, |
1040 | 0 | >( |
1041 | 0 | &self, |
1042 | 0 | tag: &'a T, |
1043 | 0 | attribute: &'a A, |
1044 | 0 | ) -> Option<&'a str> { |
1045 | 0 | self.set_tag_attribute_values |
1046 | 0 | .get(tag.borrow()) |
1047 | 0 | .and_then(|map| map.get(attribute.borrow())) |
1048 | 0 | .copied() |
1049 | 0 | } |
1050 | | |
1051 | | /// Returns a copy of the set of tag-specific attribute values to be set. |
1052 | | /// |
1053 | | /// # Examples |
1054 | | /// |
1055 | | /// use maplit::{hashmap, hashset}; |
1056 | | /// |
1057 | | /// let attribute_values = hashmap![ |
1058 | | /// "my-attr-1" => "foo", |
1059 | | /// "my-attr-2" => "bar", |
1060 | | /// ]; |
1061 | | /// let set_tag_attribute_values = hashmap![ |
1062 | | /// "my-tag" => attribute_values, |
1063 | | /// ]; |
1064 | | /// |
1065 | | /// let mut b = ammonia::Builder::default(); |
1066 | | /// b.set_tag_attribute_values(Clone::clone(&set_tag_attribute_values)); |
1067 | | /// assert_eq!(set_tag_attribute_values, b.clone_set_tag_attribute_values()); |
1068 | 0 | pub fn clone_set_tag_attribute_values(&self) -> HashMap<&'a str, HashMap<&'a str, &'a str>> { |
1069 | 0 | self.set_tag_attribute_values.clone() |
1070 | 0 | } |
1071 | | |
1072 | | /// Sets the prefix of attributes that are allowed on any tag. |
1073 | | /// |
1074 | | /// # Examples |
1075 | | /// |
1076 | | /// use ammonia::Builder; |
1077 | | /// use maplit::hashset; |
1078 | | /// |
1079 | | /// # fn main() { |
1080 | | /// let prefixes = hashset!["data-"]; |
1081 | | /// let a = Builder::new() |
1082 | | /// .generic_attribute_prefixes(prefixes) |
1083 | | /// .clean("<b data-val=1>") |
1084 | | /// .to_string(); |
1085 | | /// assert_eq!(a, "<b data-val=\"1\"></b>"); |
1086 | | /// # } |
1087 | | /// |
1088 | | /// # Defaults |
1089 | | /// |
1090 | | /// No attribute prefixes are allowed by default. |
1091 | 0 | pub fn generic_attribute_prefixes(&mut self, value: HashSet<&'a str>) -> &mut Self { |
1092 | 0 | self.generic_attribute_prefixes = Some(value); |
1093 | 0 | self |
1094 | 0 | } |
1095 | | |
1096 | | /// Add additional whitelisted attribute prefix without overwriting old ones. |
1097 | | /// |
1098 | | /// # Examples |
1099 | | /// |
1100 | | /// let a = ammonia::Builder::default() |
1101 | | /// .add_generic_attribute_prefixes(&["my-"]) |
1102 | | /// .clean("<span my-attr>mess</span>").to_string(); |
1103 | | /// assert_eq!("<span my-attr=\"\">mess</span>", a); |
1104 | 0 | pub fn add_generic_attribute_prefixes< |
1105 | 0 | T: 'a + ?Sized + Borrow<str>, |
1106 | 0 | I: IntoIter<Item = &'a T>, |
1107 | 0 | >( |
1108 | 0 | &mut self, |
1109 | 0 | it: I, |
1110 | 0 | ) -> &mut Self { |
1111 | 0 | self.generic_attribute_prefixes |
1112 | 0 | .get_or_insert_with(HashSet::new) |
1113 | 0 | .extend(it.into_iter().map(Borrow::borrow)); |
1114 | 0 | self |
1115 | 0 | } |
1116 | | |
1117 | | /// Remove already-whitelisted attribute prefixes. |
1118 | | /// |
1119 | | /// Does nothing if the attribute prefix is already gone. |
1120 | | /// |
1121 | | /// # Examples |
1122 | | /// |
1123 | | /// let a = ammonia::Builder::default() |
1124 | | /// .add_generic_attribute_prefixes(&["data-", "code-"]) |
1125 | | /// .rm_generic_attribute_prefixes(&["data-"]) |
1126 | | /// .clean("<span code-test=\"foo\" data-test=\"cool\"></span>").to_string(); |
1127 | | /// assert_eq!("<span code-test=\"foo\"></span>", a); |
1128 | 0 | pub fn rm_generic_attribute_prefixes< |
1129 | 0 | 'b, |
1130 | 0 | T: 'b + ?Sized + Borrow<str>, |
1131 | 0 | I: IntoIter<Item = &'b T>, |
1132 | 0 | >( |
1133 | 0 | &mut self, |
1134 | 0 | it: I, |
1135 | 0 | ) -> &mut Self { |
1136 | 0 | if let Some(true) = self.generic_attribute_prefixes.as_mut().map(|prefixes| { |
1137 | 0 | for i in it { |
1138 | 0 | let _ = prefixes.remove(i.borrow()); |
1139 | 0 | } |
1140 | 0 | prefixes.is_empty() |
1141 | 0 | }) { |
1142 | 0 | self.generic_attribute_prefixes = None; |
1143 | 0 | } |
1144 | 0 | self |
1145 | 0 | } |
1146 | | |
1147 | | /// Returns a copy of the set of whitelisted attribute prefixes. |
1148 | | /// |
1149 | | /// # Examples |
1150 | | /// |
1151 | | /// use maplit::hashset; |
1152 | | /// |
1153 | | /// let generic_attribute_prefixes = hashset!["my-prfx-1-", "my-prfx-2-"]; |
1154 | | /// |
1155 | | /// let mut b = ammonia::Builder::default(); |
1156 | | /// b.generic_attribute_prefixes(Clone::clone(&generic_attribute_prefixes)); |
1157 | | /// assert_eq!(Some(generic_attribute_prefixes), b.clone_generic_attribute_prefixes()); |
1158 | 0 | pub fn clone_generic_attribute_prefixes(&self) -> Option<HashSet<&'a str>> { |
1159 | 0 | self.generic_attribute_prefixes.clone() |
1160 | 0 | } |
1161 | | |
1162 | | /// Sets the attributes that are allowed on any tag. |
1163 | | /// |
1164 | | /// # Examples |
1165 | | /// |
1166 | | /// use ammonia::Builder; |
1167 | | /// use maplit::hashset; |
1168 | | /// |
1169 | | /// # fn main() { |
1170 | | /// let attributes = hashset!["data-val"]; |
1171 | | /// let a = Builder::new() |
1172 | | /// .generic_attributes(attributes) |
1173 | | /// .clean("<b data-val=1>") |
1174 | | /// .to_string(); |
1175 | | /// assert_eq!(a, "<b data-val=\"1\"></b>"); |
1176 | | /// # } |
1177 | | /// |
1178 | | /// # Defaults |
1179 | | /// |
1180 | | /// ```notest |
1181 | | /// lang, title |
1182 | | /// ``` |
1183 | 0 | pub fn generic_attributes(&mut self, value: HashSet<&'a str>) -> &mut Self { |
1184 | 0 | self.generic_attributes = value; |
1185 | 0 | self |
1186 | 0 | } |
1187 | | |
1188 | | /// Add additonal whitelisted attributes without overwriting old ones. |
1189 | | /// |
1190 | | /// # Examples |
1191 | | /// |
1192 | | /// let a = ammonia::Builder::default() |
1193 | | /// .add_generic_attributes(&["my-attr"]) |
1194 | | /// .clean("<span my-attr>mess</span>").to_string(); |
1195 | | /// assert_eq!("<span my-attr=\"\">mess</span>", a); |
1196 | 0 | pub fn add_generic_attributes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>( |
1197 | 0 | &mut self, |
1198 | 0 | it: I, |
1199 | 0 | ) -> &mut Self { |
1200 | 0 | self.generic_attributes |
1201 | 0 | .extend(it.into_iter().map(Borrow::borrow)); |
1202 | 0 | self |
1203 | 0 | } |
1204 | | |
1205 | | /// Remove already-whitelisted attributes. |
1206 | | /// |
1207 | | /// Does nothing if the attribute is already gone. |
1208 | | /// |
1209 | | /// # Examples |
1210 | | /// |
1211 | | /// let a = ammonia::Builder::default() |
1212 | | /// .rm_generic_attributes(&["title"]) |
1213 | | /// .clean("<span title=\"cool\"></span>").to_string(); |
1214 | | /// assert_eq!("<span></span>", a); |
1215 | 0 | pub fn rm_generic_attributes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>( |
1216 | 0 | &mut self, |
1217 | 0 | it: I, |
1218 | 0 | ) -> &mut Self { |
1219 | 0 | for i in it { |
1220 | 0 | self.generic_attributes.remove(i.borrow()); |
1221 | 0 | } |
1222 | 0 | self |
1223 | 0 | } |
1224 | | |
1225 | | /// Returns a copy of the set of whitelisted attributes. |
1226 | | /// |
1227 | | /// # Examples |
1228 | | /// |
1229 | | /// use maplit::hashset; |
1230 | | /// |
1231 | | /// let generic_attributes = hashset!["my-attr-1", "my-attr-2"]; |
1232 | | /// |
1233 | | /// let mut b = ammonia::Builder::default(); |
1234 | | /// b.generic_attributes(Clone::clone(&generic_attributes)); |
1235 | | /// assert_eq!(generic_attributes, b.clone_generic_attributes()); |
1236 | 0 | pub fn clone_generic_attributes(&self) -> HashSet<&'a str> { |
1237 | 0 | self.generic_attributes.clone() |
1238 | 0 | } |
1239 | | |
1240 | | /// Sets the URL schemes permitted on `href` and `src` attributes. |
1241 | | /// |
1242 | | /// # Examples |
1243 | | /// |
1244 | | /// use ammonia::Builder; |
1245 | | /// use maplit::hashset; |
1246 | | /// |
1247 | | /// # fn main() { |
1248 | | /// let url_schemes = hashset![ |
1249 | | /// "http", "https", "mailto", "magnet" |
1250 | | /// ]; |
1251 | | /// let a = Builder::new().url_schemes(url_schemes) |
1252 | | /// .clean("<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&xl=0&dn=zero_len.fil&xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\">zero-length file</a>") |
1253 | | /// .to_string(); |
1254 | | /// |
1255 | | /// // See `link_rel` for information on the rel="noopener noreferrer" attribute |
1256 | | /// // in the cleaned HTML. |
1257 | | /// assert_eq!(a, |
1258 | | /// "<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&xl=0&dn=zero_len.fil&xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\" rel=\"noopener noreferrer\">zero-length file</a>"); |
1259 | | /// # } |
1260 | | /// |
1261 | | /// # Defaults |
1262 | | /// |
1263 | | /// ```notest |
1264 | | /// bitcoin, ftp, ftps, geo, http, https, im, irc, |
1265 | | /// ircs, magnet, mailto, mms, mx, news, nntp, |
1266 | | /// openpgp4fpr, sip, sms, smsto, ssh, tel, url, |
1267 | | /// webcal, wtai, xmpp |
1268 | | /// ``` |
1269 | 0 | pub fn url_schemes(&mut self, value: HashSet<&'a str>) -> &mut Self { |
1270 | 0 | self.url_schemes = value; |
1271 | 0 | self |
1272 | 0 | } |
1273 | | |
1274 | | /// Add additonal whitelisted URL schemes without overwriting old ones. |
1275 | | /// |
1276 | | /// # Examples |
1277 | | /// |
1278 | | /// let a = ammonia::Builder::default() |
1279 | | /// .add_url_schemes(&["my-scheme"]) |
1280 | | /// .clean("<a href=my-scheme:home>mess</span>").to_string(); |
1281 | | /// assert_eq!("<a href=\"my-scheme:home\" rel=\"noopener noreferrer\">mess</a>", a); |
1282 | 0 | pub fn add_url_schemes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>( |
1283 | 0 | &mut self, |
1284 | 0 | it: I, |
1285 | 0 | ) -> &mut Self { |
1286 | 0 | self.url_schemes.extend(it.into_iter().map(Borrow::borrow)); |
1287 | 0 | self |
1288 | 0 | } |
1289 | | |
1290 | | /// Remove already-whitelisted attributes. |
1291 | | /// |
1292 | | /// Does nothing if the attribute is already gone. |
1293 | | /// |
1294 | | /// # Examples |
1295 | | /// |
1296 | | /// let a = ammonia::Builder::default() |
1297 | | /// .rm_url_schemes(&["ftp"]) |
1298 | | /// .clean("<a href=\"ftp://ftp.mozilla.org/\"></a>").to_string(); |
1299 | | /// assert_eq!("<a rel=\"noopener noreferrer\"></a>", a); |
1300 | 0 | pub fn rm_url_schemes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>( |
1301 | 0 | &mut self, |
1302 | 0 | it: I, |
1303 | 0 | ) -> &mut Self { |
1304 | 0 | for i in it { |
1305 | 0 | self.url_schemes.remove(i.borrow()); |
1306 | 0 | } |
1307 | 0 | self |
1308 | 0 | } |
1309 | | |
1310 | | /// Returns a copy of the set of whitelisted URL schemes. |
1311 | | /// |
1312 | | /// # Examples |
1313 | | /// use maplit::hashset; |
1314 | | /// |
1315 | | /// let url_schemes = hashset!["my-scheme-1", "my-scheme-2"]; |
1316 | | /// |
1317 | | /// let mut b = ammonia::Builder::default(); |
1318 | | /// b.url_schemes(Clone::clone(&url_schemes)); |
1319 | | /// assert_eq!(url_schemes, b.clone_url_schemes()); |
1320 | 0 | pub fn clone_url_schemes(&self) -> HashSet<&'a str> { |
1321 | 0 | self.url_schemes.clone() |
1322 | 0 | } |
1323 | | |
1324 | | /// Configures the behavior for relative URLs: pass-through, resolve-with-base, or deny. |
1325 | | /// |
1326 | | /// # Examples |
1327 | | /// |
1328 | | /// use ammonia::{Builder, UrlRelative}; |
1329 | | /// |
1330 | | /// let a = Builder::new().url_relative(UrlRelative::PassThrough) |
1331 | | /// .clean("<a href=/>Home</a>") |
1332 | | /// .to_string(); |
1333 | | /// |
1334 | | /// // See `link_rel` for information on the rel="noopener noreferrer" attribute |
1335 | | /// // in the cleaned HTML. |
1336 | | /// assert_eq!( |
1337 | | /// a, |
1338 | | /// "<a href=\"/\" rel=\"noopener noreferrer\">Home</a>"); |
1339 | | /// |
1340 | | /// # Defaults |
1341 | | /// |
1342 | | /// ```notest |
1343 | | /// UrlRelative::PassThrough |
1344 | | /// ``` |
1345 | 0 | pub fn url_relative(&mut self, value: UrlRelative<'a>) -> &mut Self { |
1346 | 0 | self.url_relative = value; |
1347 | 0 | self |
1348 | 0 | } |
1349 | | |
1350 | | /// Allows rewriting of all attributes using a callback. |
1351 | | /// |
1352 | | /// The callback takes name of the element, attribute and its value. |
1353 | | /// Returns `None` to remove the attribute, or a value to use. |
1354 | | /// |
1355 | | /// Rewriting of attributes with URLs is done before `url_relative()`. |
1356 | | /// |
1357 | | /// # Panics |
1358 | | /// |
1359 | | /// If more than one callback is set. |
1360 | | /// |
1361 | | /// # Examples |
1362 | | /// |
1363 | | /// ```rust |
1364 | | /// use ammonia::Builder; |
1365 | | /// let a = Builder::new() |
1366 | | /// .attribute_filter(|element, attribute, value| { |
1367 | | /// match (element, attribute) { |
1368 | | /// ("img", "src") => None, |
1369 | | /// _ => Some(value.into()) |
1370 | | /// } |
1371 | | /// }) |
1372 | | /// .link_rel(None) |
1373 | | /// .clean("<a href=/><img alt=Home src=foo></a>") |
1374 | | /// .to_string(); |
1375 | | /// assert_eq!(a, |
1376 | | /// r#"<a href="/"><img alt="Home"></a>"#); |
1377 | | /// ``` |
1378 | 0 | pub fn attribute_filter<'cb, CallbackFn>(&mut self, callback: CallbackFn) -> &mut Self |
1379 | 0 | where |
1380 | 0 | CallbackFn: for<'u> Fn(&str, &str, &'u str) -> Option<Cow<'u, str>> + Send + Sync + 'static, |
1381 | | { |
1382 | 0 | assert!( |
1383 | 0 | self.attribute_filter.is_none(), |
1384 | | "attribute_filter can be set only once" |
1385 | | ); |
1386 | 0 | self.attribute_filter = Some(Box::new(callback)); |
1387 | 0 | self |
1388 | 0 | } |
1389 | | |
1390 | | /// Returns `true` if the relative URL resolver is set to `Deny`. |
1391 | | /// |
1392 | | /// # Examples |
1393 | | /// |
1394 | | /// use ammonia::{Builder, UrlRelative}; |
1395 | | /// let mut a = Builder::default(); |
1396 | | /// a.url_relative(UrlRelative::Deny); |
1397 | | /// assert!(a.is_url_relative_deny()); |
1398 | | /// a.url_relative(UrlRelative::PassThrough); |
1399 | | /// assert!(!a.is_url_relative_deny()); |
1400 | 0 | pub fn is_url_relative_deny(&self) -> bool { |
1401 | 0 | matches!(self.url_relative, UrlRelative::Deny) |
1402 | 0 | } |
1403 | | |
1404 | | /// Returns `true` if the relative URL resolver is set to `PassThrough`. |
1405 | | /// |
1406 | | /// # Examples |
1407 | | /// |
1408 | | /// use ammonia::{Builder, UrlRelative}; |
1409 | | /// let mut a = Builder::default(); |
1410 | | /// a.url_relative(UrlRelative::Deny); |
1411 | | /// assert!(!a.is_url_relative_pass_through()); |
1412 | | /// a.url_relative(UrlRelative::PassThrough); |
1413 | | /// assert!(a.is_url_relative_pass_through()); |
1414 | 0 | pub fn is_url_relative_pass_through(&self) -> bool { |
1415 | 0 | matches!(self.url_relative, UrlRelative::PassThrough) |
1416 | 0 | } |
1417 | | |
1418 | | /// Returns `true` if the relative URL resolver is set to `Custom`. |
1419 | | /// |
1420 | | /// # Examples |
1421 | | /// |
1422 | | /// use ammonia::{Builder, UrlRelative}; |
1423 | | /// use std::borrow::Cow; |
1424 | | /// fn test(a: &str) -> Option<Cow<str>> { None } |
1425 | | /// # fn main() { |
1426 | | /// let mut a = Builder::default(); |
1427 | | /// a.url_relative(UrlRelative::Custom(Box::new(test))); |
1428 | | /// assert!(a.is_url_relative_custom()); |
1429 | | /// a.url_relative(UrlRelative::PassThrough); |
1430 | | /// assert!(!a.is_url_relative_custom()); |
1431 | | /// a.url_relative(UrlRelative::Deny); |
1432 | | /// assert!(!a.is_url_relative_custom()); |
1433 | | /// # } |
1434 | 0 | pub fn is_url_relative_custom(&self) -> bool { |
1435 | 0 | matches!(self.url_relative, UrlRelative::Custom(_)) |
1436 | 0 | } |
1437 | | |
1438 | | /// Configures a `rel` attribute that will be added on links. |
1439 | | /// |
1440 | | /// If `rel` is in the generic or tag attributes, this must be set to `None`. |
1441 | | /// Common `rel` values to include: |
1442 | | /// |
1443 | | /// * `noopener`: This prevents [a particular type of XSS attack], |
1444 | | /// and should usually be turned on for untrusted HTML. |
1445 | | /// * `noreferrer`: This prevents the browser from [sending the source URL] |
1446 | | /// to the website that is linked to. |
1447 | | /// * `nofollow`: This prevents search engines from [using this link for |
1448 | | /// ranking], which disincentivizes spammers. |
1449 | | /// |
1450 | | /// To turn on rel-insertion, call this function with a space-separated list. |
1451 | | /// Ammonia does not parse rel-attributes; |
1452 | | /// it just puts the given string into the attribute directly. |
1453 | | /// |
1454 | | /// [a particular type of XSS attack]: https://mathiasbynens.github.io/rel-noopener/ |
1455 | | /// [sending the source URL]: https://en.wikipedia.org/wiki/HTTP_referer |
1456 | | /// [using this link for ranking]: https://en.wikipedia.org/wiki/Nofollow |
1457 | | /// |
1458 | | /// # Examples |
1459 | | /// |
1460 | | /// use ammonia::Builder; |
1461 | | /// |
1462 | | /// let a = Builder::new().link_rel(None) |
1463 | | /// .clean("<a href=https://rust-lang.org/>Rust</a>") |
1464 | | /// .to_string(); |
1465 | | /// assert_eq!( |
1466 | | /// a, |
1467 | | /// "<a href=\"https://rust-lang.org/\">Rust</a>"); |
1468 | | /// |
1469 | | /// # Defaults |
1470 | | /// |
1471 | | /// ```notest |
1472 | | /// Some("noopener noreferrer") |
1473 | | /// ``` |
1474 | 0 | pub fn link_rel(&mut self, value: Option<&'a str>) -> &mut Self { |
1475 | 0 | self.link_rel = value; |
1476 | 0 | self |
1477 | 0 | } |
1478 | | |
1479 | | /// Returns the settings for links' `rel` attribute, if one is set. |
1480 | | /// |
1481 | | /// # Examples |
1482 | | /// |
1483 | | /// use ammonia::{Builder, UrlRelative}; |
1484 | | /// let mut a = Builder::default(); |
1485 | | /// a.link_rel(Some("a b")); |
1486 | | /// assert_eq!(a.get_link_rel(), Some("a b")); |
1487 | 0 | pub fn get_link_rel(&self) -> Option<&str> { |
1488 | 0 | self.link_rel |
1489 | 0 | } |
1490 | | |
1491 | | /// Sets the CSS classes that are allowed on specific tags. |
1492 | | /// |
1493 | | /// The values is structured as a map from tag names to a set of class names. |
1494 | | /// |
1495 | | /// If the `class` attribute is itself whitelisted for a tag, then adding entries to |
1496 | | /// this map will cause a panic. |
1497 | | /// |
1498 | | /// # Examples |
1499 | | /// |
1500 | | /// use ammonia::Builder; |
1501 | | /// use maplit::{hashmap, hashset}; |
1502 | | /// |
1503 | | /// # fn main() { |
1504 | | /// let allowed_classes = hashmap![ |
1505 | | /// "code" => hashset!["rs", "ex", "c", "cxx", "js"] |
1506 | | /// ]; |
1507 | | /// let a = Builder::new() |
1508 | | /// .allowed_classes(allowed_classes) |
1509 | | /// .clean("<code class=rs>fn main() {}</code>") |
1510 | | /// .to_string(); |
1511 | | /// assert_eq!( |
1512 | | /// a, |
1513 | | /// "<code class=\"rs\">fn main() {}</code>"); |
1514 | | /// # } |
1515 | | /// |
1516 | | /// # Defaults |
1517 | | /// |
1518 | | /// The set of allowed classes is empty by default. |
1519 | 0 | pub fn allowed_classes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self { |
1520 | 0 | self.allowed_classes = value; |
1521 | 0 | self |
1522 | 0 | } |
1523 | | |
1524 | | /// Add additonal whitelisted classes without overwriting old ones. |
1525 | | /// |
1526 | | /// # Examples |
1527 | | /// |
1528 | | /// let a = ammonia::Builder::default() |
1529 | | /// .add_allowed_classes("a", &["onebox"]) |
1530 | | /// .clean("<a href=/ class=onebox>mess</span>").to_string(); |
1531 | | /// assert_eq!("<a href=\"/\" class=\"onebox\" rel=\"noopener noreferrer\">mess</a>", a); |
1532 | 0 | pub fn add_allowed_classes< |
1533 | 0 | T: 'a + ?Sized + Borrow<str>, |
1534 | 0 | U: 'a + ?Sized + Borrow<str>, |
1535 | 0 | I: IntoIter<Item = &'a T>, |
1536 | 0 | >( |
1537 | 0 | &mut self, |
1538 | 0 | tag: &'a U, |
1539 | 0 | it: I, |
1540 | 0 | ) -> &mut Self { |
1541 | 0 | self.allowed_classes |
1542 | 0 | .entry(tag.borrow()) |
1543 | 0 | .or_default() |
1544 | 0 | .extend(it.into_iter().map(Borrow::borrow)); |
1545 | 0 | self |
1546 | 0 | } |
1547 | | |
1548 | | /// Remove already-whitelisted attributes. |
1549 | | /// |
1550 | | /// Does nothing if the attribute is already gone. |
1551 | | /// |
1552 | | /// # Examples |
1553 | | /// |
1554 | | /// let a = ammonia::Builder::default() |
1555 | | /// .add_allowed_classes("span", &["active"]) |
1556 | | /// .rm_allowed_classes("span", &["active"]) |
1557 | | /// .clean("<span class=active>").to_string(); |
1558 | | /// assert_eq!("<span class=\"\"></span>", a); |
1559 | 0 | pub fn rm_allowed_classes< |
1560 | 0 | 'b, |
1561 | 0 | 'c, |
1562 | 0 | T: 'b + ?Sized + Borrow<str>, |
1563 | 0 | U: 'c + ?Sized + Borrow<str>, |
1564 | 0 | I: IntoIter<Item = &'b T>, |
1565 | 0 | >( |
1566 | 0 | &mut self, |
1567 | 0 | tag: &'c U, |
1568 | 0 | it: I, |
1569 | 0 | ) -> &mut Self { |
1570 | 0 | if let Some(tag) = self.allowed_classes.get_mut(tag.borrow()) { |
1571 | 0 | for i in it { |
1572 | 0 | tag.remove(i.borrow()); |
1573 | 0 | } |
1574 | 0 | } |
1575 | 0 | self |
1576 | 0 | } |
1577 | | |
1578 | | /// Returns a copy of the set of whitelisted class attributes. |
1579 | | /// |
1580 | | /// # Examples |
1581 | | /// |
1582 | | /// use maplit::{hashmap, hashset}; |
1583 | | /// |
1584 | | /// let allowed_classes = hashmap![ |
1585 | | /// "my-tag" => hashset!["my-class-1", "my-class-2"] |
1586 | | /// ]; |
1587 | | /// |
1588 | | /// let mut b = ammonia::Builder::default(); |
1589 | | /// b.allowed_classes(Clone::clone(&allowed_classes)); |
1590 | | /// assert_eq!(allowed_classes, b.clone_allowed_classes()); |
1591 | 0 | pub fn clone_allowed_classes(&self) -> HashMap<&'a str, HashSet<&'a str>> { |
1592 | 0 | self.allowed_classes.clone() |
1593 | 0 | } |
1594 | | |
1595 | | /// Configures the handling of HTML comments. |
1596 | | /// |
1597 | | /// If this option is false, comments will be preserved. |
1598 | | /// |
1599 | | /// # Examples |
1600 | | /// |
1601 | | /// use ammonia::Builder; |
1602 | | /// |
1603 | | /// let a = Builder::new().strip_comments(false) |
1604 | | /// .clean("<!-- yes -->") |
1605 | | /// .to_string(); |
1606 | | /// assert_eq!( |
1607 | | /// a, |
1608 | | /// "<!-- yes -->"); |
1609 | | /// |
1610 | | /// # Defaults |
1611 | | /// |
1612 | | /// `true` |
1613 | 0 | pub fn strip_comments(&mut self, value: bool) -> &mut Self { |
1614 | 0 | self.strip_comments = value; |
1615 | 0 | self |
1616 | 0 | } |
1617 | | |
1618 | | /// Returns `true` if comment stripping is turned on. |
1619 | | /// |
1620 | | /// # Examples |
1621 | | /// |
1622 | | /// let mut a = ammonia::Builder::new(); |
1623 | | /// a.strip_comments(true); |
1624 | | /// assert!(a.will_strip_comments()); |
1625 | | /// a.strip_comments(false); |
1626 | | /// assert!(!a.will_strip_comments()); |
1627 | 0 | pub fn will_strip_comments(&self) -> bool { |
1628 | 0 | self.strip_comments |
1629 | 0 | } |
1630 | | |
1631 | | /// Prefixes all "id" attribute values with a given string. Note that the tag and |
1632 | | /// attribute themselves must still be whitelisted. |
1633 | | /// |
1634 | | /// # Examples |
1635 | | /// |
1636 | | /// use ammonia::Builder; |
1637 | | /// use maplit::hashset; |
1638 | | /// |
1639 | | /// # fn main() { |
1640 | | /// let attributes = hashset!["id"]; |
1641 | | /// let a = Builder::new() |
1642 | | /// .generic_attributes(attributes) |
1643 | | /// .id_prefix(Some("safe-")) |
1644 | | /// .clean("<b id=42>") |
1645 | | /// .to_string(); |
1646 | | /// assert_eq!(a, "<b id=\"safe-42\"></b>"); |
1647 | | /// # } |
1648 | | |
1649 | | /// |
1650 | | /// # Defaults |
1651 | | /// |
1652 | | /// `None` |
1653 | 0 | pub fn id_prefix(&mut self, value: Option<&'a str>) -> &mut Self { |
1654 | 0 | self.id_prefix = value; |
1655 | 0 | self |
1656 | 0 | } |
1657 | | |
1658 | | /// Only allows the specified properties in `style` attributes. |
1659 | | /// |
1660 | | /// Irrelevant if `style` is not an allowed attribute. |
1661 | | /// |
1662 | | /// Note that if style filtering is enabled style properties will be normalised e.g. |
1663 | | /// invalid declarations and @rules will be removed, with only syntactically valid |
1664 | | /// declarations kept. |
1665 | | /// |
1666 | | /// # Examples |
1667 | | /// |
1668 | | /// use ammonia::Builder; |
1669 | | /// use maplit::hashset; |
1670 | | /// |
1671 | | /// # fn main() { |
1672 | | /// let attributes = hashset!["style"]; |
1673 | | /// let properties = hashset!["color"]; |
1674 | | /// let a = Builder::new() |
1675 | | /// .generic_attributes(attributes) |
1676 | | /// .filter_style_properties(properties) |
1677 | | /// .clean("<p style=\"font-weight: heavy; color: red\">my html</p>") |
1678 | | /// .to_string(); |
1679 | | /// assert_eq!(a, "<p style=\"color:red\">my html</p>"); |
1680 | | /// # } |
1681 | 0 | pub fn filter_style_properties(&mut self, value: HashSet<&'a str>) -> &mut Self { |
1682 | 0 | self.style_properties = Some(value); |
1683 | 0 | self |
1684 | 0 | } |
1685 | | |
1686 | | /// Constructs a [`Builder`] instance configured with the [default options]. |
1687 | | /// |
1688 | | /// # Examples |
1689 | | /// |
1690 | | /// use ammonia::{Builder, Url, UrlRelative}; |
1691 | | /// # use std::error::Error; |
1692 | | /// |
1693 | | /// # fn do_main() -> Result<(), Box<dyn Error>> { |
1694 | | /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>."; |
1695 | | /// let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>."; |
1696 | | /// |
1697 | | /// let result = Builder::new() // <-- |
1698 | | /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?)) |
1699 | | /// .clean(input) |
1700 | | /// .to_string(); |
1701 | | /// assert_eq!(result, output); |
1702 | | /// # Ok(()) |
1703 | | /// # } |
1704 | | /// # fn main() { do_main().unwrap() } |
1705 | | /// |
1706 | | /// [default options]: fn.clean.html |
1707 | | /// [`Builder`]: struct.Builder.html |
1708 | 0 | pub fn new() -> Self { |
1709 | 0 | Self::default() |
1710 | 0 | } |
1711 | | |
1712 | | /// Constructs a [`Builder`] instance configured with no allowed tags. |
1713 | | /// |
1714 | | /// # Examples |
1715 | | /// |
1716 | | /// use ammonia::{Builder, Url, UrlRelative}; |
1717 | | /// # use std::error::Error; |
1718 | | /// |
1719 | | /// # fn do_main() -> Result<(), Box<dyn Error>> { |
1720 | | /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>empty()</code> function</a>."; |
1721 | | /// let output = "This is an Ammonia example using the empty() function."; |
1722 | | /// |
1723 | | /// let result = Builder::empty() // <-- |
1724 | | /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?)) |
1725 | | /// .clean(input) |
1726 | | /// .to_string(); |
1727 | | /// assert_eq!(result, output); |
1728 | | /// # Ok(()) |
1729 | | /// # } |
1730 | | /// # fn main() { do_main().unwrap() } |
1731 | | /// |
1732 | | /// [default options]: fn.clean.html |
1733 | | /// [`Builder`]: struct.Builder.html |
1734 | 0 | pub fn empty() -> Self { |
1735 | 0 | Self { |
1736 | 0 | tags: hashset![], |
1737 | 0 | ..Self::default() |
1738 | 0 | } |
1739 | 0 | } |
1740 | | |
1741 | | /// Sanitizes an HTML fragment in a string according to the configured options. |
1742 | | /// |
1743 | | /// # Examples |
1744 | | /// |
1745 | | /// use ammonia::{Builder, Url, UrlRelative}; |
1746 | | /// # use std::error::Error; |
1747 | | /// |
1748 | | /// # fn do_main() -> Result<(), Box<dyn Error>> { |
1749 | | /// let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>."; |
1750 | | /// let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>."; |
1751 | | /// |
1752 | | /// let result = Builder::new() |
1753 | | /// .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?)) |
1754 | | /// .clean(input) |
1755 | | /// .to_string(); // <-- |
1756 | | /// assert_eq!(result, output); |
1757 | | /// # Ok(()) |
1758 | | /// # } |
1759 | | /// # fn main() { do_main().unwrap() } |
1760 | 0 | pub fn clean(&self, src: &str) -> Document { |
1761 | 0 | let parser = Self::make_parser(); |
1762 | 0 | let dom = parser.one(src); |
1763 | 0 | self.clean_dom(dom) |
1764 | 0 | } |
1765 | | |
1766 | | /// Sanitizes an HTML fragment from a reader according to the configured options. |
1767 | | /// |
1768 | | /// The input should be in UTF-8 encoding, otherwise the decoding is lossy, just |
1769 | | /// like when using [`String::from_utf8_lossy`]. |
1770 | | /// |
1771 | | /// To avoid consuming the reader, a mutable reference can be passed to this method. |
1772 | | /// |
1773 | | /// # Examples |
1774 | | /// |
1775 | | /// use ammonia::Builder; |
1776 | | /// # use std::error::Error; |
1777 | | /// |
1778 | | /// # fn do_main() -> Result<(), Box<dyn Error>> { |
1779 | | /// let a = Builder::new() |
1780 | | /// .clean_from_reader(&b"<!-- no -->"[..])? // notice the `b` |
1781 | | /// .to_string(); |
1782 | | /// assert_eq!(a, ""); |
1783 | | /// # Ok(()) } |
1784 | | /// # fn main() { do_main().unwrap() } |
1785 | | /// |
1786 | | /// [`String::from_utf8_lossy`]: https://doc.rust-lang.org/std/string/struct.String.html#method.from_utf8_lossy |
1787 | 0 | pub fn clean_from_reader<R>(&self, mut src: R) -> io::Result<Document> |
1788 | 0 | where |
1789 | 0 | R: io::Read, |
1790 | | { |
1791 | 0 | let parser = Self::make_parser().from_utf8(); |
1792 | 0 | let dom = parser.read_from(&mut src)?; |
1793 | 0 | Ok(self.clean_dom(dom)) |
1794 | 0 | } |
1795 | | |
1796 | | /// Clean a post-parsing DOM. |
1797 | | /// |
1798 | | /// This is not a public API because RcDom isn't really stable. |
1799 | | /// We want to be able to take breaking changes to html5ever itself |
1800 | | /// without having to break Ammonia's API. |
1801 | 0 | fn clean_dom(&self, dom: RcDom) -> Document { |
1802 | 0 | let mut stack = Vec::new(); |
1803 | 0 | let mut removed = Vec::new(); |
1804 | 0 | let link_rel = self |
1805 | 0 | .link_rel |
1806 | 0 | .map(|link_rel| format_tendril!("{}", link_rel)); |
1807 | 0 | if link_rel.is_some() { |
1808 | 0 | assert!(self.generic_attributes.get("rel").is_none()); |
1809 | 0 | assert!(self |
1810 | 0 | .tag_attributes |
1811 | 0 | .get("a") |
1812 | 0 | .and_then(|a| a.get("rel")) |
1813 | 0 | .is_none()); |
1814 | 0 | } |
1815 | 0 | assert!(self.allowed_classes.is_empty() || !self.generic_attributes.contains("class")); |
1816 | 0 | for tag_name in self.allowed_classes.keys() { |
1817 | 0 | assert!(self |
1818 | 0 | .tag_attributes |
1819 | 0 | .get(tag_name) |
1820 | 0 | .and_then(|a| a.get("class")) |
1821 | 0 | .is_none()); |
1822 | | } |
1823 | 0 | for tag_name in &self.clean_content_tags { |
1824 | 0 | assert!(!self.tags.contains(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tags` at the same time"); |
1825 | 0 | assert!(!self.tag_attributes.contains_key(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tag_attributes` at the same time"); |
1826 | | } |
1827 | 0 | let body = { |
1828 | 0 | let children = dom.document.children.borrow(); |
1829 | 0 | children[0].clone() |
1830 | | }; |
1831 | 0 | stack.extend( |
1832 | 0 | mem::take(&mut *body.children.borrow_mut()) |
1833 | 0 | .into_iter() |
1834 | 0 | .rev(), |
1835 | | ); |
1836 | | // This design approach is used to prevent pathological content from producing |
1837 | | // a stack overflow. The `stack` contains to-be-cleaned nodes, while `remove`, |
1838 | | // of course, contains nodes that need to be dropped (we can't just drop them, |
1839 | | // because they could have a very deep child tree). |
1840 | 0 | while let Some(mut node) = stack.pop() { |
1841 | 0 | let parent = node.parent |
1842 | 0 | .replace(None).expect("a node in the DOM will have a parent, except the root, which is not processed") |
1843 | 0 | .upgrade().expect("a node's parent will be pointed to by its parent (or the root pointer), and will not be dropped"); |
1844 | 0 | if self.clean_node_content(&node) || !self.check_expected_namespace(&parent, &node) { |
1845 | 0 | removed.push(node); |
1846 | 0 | continue; |
1847 | 0 | } |
1848 | 0 | let pass = self.clean_child(&mut node); |
1849 | 0 | if pass { |
1850 | 0 | self.adjust_node_attributes(&mut node, &link_rel, self.id_prefix); |
1851 | 0 | dom.append(&parent.clone(), NodeOrText::AppendNode(node.clone())); |
1852 | 0 | } else { |
1853 | 0 | for sub in node.children.borrow_mut().iter_mut() { |
1854 | 0 | sub.parent.replace(Some(Rc::downgrade(&parent))); |
1855 | 0 | } |
1856 | | } |
1857 | 0 | stack.extend( |
1858 | 0 | mem::take(&mut *node.children.borrow_mut()) |
1859 | 0 | .into_iter() |
1860 | 0 | .rev(), |
1861 | | ); |
1862 | 0 | if !pass { |
1863 | 0 | removed.push(node); |
1864 | 0 | } |
1865 | | } |
1866 | | // Now, imperatively clean up all of the child nodes. |
1867 | | // Otherwise, we could wind up with a DoS, either caused by a memory leak, |
1868 | | // or caused by a stack overflow. |
1869 | 0 | while let Some(node) = removed.pop() { |
1870 | 0 | removed.extend_from_slice(&mem::take(&mut *node.children.borrow_mut())[..]); |
1871 | 0 | } |
1872 | 0 | Document(dom) |
1873 | 0 | } |
1874 | | |
1875 | | /// Returns `true` if a node and all its content should be removed. |
1876 | 0 | fn clean_node_content(&self, node: &Handle) -> bool { |
1877 | 0 | match node.data { |
1878 | | NodeData::Text { .. } |
1879 | | | NodeData::Comment { .. } |
1880 | | | NodeData::Doctype { .. } |
1881 | | | NodeData::Document |
1882 | 0 | | NodeData::ProcessingInstruction { .. } => false, |
1883 | 0 | NodeData::Element { ref name, .. } => self.clean_content_tags.contains(&*name.local), |
1884 | | } |
1885 | 0 | } |
1886 | | |
1887 | | /// Remove unwanted attributes, and check if the node should be kept or not. |
1888 | | /// |
1889 | | /// The root node doesn't need cleaning because we create the root node ourselves, |
1890 | | /// and it doesn't get serialized, and ... it just exists to give the parser |
1891 | | /// a context (in this case, a div-like block context). |
1892 | 0 | fn clean_child(&self, child: &mut Handle) -> bool { |
1893 | 0 | match child.data { |
1894 | 0 | NodeData::Text { .. } => true, |
1895 | 0 | NodeData::Comment { .. } => !self.strip_comments, |
1896 | | NodeData::Doctype { .. } |
1897 | | | NodeData::Document |
1898 | 0 | | NodeData::ProcessingInstruction { .. } => false, |
1899 | | NodeData::Element { |
1900 | 0 | ref name, |
1901 | 0 | ref attrs, |
1902 | | .. |
1903 | | } => { |
1904 | 0 | if self.tags.contains(&*name.local) { |
1905 | 0 | let attr_filter = |attr: &html5ever::Attribute| { |
1906 | 0 | let whitelisted = self.generic_attributes.contains(&*attr.name.local) |
1907 | 0 | || self.generic_attribute_prefixes.as_ref().map(|prefixes| { |
1908 | 0 | prefixes.iter().any(|&p| attr.name.local.starts_with(p)) |
1909 | 0 | }) == Some(true) |
1910 | 0 | || self |
1911 | 0 | .tag_attributes |
1912 | 0 | .get(&*name.local) |
1913 | 0 | .map(|ta| ta.contains(&*attr.name.local)) |
1914 | 0 | == Some(true) |
1915 | 0 | || self |
1916 | 0 | .tag_attribute_values |
1917 | 0 | .get(&*name.local) |
1918 | 0 | .and_then(|tav| tav.get(&*attr.name.local)) |
1919 | 0 | .map(|vs| { |
1920 | 0 | let attr_val = attr.value.to_lowercase(); |
1921 | 0 | vs.iter().any(|v| v.to_lowercase() == attr_val) |
1922 | 0 | }) |
1923 | 0 | == Some(true); |
1924 | 0 | if !whitelisted { |
1925 | | // If the class attribute is not whitelisted, |
1926 | | // but there is a whitelisted set of allowed_classes, |
1927 | | // do not strip out the class attribute. |
1928 | | // Banned classes will be filtered later. |
1929 | 0 | &*attr.name.local == "class" |
1930 | 0 | && self.allowed_classes.contains_key(&*name.local) |
1931 | 0 | } else if is_url_attr(&name.local, &attr.name.local) { |
1932 | 0 | let url = Url::parse(&attr.value); |
1933 | 0 | if let Ok(url) = url { |
1934 | 0 | self.url_schemes.contains(url.scheme()) |
1935 | 0 | } else if url == Err(url::ParseError::RelativeUrlWithoutBase) { |
1936 | 0 | !matches!(self.url_relative, UrlRelative::Deny) |
1937 | | } else { |
1938 | 0 | false |
1939 | | } |
1940 | | } else { |
1941 | 0 | true |
1942 | | } |
1943 | 0 | }; |
1944 | 0 | attrs.borrow_mut().retain(attr_filter); |
1945 | 0 | true |
1946 | | } else { |
1947 | 0 | false |
1948 | | } |
1949 | | } |
1950 | | } |
1951 | 0 | } |
1952 | | |
1953 | | // Check for unexpected namespace changes. |
1954 | | // |
1955 | | // The issue happens if developers added to the list of allowed tags any |
1956 | | // tag which is parsed in RCDATA state, PLAINTEXT state or RAWTEXT state, |
1957 | | // that is: |
1958 | | // |
1959 | | // * title |
1960 | | // * textarea |
1961 | | // * xmp |
1962 | | // * iframe |
1963 | | // * noembed |
1964 | | // * noframes |
1965 | | // * plaintext |
1966 | | // * noscript |
1967 | | // * style |
1968 | | // * script |
1969 | | // |
1970 | | // An example in the wild is Plume, that allows iframe [1]. So in next |
1971 | | // examples I'll assume the following policy: |
1972 | | // |
1973 | | // Builder::new() |
1974 | | // .add_tags(&["iframe"]) |
1975 | | // |
1976 | | // In HTML namespace `<iframe>` is parsed specially; that is, its content is |
1977 | | // treated as text. For instance, the following html: |
1978 | | // |
1979 | | // <iframe><a>test |
1980 | | // |
1981 | | // Is parsed into the following DOM tree: |
1982 | | // |
1983 | | // iframe |
1984 | | // └─ #text: <a>test |
1985 | | // |
1986 | | // So iframe cannot have any children other than a text node. |
1987 | | // |
1988 | | // The same is not true, though, in "foreign content"; that is, within |
1989 | | // <svg> or <math> tags. The following html: |
1990 | | // |
1991 | | // <svg><iframe><a>test |
1992 | | // |
1993 | | // is parsed differently: |
1994 | | // |
1995 | | // svg |
1996 | | // └─ iframe |
1997 | | // └─ a |
1998 | | // └─ #text: test |
1999 | | // |
2000 | | // So in SVG namespace iframe can have children. |
2001 | | // |
2002 | | // Ammonia disallows <svg> but it keeps its content after deleting it. And |
2003 | | // the parser internally keeps track of the namespace of the element. So |
2004 | | // assume we have the following snippet: |
2005 | | // |
2006 | | // <svg><iframe><a title="</iframe><img src onerror=alert(1)>">test |
2007 | | // |
2008 | | // It is parsed into: |
2009 | | // |
2010 | | // svg |
2011 | | // └─ iframe |
2012 | | // └─ a title="</iframe><img src onerror=alert(1)>" |
2013 | | // └─ #text: test |
2014 | | // |
2015 | | // This DOM tree is harmless from ammonia point of view because the piece |
2016 | | // of code that looks like XSS is in a title attribute. Hence, the |
2017 | | // resulting "safe" HTML from ammonia would be: |
2018 | | // |
2019 | | // <iframe><a title="</iframe><img src onerror=alert(1)>" rel="noopener |
2020 | | // noreferrer">test</a></iframe> |
2021 | | // |
2022 | | // However, at this point, the information about namespace is lost, which |
2023 | | // means that the browser will parse this snippet into: |
2024 | | // |
2025 | | // ├─ iframe |
2026 | | // │ └─ #text: <a title=" |
2027 | | // ├─ img src="" onerror="alert(1)" |
2028 | | // └─ #text: " rel="noopener noreferrer">test |
2029 | | // |
2030 | | // Leading to XSS. |
2031 | | // |
2032 | | // To solve this issue, check for unexpected namespace switches after cleanup. |
2033 | | // Elements which change namespace at an unexpected point are removed. |
2034 | | // This function returns `true` if `child` should be kept, and `false` if it |
2035 | | // should be removed. |
2036 | | // |
2037 | | // [1]: https://github.com/Plume-org/Plume/blob/main/plume-models/src/safe_string.rs#L21 |
2038 | 0 | fn check_expected_namespace(&self, parent: &Handle, child: &Handle) -> bool { |
2039 | 0 | let (parent, child) = match (&parent.data, &child.data) { |
2040 | 0 | (NodeData::Element { name: pn, .. }, NodeData::Element { name: cn, .. }) => (pn, cn), |
2041 | 0 | _ => return true, |
2042 | | }; |
2043 | | // The only way to switch from html to svg is with the <svg> tag |
2044 | 0 | if parent.ns == ns!(html) && child.ns == ns!(svg) { |
2045 | 0 | child.local == local_name!("svg") |
2046 | | // The only way to switch from html to mathml is with the <math> tag |
2047 | 0 | } else if parent.ns == ns!(html) && child.ns == ns!(mathml) { |
2048 | 0 | child.local == local_name!("math") |
2049 | | // The only way to switch from mathml to svg/html is with a text integration point |
2050 | 0 | } else if parent.ns == ns!(mathml) && child.ns != ns!(mathml) { |
2051 | | // https://html.spec.whatwg.org/#mathml |
2052 | 0 | matches!( |
2053 | 0 | &*parent.local, |
2054 | 0 | "mi" | "mo" | "mn" | "ms" | "mtext" | "annotation-xml" |
2055 | 0 | ) && if child.ns == ns!(html) { is_html_tag(&child.local) } else { true } |
2056 | | // The only way to switch from svg to mathml/html is with an html integration point |
2057 | 0 | } else if parent.ns == ns!(svg) && child.ns != ns!(svg) { |
2058 | | // https://html.spec.whatwg.org/#svg-0 |
2059 | 0 | matches!(&*parent.local, "foreignObject") |
2060 | 0 | && if child.ns == ns!(html) { is_html_tag(&child.local) } else { true } |
2061 | 0 | } else if child.ns == ns!(svg) { |
2062 | 0 | is_svg_tag(&child.local) |
2063 | 0 | } else if child.ns == ns!(mathml) { |
2064 | 0 | is_mathml_tag(&child.local) |
2065 | 0 | } else if child.ns == ns!(html) { |
2066 | 0 | is_html_tag(&child.local) |
2067 | | } else { |
2068 | | // There are no other supported ways to switch namespace |
2069 | 0 | parent.ns == child.ns |
2070 | | } |
2071 | 0 | } |
2072 | | |
2073 | | /// Add and transform special-cased attributes and elements. |
2074 | | /// |
2075 | | /// This function handles: |
2076 | | /// |
2077 | | /// * relative URL rewriting |
2078 | | /// * adding `<a rel>` attributes |
2079 | | /// * filtering out banned style properties |
2080 | | /// * filtering out banned classes |
2081 | 0 | fn adjust_node_attributes( |
2082 | 0 | &self, |
2083 | 0 | child: &mut Handle, |
2084 | 0 | link_rel: &Option<StrTendril>, |
2085 | 0 | id_prefix: Option<&'a str>, |
2086 | 0 | ) { |
2087 | | if let NodeData::Element { |
2088 | 0 | ref name, |
2089 | 0 | ref attrs, |
2090 | | .. |
2091 | 0 | } = child.data |
2092 | | { |
2093 | 0 | if let Some(set_attrs) = self.set_tag_attribute_values.get(&*name.local) { |
2094 | 0 | let mut attrs = attrs.borrow_mut(); |
2095 | 0 | for (&set_name, &set_value) in set_attrs { |
2096 | | // set the value of the attribute if the attribute is already present |
2097 | 0 | if let Some(attr) = attrs.iter_mut().find(|attr| &*attr.name.local == set_name) |
2098 | | { |
2099 | 0 | if &*attr.value != set_value { |
2100 | 0 | attr.value = set_value.into(); |
2101 | 0 | } |
2102 | 0 | } else { |
2103 | 0 | // otherwise, add the attribute |
2104 | 0 | let attr = Attribute { |
2105 | 0 | name: QualName::new(None, ns!(), set_name.into()), |
2106 | 0 | value: set_value.into(), |
2107 | 0 | }; |
2108 | 0 | attrs.push(attr); |
2109 | 0 | } |
2110 | | } |
2111 | 0 | } |
2112 | 0 | if let Some(ref link_rel) = *link_rel { |
2113 | 0 | if &*name.local == "a" { |
2114 | 0 | attrs.borrow_mut().push(Attribute { |
2115 | 0 | name: QualName::new(None, ns!(), local_name!("rel")), |
2116 | 0 | value: link_rel.clone(), |
2117 | 0 | }) |
2118 | 0 | } |
2119 | 0 | } |
2120 | 0 | if let Some(ref id_prefix) = id_prefix { |
2121 | 0 | for attr in &mut *attrs.borrow_mut() { |
2122 | 0 | if &attr.name.local == "id" && !attr.value.starts_with(id_prefix) { |
2123 | 0 | attr.value = format_tendril!("{}{}", id_prefix, attr.value); |
2124 | 0 | } |
2125 | | } |
2126 | 0 | } |
2127 | 0 | if let Some(ref attr_filter) = self.attribute_filter { |
2128 | 0 | let mut drop_attrs = Vec::new(); |
2129 | 0 | let mut attrs = attrs.borrow_mut(); |
2130 | 0 | for (i, attr) in &mut attrs.iter_mut().enumerate() { |
2131 | 0 | let replace_with = if let Some(new) = |
2132 | 0 | attr_filter.filter(&name.local, &attr.name.local, &attr.value) |
2133 | | { |
2134 | 0 | if *new != *attr.value { |
2135 | 0 | Some(format_tendril!("{}", new)) |
2136 | | } else { |
2137 | 0 | None // no need to replace the attr if filter returned the same value |
2138 | | } |
2139 | | } else { |
2140 | 0 | drop_attrs.push(i); |
2141 | 0 | None |
2142 | | }; |
2143 | 0 | if let Some(replace_with) = replace_with { |
2144 | 0 | attr.value = replace_with; |
2145 | 0 | } |
2146 | | } |
2147 | 0 | for i in drop_attrs.into_iter().rev() { |
2148 | 0 | attrs.swap_remove(i); |
2149 | 0 | } |
2150 | 0 | } |
2151 | | { |
2152 | 0 | let mut drop_attrs = Vec::new(); |
2153 | 0 | let mut attrs = attrs.borrow_mut(); |
2154 | 0 | for (i, attr) in attrs.iter_mut().enumerate() { |
2155 | 0 | if is_url_attr(&name.local, &attr.name.local) && is_url_relative(&attr.value) { |
2156 | 0 | let new_value = self.url_relative.evaluate(&attr.value); |
2157 | 0 | if let Some(new_value) = new_value { |
2158 | 0 | attr.value = new_value; |
2159 | 0 | } else { |
2160 | 0 | drop_attrs.push(i); |
2161 | 0 | } |
2162 | 0 | } |
2163 | | } |
2164 | | // Swap remove scrambles the vector after the current point. |
2165 | | // We will not do anything except with items before the current point. |
2166 | | // The `rev()` is, as such, necessary for correctness. |
2167 | | // We could use regular `remove(usize)` and a forward iterator, |
2168 | | // but that's slower. |
2169 | 0 | for i in drop_attrs.into_iter().rev() { |
2170 | 0 | attrs.swap_remove(i); |
2171 | 0 | } |
2172 | | } |
2173 | 0 | if let Some(allowed_values) = &self.style_properties { |
2174 | 0 | for attr in &mut *attrs.borrow_mut() { |
2175 | 0 | if &attr.name.local == "style" { |
2176 | 0 | attr.value = style::filter_style_attribute(&attr.value, allowed_values).into(); |
2177 | 0 | } |
2178 | | } |
2179 | 0 | } |
2180 | 0 | if let Some(allowed_values) = self.allowed_classes.get(&*name.local) { |
2181 | 0 | for attr in &mut *attrs.borrow_mut() { |
2182 | 0 | if &attr.name.local == "class" { |
2183 | 0 | let mut classes = vec![]; |
2184 | | // https://html.spec.whatwg.org/#global-attributes:classes-2 |
2185 | 0 | for class in attr.value.split_ascii_whitespace() { |
2186 | 0 | if allowed_values.contains(class) { |
2187 | 0 | classes.push(class.to_owned()); |
2188 | 0 | } |
2189 | | } |
2190 | 0 | attr.value = format_tendril!("{}", classes.join(" ")); |
2191 | 0 | } |
2192 | | } |
2193 | 0 | } |
2194 | 0 | } |
2195 | 0 | } |
2196 | | |
2197 | | /// Initializes an HTML fragment parser. |
2198 | | /// |
2199 | | /// Ammonia conforms to the HTML5 fragment parsing rules, |
2200 | | /// by parsing the given fragment as if it were included in a <div> tag. |
2201 | 0 | fn make_parser() -> html::Parser<RcDom> { |
2202 | 0 | html::parse_fragment( |
2203 | 0 | RcDom::default(), |
2204 | 0 | html::ParseOpts::default(), |
2205 | 0 | QualName::new(None, ns!(html), local_name!("div")), |
2206 | 0 | vec![], |
2207 | | false, |
2208 | | ) |
2209 | 0 | } |
2210 | | } |
2211 | | |
/// Decides whether the attribute `attr` on the element `element` holds a URL.
///
/// `href` and `src` count on every element; the remaining attributes only count
/// on the specific elements listed below. Names are compared case-sensitively.
fn is_url_attr(element: &str, attr: &str) -> bool {
    match (element, attr) {
        (_, "href" | "src") => true,
        ("form", "action")
        | ("object", "data")
        | ("button" | "input", "formaction")
        | ("a", "ping")
        | ("video", "poster") => true,
        _ => false,
    }
}
2222 | | |
2223 | 0 | fn is_html_tag(element: &str) -> bool { |
2224 | 0 | (!is_svg_tag(element) && !is_mathml_tag(element)) |
2225 | 0 | || matches!( |
2226 | 0 | element, |
2227 | 0 | "title" | "style" | "font" | "a" | "script" | "span" |
2228 | | ) |
2229 | 0 | } |
2230 | | |
/// Given an element name, check if it's SVG
///
/// The comparison is case-sensitive, so `foreignObject` matches but
/// `foreignobject` does not.
fn is_svg_tag(element: &str) -> bool {
    // https://svgwg.org/svg2-draft/eltindex.html
    const SVG_TAGS: &[&str] = &[
        "a",
        "animate",
        "animateMotion",
        "animateTransform",
        "circle",
        "clipPath",
        "defs",
        "desc",
        "discard",
        "ellipse",
        "feBlend",
        "feColorMatrix",
        "feComponentTransfer",
        "feComposite",
        "feConvolveMatrix",
        "feDiffuseLighting",
        "feDisplacementMap",
        "feDistantLight",
        "feDropShadow",
        "feFlood",
        "feFuncA",
        "feFuncB",
        "feFuncG",
        "feFuncR",
        "feGaussianBlur",
        "feImage",
        "feMerge",
        "feMergeNode",
        "feMorphology",
        "feOffset",
        "fePointLight",
        "feSpecularLighting",
        "feSpotLight",
        "feTile",
        "feTurbulence",
        "filter",
        "foreignObject",
        "g",
        "image",
        "line",
        "linearGradient",
        "marker",
        "mask",
        "metadata",
        "mpath",
        "path",
        "pattern",
        "polygon",
        "polyline",
        "radialGradient",
        "rect",
        "script",
        "set",
        "stop",
        "style",
        "svg",
        "switch",
        "symbol",
        "text",
        "textPath",
        "title",
        "tspan",
        "use",
        "view",
    ];
    SVG_TAGS.contains(&element)
}
2301 | | |
/// Given an element name, check if it's Math
///
/// The comparison is case-sensitive.
fn is_mathml_tag(element: &str) -> bool {
    // NOTE(review): this table was annotated with the SVG element index URL, which
    // does not list these names; presumably it was taken from the MathML
    // specification's element index (https://www.w3.org/TR/MathML3/) — confirm.
    const MATHML_TAGS: &[&str] = &[
        "abs", "and", "annotation", "annotation-xml", "apply", "approx",
        "arccos", "arccosh", "arccot", "arccoth", "arccsc", "arccsch",
        "arcsec", "arcsech", "arcsin", "arcsinh", "arctan", "arctanh",
        "arg", "bind", "bvar", "card", "cartesianproduct", "cbytes",
        "ceiling", "cerror", "ci", "cn", "codomain", "complexes",
        "compose", "condition", "conjugate", "cos", "cosh", "cot",
        "coth", "cs", "csc", "csch", "csymbol", "curl",
        "declare", "degree", "determinant", "diff", "divergence", "divide",
        "domain", "domainofapplication", "emptyset", "eq", "equivalent", "eulergamma",
        "exists", "exp", "exponentiale", "factorial", "factorof", "false",
        "floor", "fn", "forall", "gcd", "geq", "grad",
        "gt", "ident", "image", "imaginary", "imaginaryi", "implies",
        "in", "infinity", "int", "integers", "intersect", "interval",
        "inverse", "lambda", "laplacian", "lcm", "leq", "limit",
        "list", "ln", "log", "logbase", "lowlimit", "lt",
        "maction", "maligngroup", "malignmark", "math", "matrix", "matrixrow",
        "max", "mean", "median", "menclose", "merror", "mfenced",
        "mfrac", "mglyph", "mi", "min", "minus", "mlabeledtr",
        "mlongdiv", "mmultiscripts", "mn", "mo", "mode", "moment",
        "momentabout", "mover", "mpadded", "mphantom", "mprescripts", "mroot",
        "mrow", "ms", "mscarries", "mscarry", "msgroup", "msline",
        "mspace", "msqrt", "msrow", "mstack", "mstyle", "msub",
        "msubsup", "msup", "mtable", "mtd", "mtext", "mtr",
        "munder", "munderover", "naturalnumbers", "neq", "none", "not",
        "notanumber", "notin", "notprsubset", "notsubset", "or", "otherwise",
        "outerproduct", "partialdiff", "pi", "piece", "piecewise", "plus",
        "power", "primes", "product", "prsubset", "quotient", "rationals",
        "real", "reals", "reln", "rem", "root", "scalarproduct",
        "sdev", "sec", "sech", "selector", "semantics", "sep",
        "set", "setdiff", "share", "sin", "sinh", "span",
        "subset", "sum", "tan", "tanh", "tendsto", "times",
        "transpose", "true", "union", "uplimit", "variance", "vector",
        "vectorproduct", "xor",
    ];
    MATHML_TAGS.contains(&element)
}
2503 | | |
2504 | 0 | fn is_url_relative(url: &str) -> bool { |
2505 | 0 | matches!( |
2506 | 0 | Url::parse(url), |
2507 | | Err(url::ParseError::RelativeUrlWithoutBase) |
2508 | | ) |
2509 | 0 | } |
2510 | | |
2511 | | /// Policy for [relative URLs], that is, URLs that do not specify the scheme in full. |
2512 | | /// |
2513 | | /// This policy kicks in, if set, for any attribute named `src` or `href`, |
2514 | | /// as well as the `data` attribute of an `object` tag. |
2515 | | /// |
2516 | | /// [relative URLs]: struct.Builder.html#method.url_relative |
2517 | | /// |
2518 | | /// # Examples |
2519 | | /// |
2520 | | /// ## `Deny` |
2521 | | /// |
2522 | | /// * `<a href="test">` is a file-relative URL, and will be removed |
2523 | | /// * `<a href="/test">` is a domain-relative URL, and will be removed |
2524 | | /// * `<a href="//example.com/test">` is a scheme-relative URL, and will be removed |
2525 | | /// * `<a href="http://example.com/test">` is an absolute URL, and will be kept |
2526 | | /// |
2527 | | /// ## `PassThrough` |
2528 | | /// |
2529 | | /// No changes will be made to any URLs, except if a disallowed scheme is used. |
2530 | | /// |
2531 | | /// ## `RewriteWithBase` |
2532 | | /// |
2533 | | /// If the base is set to `http://notriddle.com/some-directory/some-file` |
2534 | | /// |
2535 | | /// * `<a href="test">` will be rewritten to `<a href="http://notriddle.com/some-directory/test">` |
2536 | | /// * `<a href="/test">` will be rewritten to `<a href="http://notriddle.com/test">` |
2537 | | /// * `<a href="//example.com/test">` will be rewritten to `<a href="http://example.com/test">` |
2538 | | /// * `<a href="http://example.com/test">` is an absolute URL, so it will be kept as-is |
2539 | | /// |
2540 | | /// ## `Custom` |
2541 | | /// |
2542 | | /// Pass the relative URL to a function. |
2543 | | /// If it returns `Some(string)`, then that one gets used. |
2544 | | /// Otherwise, it will remove the attribute (like `Deny` does). |
2545 | | /// |
2546 | | /// use std::borrow::Cow; |
2547 | | /// fn is_absolute_path(url: &str) -> bool { |
2548 | | /// let u = url.as_bytes(); |
2549 | | /// // `//a/b/c` is "protocol-relative", meaning "a" is a hostname |
2550 | | /// // `/a/b/c` is an absolute path, and what we want to do stuff to. |
2551 | | /// u.get(0) == Some(&b'/') && u.get(1) != Some(&b'/') |
2552 | | /// } |
2553 | | /// fn evaluate(url: &str) -> Option<Cow<str>> { |
2554 | | /// if is_absolute_path(url) { |
2555 | | /// Some(Cow::Owned(String::from("/root") + url)) |
2556 | | /// } else { |
2557 | | /// Some(Cow::Borrowed(url)) |
2558 | | /// } |
2559 | | /// } |
2560 | | /// fn main() { |
2561 | | /// let a = ammonia::Builder::new() |
2562 | | /// .url_relative(ammonia::UrlRelative::Custom(Box::new(evaluate))) |
2563 | | /// .clean("<a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>") |
2564 | | /// .to_string(); |
2565 | | /// assert_eq!(a, "<a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>"); |
2566 | | /// } |
2567 | | /// |
2568 | | /// This function is only applied to relative URLs. |
2569 | | /// To filter all of the URLs, |
2570 | | /// use the not-yet-implemented Content Security Policy. |
2571 | | #[non_exhaustive] |
2572 | | pub enum UrlRelative<'a> { |
2573 | | /// Relative URLs will be completely stripped from the document. |
2574 | | Deny, |
2575 | | /// Relative URLs will be passed through unchanged. |
2576 | | PassThrough, |
2577 | | /// Relative URLs will be changed into absolute URLs, based on this base URL. |
2578 | | RewriteWithBase(Url), |
2579 | | /// Force absolute and relative paths into a particular directory. |
2580 | | /// |
2581 | | /// Since the resolver does not affect fully-qualified URLs, it doesn't |
2582 | | /// prevent users from linking wherever they want. This feature only |
2583 | | /// serves to make content more portable. |
2584 | | /// |
2585 | | /// # Examples |
2586 | | /// |
2587 | | /// <table> |
2588 | | /// <thead> |
2589 | | /// <tr> |
2590 | | /// <th>root</th> |
2591 | | /// <th>path</th> |
2592 | | /// <th>url</th> |
2593 | | /// <th>result</th> |
2594 | | /// </tr> |
2595 | | /// </thead> |
2596 | | /// <tbody> |
2597 | | /// <tr> |
2598 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td> |
2599 | | /// <td>README.md</td> |
2600 | | /// <td></td> |
2601 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/master/README.md</td> |
2602 | | /// </tr><tr> |
2603 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td> |
2604 | | /// <td>README.md</td> |
2605 | | /// <td>/</td> |
2606 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td> |
2607 | | /// </tr><tr> |
2608 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td> |
2609 | | /// <td>README.md</td> |
2610 | | /// <td>/CONTRIBUTING.md</td> |
2611 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td> |
2612 | | /// </tr><tr> |
2613 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/master</td> |
2614 | | /// <td>README.md</td> |
2615 | | /// <td></td> |
2616 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/README.md</td> |
2617 | | /// </tr><tr> |
2618 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/master</td> |
2619 | | /// <td>README.md</td> |
2620 | | /// <td>/</td> |
2621 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/</td> |
2622 | | /// </tr><tr> |
2623 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/master</td> |
2624 | | /// <td>README.md</td> |
2625 | | /// <td>/CONTRIBUTING.md</td> |
2626 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md</td> |
2627 | | /// </tr><tr> |
2628 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td> |
2629 | | /// <td></td> |
2630 | | /// <td></td> |
2631 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td> |
2632 | | /// </tr><tr> |
2633 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td> |
2634 | | /// <td></td> |
2635 | | /// <td>/</td> |
2636 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td> |
2637 | | /// </tr><tr> |
2638 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/master/</td> |
2639 | | /// <td></td> |
2640 | | /// <td>/CONTRIBUTING.md</td> |
2641 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td> |
2642 | | /// </tr><tr> |
2643 | | /// <td>https://github.com/</td> |
2644 | | /// <td>rust-ammonia/ammonia/blob/master/README.md</td> |
2645 | | /// <td></td> |
2646 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/master/README.md</td> |
2647 | | /// </tr><tr> |
2648 | | /// <td>https://github.com/</td> |
2649 | | /// <td>rust-ammonia/ammonia/blob/master/README.md</td> |
2650 | | /// <td>/</td> |
2651 | | /// <td>https://github.com/</td> |
2652 | | /// </tr><tr> |
2653 | | /// <td>https://github.com/</td> |
2654 | | /// <td>rust-ammonia/ammonia/blob/master/README.md</td> |
2655 | | /// <td>CONTRIBUTING.md</td> |
2656 | | /// <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td> |
2657 | | /// </tr><tr> |
2658 | | /// <td>https://github.com/</td> |
2659 | | /// <td>rust-ammonia/ammonia/blob/master/README.md</td> |
2660 | | /// <td>/CONTRIBUTING.md</td> |
2661 | | /// <td>https://github.com/CONTRIBUTING.md</td> |
2662 | | /// </tr> |
2663 | | /// </tbody> |
2664 | | /// </table> |
2665 | | RewriteWithRoot { |
2666 | | /// The URL that is treated as the root by the resolver. |
2667 | | root: Url, |
2668 | | /// The "current path" used to resolve relative paths. |
2669 | | path: String, |
2670 | | }, |
2671 | | /// Rewrite URLs with a custom function. |
2672 | | Custom(Box<dyn UrlRelativeEvaluate<'a>>), |
2673 | | } |
2674 | | |
2675 | | impl<'a> UrlRelative<'a> { |
2676 | 0 | fn evaluate(&self, url: &str) -> Option<tendril::StrTendril> { |
2677 | 0 | match self { |
2678 | 0 | UrlRelative::RewriteWithBase(ref url_base) => url_base |
2679 | 0 | .join(url) |
2680 | 0 | .ok() |
2681 | 0 | .and_then(|x| StrTendril::from_str(x.as_str()).ok()), |
2682 | 0 | UrlRelative::RewriteWithRoot { ref root, ref path } => { |
2683 | 0 | (match url.as_bytes() { |
2684 | | // Scheme-relative URL |
2685 | 0 | [b'/', b'/', ..] => root.join(url), |
2686 | | // Path-absolute URL |
2687 | 0 | b"/" => root.join("."), |
2688 | 0 | [b'/', ..] => root.join(&url[1..]), |
2689 | | // Path-relative URL |
2690 | 0 | _ => root.join(path).and_then(|r| r.join(url)), |
2691 | | }) |
2692 | 0 | .ok() |
2693 | 0 | .and_then(|x| StrTendril::from_str(x.as_str()).ok()) |
2694 | | } |
2695 | 0 | UrlRelative::Custom(ref evaluate) => evaluate |
2696 | 0 | .evaluate(url) |
2697 | 0 | .as_ref() |
2698 | 0 | .map(Cow::as_ref) |
2699 | 0 | .map(StrTendril::from_str) |
2700 | 0 | .and_then(Result::ok), |
2701 | 0 | UrlRelative::PassThrough => StrTendril::from_str(url).ok(), |
2702 | 0 | UrlRelative::Deny => None, |
2703 | | } |
2704 | 0 | } |
2705 | | } |
2706 | | |
2707 | | impl<'a> fmt::Debug for UrlRelative<'a> { |
2708 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
2709 | 0 | match *self { |
2710 | 0 | UrlRelative::Deny => write!(f, "UrlRelative::Deny"), |
2711 | 0 | UrlRelative::PassThrough => write!(f, "UrlRelative::PassThrough"), |
2712 | 0 | UrlRelative::RewriteWithBase(ref base) => { |
2713 | 0 | write!(f, "UrlRelative::RewriteWithBase({})", base) |
2714 | | } |
2715 | 0 | UrlRelative::RewriteWithRoot { ref root, ref path } => { |
2716 | 0 | write!( |
2717 | 0 | f, |
2718 | | "UrlRelative::RewriteWithRoot {{ root: {root}, path: {path} }}" |
2719 | | ) |
2720 | | } |
2721 | 0 | UrlRelative::Custom(_) => write!(f, "UrlRelative::Custom"), |
2722 | | } |
2723 | 0 | } |
2724 | | } |
2725 | | |
/// Strategy for turning a relative URL into the value that should replace it.
///
/// The evaluator is consulted for relative URLs only; absolute URLs never reach it.
///
/// See [`url_relative`][url_relative] for more details.
///
/// [url_relative]: struct.Builder.html#method.url_relative
pub trait UrlRelativeEvaluate<'a>: Send + Sync + 'a {
    /// Return `Some(str)` to replace the URL with a new string, or `None` to remove the attribute.
    fn evaluate<'url>(&self, _: &'url str) -> Option<Cow<'url, str>>;
}

/// Every thread-safe `Fn(&str) -> Option<Cow<str>>` is an evaluator: it is simply
/// called with the URL.
impl<'a, F> UrlRelativeEvaluate<'a> for F
where
    F: for<'u> Fn(&'u str) -> Option<Cow<'u, str>> + Send + Sync + 'a,
{
    fn evaluate<'url>(&self, url: &'url str) -> Option<Cow<'url, str>> {
        (self)(url)
    }
}
2745 | | |
2746 | | impl fmt::Debug for dyn AttributeFilter { |
2747 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
2748 | 0 | f.write_str("AttributeFilter") |
2749 | 0 | } |
2750 | | } |
2751 | | |
/// Hook for removing or rewriting arbitrary attributes.
///
/// See [`attribute_filter`][attribute_filter] for more details.
///
/// [attribute_filter]: struct.Builder.html#method.attribute_filter
pub trait AttributeFilter: Send + Sync {
    /// Return `Some(str)` to replace the attribute value with a new string, or `None` to remove
    /// the attribute.
    fn filter<'a>(&self, _: &str, _: &str, _: &'a str) -> Option<Cow<'a, str>>;
}

/// Every `'static`, thread-safe function or closure with the matching signature is a filter;
/// it receives the element name, the attribute name and the attribute value, in that order.
impl<F> AttributeFilter for F
where
    F: for<'v> Fn(&str, &str, &'v str) -> Option<Cow<'v, str>> + Send + Sync + 'static,
{
    fn filter<'a>(&self, element: &str, attribute: &str, value: &'a str) -> Option<Cow<'a, str>> {
        (self)(element, attribute, value)
    }
}
2770 | | |
2771 | | /// A sanitized HTML document. |
2772 | | /// |
2773 | | /// The `Document` type is an opaque struct representing an HTML fragment that was sanitized by |
2774 | | /// `ammonia`. It can be converted to a [`String`] or written to a [`Write`] instance. This allows |
2775 | | /// users to avoid buffering the serialized representation to a [`String`] when desired. |
2776 | | /// |
2777 | | /// This type is opaque to insulate the caller from breaking changes in the `html5ever` interface. |
2778 | | /// |
2779 | | /// Note that this type wraps an `html5ever` DOM tree. `ammonia` does not support streaming, so |
2780 | | /// the complete fragment needs to be stored in memory during processing. |
2781 | | /// |
2782 | | /// [`String`]: https://doc.rust-lang.org/nightly/std/string/struct.String.html |
2783 | | /// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html |
2784 | | /// |
2785 | | /// # Examples |
2786 | | /// |
2787 | | /// use ammonia::Builder; |
2788 | | /// |
2789 | | /// let input = "<!-- comments will be stripped -->This is an Ammonia example."; |
2790 | | /// let output = "This is an Ammonia example."; |
2791 | | /// |
2792 | | /// let document = Builder::new() |
2793 | | /// .clean(input); |
2794 | | /// assert_eq!(document.to_string(), output); |
2795 | | pub struct Document(RcDom); |
2796 | | |
2797 | | impl Document { |
2798 | | /// Serializes a `Document` instance to a writer. |
2799 | | /// |
2800 | | /// This method writes the sanitized HTML to a [`Write`] instance, avoiding a buffering step. |
2801 | | /// |
2802 | | /// To avoid consuming the writer, a mutable reference can be passed, like in the example below. |
2803 | | /// |
2804 | | /// Note that the in-memory representation of `Document` is larger than the serialized |
2805 | | /// `String`. |
2806 | | /// |
2807 | | /// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html |
2808 | | /// |
2809 | | /// # Examples |
2810 | | /// |
2811 | | /// use ammonia::Builder; |
2812 | | /// |
2813 | | /// let input = "Some <style></style>HTML here"; |
2814 | | /// let expected = b"Some HTML here"; |
2815 | | /// |
2816 | | /// let document = Builder::new() |
2817 | | /// .clean(input); |
2818 | | /// |
2819 | | /// let mut sanitized = Vec::new(); |
2820 | | /// document.write_to(&mut sanitized) |
2821 | | /// .expect("Writing to a string should not fail (except on OOM)"); |
2822 | | /// assert_eq!(sanitized, expected); |
2823 | 0 | pub fn write_to<W>(&self, writer: W) -> io::Result<()> |
2824 | 0 | where |
2825 | 0 | W: io::Write, |
2826 | | { |
2827 | 0 | let opts = Self::serialize_opts(); |
2828 | 0 | let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into(); |
2829 | 0 | serialize(writer, &inner, opts) |
2830 | 0 | } |
2831 | | |
2832 | | /// Exposes the `Document` instance as an [`rcdom::Handle`]. |
2833 | | /// |
2834 | | /// This method returns the inner object backing the `Document` instance. This allows |
2835 | | /// making further changes to the DOM without introducing redundant serialization and |
2836 | | /// parsing. |
2837 | | /// |
2838 | | /// Note that this method should be considered unstable and sits outside of the semver |
2839 | | /// stability guarantees. It may change, break, or go away at any time, either because |
2840 | | /// of `html5ever` changes or `ammonia` implementation changes. |
2841 | | /// |
2842 | | /// For this method to be accessible, a `cfg` flag is required. The easiest way is to |
2843 | | /// use the `RUSTFLAGS` environment variable: |
2844 | | /// |
2845 | | /// ```text |
2846 | | /// RUSTFLAGS='--cfg ammonia_unstable' cargo build |
2847 | | /// ``` |
2848 | | /// |
2849 | | /// on Unix-like platforms, or |
2850 | | /// |
2851 | | /// ```text |
2852 | | /// set RUSTFLAGS=--cfg ammonia_unstable |
2853 | | /// cargo build |
2854 | | /// ``` |
2855 | | /// |
2856 | | /// on Windows. |
2857 | | /// |
2858 | | /// This requirement also applies to crates that transitively depend on crates that use |
2859 | | /// this flag. |
2860 | | /// |
2861 | | /// # Examples |
2862 | | /// |
2863 | | /// use ammonia::Builder; |
2864 | | /// use maplit::hashset; |
2865 | | /// use html5ever::serialize::{serialize, SerializeOpts}; |
2866 | | /// |
2867 | | /// # use std::error::Error; |
2868 | | /// # fn do_main() -> Result<(), Box<dyn Error>> { |
2869 | | /// let input = "<a>one link</a> and <a>one more</a>"; |
2870 | | /// let expected = "<a>one more</a> and <a>one link</a>"; |
2871 | | /// |
2872 | | /// let document = Builder::new() |
2873 | | /// .link_rel(None) |
2874 | | /// .clean(input); |
2875 | | /// |
2876 | | /// let mut node = document.to_dom_node(); |
2877 | | /// node.children.borrow_mut().reverse(); |
2878 | | /// |
2879 | | /// let mut buf = Vec::new(); |
2880 | | /// serialize(&mut buf, &node, SerializeOpts::default())?; |
2881 | | /// let output = String::from_utf8(buf)?; |
2882 | | /// |
2883 | | /// assert_eq!(output, expected); |
2884 | | /// # Ok(()) |
2885 | | /// # } |
2886 | | /// # fn main() { do_main().unwrap() } |
2887 | | #[cfg(ammonia_unstable)] |
2888 | | pub fn to_dom_node(&self) -> Handle { |
2889 | | self.0.document.children.borrow()[0].clone() |
2890 | | } |
2891 | | |
2892 | 0 | fn serialize_opts() -> SerializeOpts { |
2893 | 0 | SerializeOpts::default() |
2894 | 0 | } |
2895 | | } |
2896 | | |
2897 | | impl Clone for Document { |
2898 | 0 | fn clone(&self) -> Self { |
2899 | 0 | let parser = Builder::make_parser(); |
2900 | 0 | let dom = parser.one(&self.to_string()[..]); |
2901 | 0 | Document(dom) |
2902 | 0 | } |
2903 | | } |
2904 | | |
2905 | | /// Convert a `Document` to stringified HTML. |
2906 | | /// |
2907 | | /// Since [`Document`] implements [`Display`], it can be converted to a [`String`] using the |
2908 | | /// standard [`ToString::to_string`] method. This is the simplest way to use `ammonia`. |
2909 | | /// |
2910 | | /// [`Document`]: ammonia::Document |
2911 | | /// [`Display`]: std::fmt::Display |
2912 | | /// [`ToString::to_string`]: std::string::ToString |
2913 | | /// |
2914 | | /// # Examples |
2915 | | /// |
2916 | | /// use ammonia::Builder; |
2917 | | /// |
2918 | | /// let input = "Some <style></style>HTML here"; |
2919 | | /// let output = "Some HTML here"; |
2920 | | /// |
2921 | | /// let document = Builder::new() |
2922 | | /// .clean(input); |
2923 | | /// assert_eq!(document.to_string(), output); |
2924 | | impl Display for Document { |
2925 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
2926 | 0 | let opts = Self::serialize_opts(); |
2927 | 0 | let mut ret_val = Vec::new(); |
2928 | 0 | let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into(); |
2929 | 0 | serialize(&mut ret_val, &inner, opts) |
2930 | 0 | .expect("Writing to a string shouldn't fail (expect on OOM)"); |
2931 | 0 | String::from_utf8(ret_val) |
2932 | 0 | .expect("html5ever only supports UTF8") |
2933 | 0 | .fmt(f) |
2934 | 0 | } |
2935 | | } |
2936 | | |
2937 | | impl fmt::Debug for Document { |
2938 | 0 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { |
2939 | 0 | write!(f, "Document({})", self) |
2940 | 0 | } |
2941 | | } |
2942 | | |
2943 | | impl From<Document> for String { |
2944 | 0 | fn from(document: Document) -> Self { |
2945 | 0 | document.to_string() |
2946 | 0 | } |
2947 | | } |
2948 | | |
2949 | | #[cfg(test)] |
2950 | | mod test { |
2951 | | use super::*; |
2952 | | #[test] |
2953 | | fn deeply_nested_whitelisted() { |
2954 | | clean(&"<b>".repeat(60_000)); |
2955 | | } |
2956 | | #[test] |
2957 | | fn deeply_nested_blacklisted() { |
2958 | | clean(&"<b-b>".repeat(60_000)); |
2959 | | } |
2960 | | #[test] |
2961 | | fn deeply_nested_alternating() { |
2962 | | clean(&"<b-b>".repeat(35_000)); |
2963 | | } |
2964 | | #[test] |
2965 | | fn included_angles() { |
2966 | | let fragment = "1 < 2"; |
2967 | | let result = clean(fragment); |
2968 | | assert_eq!(result, "1 < 2"); |
2969 | | } |
2970 | | #[test] |
2971 | | fn remove_script() { |
2972 | | let fragment = "an <script>evil()</script> example"; |
2973 | | let result = clean(fragment); |
2974 | | assert_eq!(result, "an example"); |
2975 | | } |
2976 | | #[test] |
2977 | | fn ignore_link() { |
2978 | | let fragment = "a <a href=\"http://www.google.com\">good</a> example"; |
2979 | | let expected = "a <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">\ |
2980 | | good</a> example"; |
2981 | | let result = clean(fragment); |
2982 | | assert_eq!(result, expected); |
2983 | | } |
2984 | | #[test] |
2985 | | fn remove_unsafe_link() { |
2986 | | let fragment = "an <a onclick=\"evil()\" href=\"http://www.google.com\">evil</a> example"; |
2987 | | let result = clean(fragment); |
2988 | | assert_eq!( |
2989 | | result, |
2990 | | "an <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">evil</a> example" |
2991 | | ); |
2992 | | } |
2993 | | #[test] |
2994 | | fn remove_js_link() { |
2995 | | let fragment = "an <a href=\"javascript:evil()\">evil</a> example"; |
2996 | | let result = clean(fragment); |
2997 | | assert_eq!(result, "an <a rel=\"noopener noreferrer\">evil</a> example"); |
2998 | | } |
2999 | | #[test] |
3000 | | fn tag_rebalance() { |
3001 | | let fragment = "<b>AWESOME!"; |
3002 | | let result = clean(fragment); |
3003 | | assert_eq!(result, "<b>AWESOME!</b>"); |
3004 | | } |
3005 | | #[test] |
3006 | | fn allow_url_relative() { |
3007 | | let fragment = "<a href=test>Test</a>"; |
3008 | | let result = Builder::new() |
3009 | | .url_relative(UrlRelative::PassThrough) |
3010 | | .clean(fragment) |
3011 | | .to_string(); |
3012 | | assert_eq!( |
3013 | | result, |
3014 | | "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>" |
3015 | | ); |
3016 | | } |
3017 | | #[test] |
3018 | | fn rewrite_url_relative() { |
3019 | | let fragment = "<a href=test>Test</a>"; |
3020 | | let result = Builder::new() |
3021 | | .url_relative(UrlRelative::RewriteWithBase( |
3022 | | Url::parse("http://example.com/").unwrap(), |
3023 | | )) |
3024 | | .clean(fragment) |
3025 | | .to_string(); |
3026 | | assert_eq!( |
3027 | | result, |
3028 | | "<a href=\"http://example.com/test\" rel=\"noopener noreferrer\">Test</a>" |
3029 | | ); |
3030 | | } |
3031 | | #[test] |
3032 | | fn rewrite_url_relative_with_invalid_url() { |
3033 | | // Reduced from https://github.com/Bauke/ammonia-crash-test |
3034 | | let fragment = r##"<a href="\\"https://example.com\\"">test</a>"##; |
3035 | | let result = Builder::new() |
3036 | | .url_relative(UrlRelative::RewriteWithBase( |
3037 | | Url::parse("http://example.com/").unwrap(), |
3038 | | )) |
3039 | | .clean(fragment) |
3040 | | .to_string(); |
3041 | | assert_eq!(result, r##"<a rel="noopener noreferrer">test</a>"##); |
3042 | | } |
3043 | | #[test] |
3044 | | fn attribute_filter_nop() { |
3045 | | let fragment = "<a href=test>Test</a>"; |
3046 | | let result = Builder::new() |
3047 | | .attribute_filter(|elem, attr, value| { |
3048 | | assert_eq!("a", elem); |
3049 | | assert!( |
3050 | | matches!( |
3051 | | (attr, value), |
3052 | | ("href", "test") | ("rel", "noopener noreferrer") |
3053 | | ), |
3054 | | "{}", |
3055 | | value.to_string() |
3056 | | ); |
3057 | | Some(value.into()) |
3058 | | }) |
3059 | | .clean(fragment) |
3060 | | .to_string(); |
3061 | | assert_eq!( |
3062 | | result, |
3063 | | "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>" |
3064 | | ); |
3065 | | } |
3066 | | |
3067 | | #[test] |
3068 | | fn attribute_filter_drop() { |
3069 | | let fragment = "Test<img alt=test src=imgtest>"; |
3070 | | let result = Builder::new() |
3071 | | .attribute_filter(|elem, attr, value| { |
3072 | | assert_eq!("img", elem); |
3073 | | match (attr, value) { |
3074 | | ("src", "imgtest") => None, |
3075 | | ("alt", "test") => Some(value.into()), |
3076 | | _ => panic!("unexpected"), |
3077 | | } |
3078 | | }) |
3079 | | .clean(fragment) |
3080 | | .to_string(); |
3081 | | assert_eq!(result, r#"Test<img alt="test">"#); |
3082 | | } |
3083 | | |
3084 | | #[test] |
3085 | | fn url_filter_absolute() { |
3086 | | let fragment = "Test<img alt=test src=imgtest>"; |
3087 | | let result = Builder::new() |
3088 | | .attribute_filter(|elem, attr, value| { |
3089 | | assert_eq!("img", elem); |
3090 | | match (attr, value) { |
3091 | | ("src", "imgtest") => { |
3092 | | Some(format!("https://example.com/images/{}", value).into()) |
3093 | | } |
3094 | | ("alt", "test") => None, |
3095 | | _ => panic!("unexpected"), |
3096 | | } |
3097 | | }) |
3098 | | .url_relative(UrlRelative::RewriteWithBase( |
3099 | | Url::parse("http://wrong.invalid/").unwrap(), |
3100 | | )) |
3101 | | .clean(fragment) |
3102 | | .to_string(); |
3103 | | assert_eq!( |
3104 | | result, |
3105 | | r#"Test<img src="https://example.com/images/imgtest">"# |
3106 | | ); |
3107 | | } |
3108 | | |
3109 | | #[test] |
3110 | | fn url_filter_relative() { |
3111 | | let fragment = "Test<img alt=test src=imgtest>"; |
3112 | | let result = Builder::new() |
3113 | | .attribute_filter(|elem, attr, value| { |
3114 | | assert_eq!("img", elem); |
3115 | | match (attr, value) { |
3116 | | ("src", "imgtest") => Some("rewrite".into()), |
3117 | | ("alt", "test") => Some("altalt".into()), |
3118 | | _ => panic!("unexpected"), |
3119 | | } |
3120 | | }) |
3121 | | .url_relative(UrlRelative::RewriteWithBase( |
3122 | | Url::parse("https://example.com/base/#").unwrap(), |
3123 | | )) |
3124 | | .clean(fragment) |
3125 | | .to_string(); |
3126 | | assert_eq!( |
3127 | | result, |
3128 | | r#"Test<img alt="altalt" src="https://example.com/base/rewrite">"# |
3129 | | ); |
3130 | | } |
3131 | | |
3132 | | #[test] |
3133 | | fn rewrite_url_relative_no_rel() { |
3134 | | let fragment = "<a href=test>Test</a>"; |
3135 | | let result = Builder::new() |
3136 | | .url_relative(UrlRelative::RewriteWithBase( |
3137 | | Url::parse("http://example.com/").unwrap(), |
3138 | | )) |
3139 | | .link_rel(None) |
3140 | | .clean(fragment) |
3141 | | .to_string(); |
3142 | | assert_eq!(result, "<a href=\"http://example.com/test\">Test</a>"); |
3143 | | } |
3144 | | #[test] |
3145 | | fn deny_url_relative() { |
3146 | | let fragment = "<a href=test>Test</a>"; |
3147 | | let result = Builder::new() |
3148 | | .url_relative(UrlRelative::Deny) |
3149 | | .clean(fragment) |
3150 | | .to_string(); |
3151 | | assert_eq!(result, "<a rel=\"noopener noreferrer\">Test</a>"); |
3152 | | } |
3153 | | #[test] |
3154 | | fn replace_rel() { |
3155 | | let fragment = "<a href=test rel=\"garbage\">Test</a>"; |
3156 | | let result = Builder::new() |
3157 | | .url_relative(UrlRelative::PassThrough) |
3158 | | .clean(fragment) |
3159 | | .to_string(); |
3160 | | assert_eq!( |
3161 | | result, |
3162 | | "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>" |
3163 | | ); |
3164 | | } |
3165 | | #[test] |
3166 | | fn consider_rel_still_banned() { |
3167 | | let fragment = "<a href=test rel=\"garbage\">Test</a>"; |
3168 | | let result = Builder::new() |
3169 | | .url_relative(UrlRelative::PassThrough) |
3170 | | .link_rel(None) |
3171 | | .clean(fragment) |
3172 | | .to_string(); |
3173 | | assert_eq!(result, "<a href=\"test\">Test</a>"); |
3174 | | } |
3175 | | #[test] |
3176 | | fn object_data() { |
3177 | | let fragment = "<span data=\"javascript:evil()\">Test</span>\ |
3178 | | <object data=\"javascript:evil()\"></object>M"; |
3179 | | let expected = r#"<span data="javascript:evil()">Test</span><object></object>M"#; |
3180 | | let result = Builder::new() |
3181 | | .tags(hashset!["span", "object"]) |
3182 | | .generic_attributes(hashset!["data"]) |
3183 | | .clean(fragment) |
3184 | | .to_string(); |
3185 | | assert_eq!(result, expected); |
3186 | | } |
3187 | | #[test] |
3188 | | fn remove_attributes() { |
3189 | | let fragment = "<table border=\"1\"><tr></tr></table>"; |
3190 | | let result = Builder::new().clean(fragment); |
3191 | | assert_eq!( |
3192 | | result.to_string(), |
3193 | | "<table><tbody><tr></tr></tbody></table>" |
3194 | | ); |
3195 | | } |
3196 | | #[test] |
3197 | | fn quotes_in_attrs() { |
3198 | | let fragment = "<b title='\"'>contents</b>"; |
3199 | | let result = clean(fragment); |
3200 | | assert_eq!(result, "<b title=\""\">contents</b>"); |
3201 | | } |
3202 | | #[test] |
3203 | | #[should_panic] |
3204 | | fn panic_if_rel_is_allowed_and_replaced_generic() { |
3205 | | Builder::new() |
3206 | | .link_rel(Some("noopener noreferrer")) |
3207 | | .generic_attributes(hashset!["rel"]) |
3208 | | .clean("something"); |
3209 | | } |
3210 | | #[test] |
3211 | | #[should_panic] |
3212 | | fn panic_if_rel_is_allowed_and_replaced_a() { |
3213 | | Builder::new() |
3214 | | .link_rel(Some("noopener noreferrer")) |
3215 | | .tag_attributes(hashmap![ |
3216 | | "a" => hashset!["rel"], |
3217 | | ]) |
3218 | | .clean("something"); |
3219 | | } |
3220 | | #[test] |
3221 | | fn no_panic_if_rel_is_allowed_and_replaced_span() { |
3222 | | Builder::new() |
3223 | | .link_rel(Some("noopener noreferrer")) |
3224 | | .tag_attributes(hashmap![ |
3225 | | "span" => hashset!["rel"], |
3226 | | ]) |
3227 | | .clean("<span rel=\"what\">s</span>"); |
3228 | | } |
3229 | | #[test] |
3230 | | fn no_panic_if_rel_is_allowed_and_not_replaced_generic() { |
3231 | | Builder::new() |
3232 | | .link_rel(None) |
3233 | | .generic_attributes(hashset!["rel"]) |
3234 | | .clean("<a rel=\"what\">s</a>"); |
3235 | | } |
3236 | | #[test] |
3237 | | fn no_panic_if_rel_is_allowed_and_not_replaced_a() { |
3238 | | Builder::new() |
3239 | | .link_rel(None) |
3240 | | .tag_attributes(hashmap![ |
3241 | | "a" => hashset!["rel"], |
3242 | | ]) |
3243 | | .clean("<a rel=\"what\">s</a>"); |
3244 | | } |
3245 | | #[test] |
3246 | | fn dont_close_void_elements() { |
3247 | | let fragment = "<br>"; |
3248 | | let result = clean(fragment); |
3249 | | assert_eq!(result.to_string(), "<br>"); |
3250 | | } |
3251 | | #[should_panic] |
3252 | | #[test] |
3253 | | fn panic_on_allowed_classes_tag_attributes() { |
3254 | | let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>"; |
3255 | | Builder::new() |
3256 | | .link_rel(None) |
3257 | | .tag_attributes(hashmap![ |
3258 | | "p" => hashset!["class"], |
3259 | | "a" => hashset!["class"], |
3260 | | ]) |
3261 | | .allowed_classes(hashmap![ |
3262 | | "p" => hashset!["foo", "bar"], |
3263 | | "a" => hashset!["baz"], |
3264 | | ]) |
3265 | | .clean(fragment); |
3266 | | } |
3267 | | #[should_panic] |
3268 | | #[test] |
3269 | | fn panic_on_allowed_classes_generic_attributes() { |
3270 | | let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>"; |
3271 | | Builder::new() |
3272 | | .link_rel(None) |
3273 | | .generic_attributes(hashset!["class", "href", "some-foo"]) |
3274 | | .allowed_classes(hashmap![ |
3275 | | "p" => hashset!["foo", "bar"], |
3276 | | "a" => hashset!["baz"], |
3277 | | ]) |
3278 | | .clean(fragment); |
3279 | | } |
3280 | | #[test] |
3281 | | fn remove_non_allowed_classes() { |
3282 | | let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>"; |
3283 | | let result = Builder::new() |
3284 | | .link_rel(None) |
3285 | | .allowed_classes(hashmap![ |
3286 | | "p" => hashset!["foo", "bar"], |
3287 | | "a" => hashset!["baz"], |
3288 | | ]) |
3289 | | .clean(fragment); |
3290 | | assert_eq!( |
3291 | | result.to_string(), |
3292 | | "<p class=\"foo bar\"><a class=\"baz\">Hey</a></p>" |
3293 | | ); |
3294 | | } |
3295 | | #[test] |
3296 | | fn remove_non_allowed_classes_with_tag_class() { |
3297 | | let fragment = "<p class=\"foo bar\"><a class=\"baz bleh\">Hey</a></p>"; |
3298 | | let result = Builder::new() |
3299 | | .link_rel(None) |
3300 | | .tag_attributes(hashmap![ |
3301 | | "div" => hashset!["class"], |
3302 | | ]) |
3303 | | .allowed_classes(hashmap![ |
3304 | | "p" => hashset!["foo", "bar"], |
3305 | | "a" => hashset!["baz"], |
3306 | | ]) |
3307 | | .clean(fragment); |
3308 | | assert_eq!( |
3309 | | result.to_string(), |
3310 | | "<p class=\"foo bar\"><a class=\"baz\">Hey</a></p>" |
3311 | | ); |
3312 | | } |
3313 | | #[test] |
3314 | | fn allowed_classes_ascii_whitespace() { |
3315 | | // According to https://infra.spec.whatwg.org/#ascii-whitespace, |
3316 | | // TAB (\t), LF (\n), FF (\x0C), CR (\x0D) and SPACE (\x20) are |
3317 | | // considered to be ASCII whitespace. Unicode whitespace characters |
3318 | | // and VT (\x0B) aren't ASCII whitespace. |
3319 | | let fragment = "<p class=\"a\tb\nc\x0Cd\re f\x0B g\u{2000}\">"; |
3320 | | let result = Builder::new() |
3321 | | .allowed_classes(hashmap![ |
3322 | | "p" => hashset!["a", "b", "c", "d", "e", "f", "g"], |
3323 | | ]) |
3324 | | .clean(fragment); |
3325 | | assert_eq!(result.to_string(), r#"<p class="a b c d e"></p>"#); |
3326 | | } |
3327 | | #[test] |
3328 | | fn remove_non_allowed_attributes_with_tag_attribute_values() { |
3329 | | let fragment = "<p data-label=\"baz\" name=\"foo\"></p>"; |
3330 | | let result = Builder::new() |
3331 | | .tag_attribute_values(hashmap![ |
3332 | | "p" => hashmap![ |
3333 | | "data-label" => hashset!["bar"], |
3334 | | ], |
3335 | | ]) |
3336 | | .tag_attributes(hashmap![ |
3337 | | "p" => hashset!["name"], |
3338 | | ]) |
3339 | | .clean(fragment); |
3340 | | assert_eq!(result.to_string(), "<p name=\"foo\"></p>",); |
3341 | | } |
#[test]
fn keep_allowed_attributes_with_tag_attribute_values() {
    // "bar" is the one value accepted for `data-label`, so the attribute
    // survives alongside the unconditionally allowed `name`.
    let permitted_values = hashmap![
        "p" => hashmap!["data-label" => hashset!["bar"]],
    ];
    let permitted_attributes = hashmap!["p" => hashset!["name"]];
    let cleaned = Builder::new()
        .tag_attribute_values(permitted_values)
        .tag_attributes(permitted_attributes)
        .clean("<p data-label=\"bar\" name=\"foo\"></p>")
        .to_string();
    assert_eq!(cleaned, "<p data-label=\"bar\" name=\"foo\"></p>");
}
#[test]
fn tag_attribute_values_case_insensitive() {
    // The allowed value is spelled "checkbox" but the input uses
    // "CHECKBOX"; the attribute is kept with its original casing.
    let mut builder = Builder::new();
    builder
        .tags(hashset!["input"])
        .tag_attribute_values(hashmap![
            "input" => hashmap!["type" => hashset!["checkbox"]],
        ])
        .tag_attributes(hashmap!["input" => hashset!["name"]]);
    let cleaned = builder
        .clean("<input type=\"CHECKBOX\" name=\"foo\">")
        .to_string();
    assert_eq!(cleaned, "<input type=\"CHECKBOX\" name=\"foo\">");
}
#[test]
fn set_tag_attribute_values() {
    // A forced attribute value is added to an <a> that did not carry the
    // attribute in the input.
    let mut builder = Builder::new();
    builder
        .link_rel(None)
        .add_tag_attributes("a", &["target"])
        .set_tag_attribute_value("a", "target", "_blank");
    let cleaned = builder
        .clean("<a href=\"https://example.com/\">Link</a>")
        .to_string();
    assert_eq!(
        cleaned,
        "<a href=\"https://example.com/\" target=\"_blank\">Link</a>",
    );
}
#[test]
fn update_existing_set_tag_attribute_values() {
    // When the input already has the attribute, its value is overwritten
    // in place (the attribute keeps its position before `href`).
    let mut builder = Builder::new();
    builder
        .link_rel(None)
        .add_tag_attributes("a", &["target"])
        .set_tag_attribute_value("a", "target", "_blank");
    let cleaned = builder
        .clean("<a target=\"bad\" href=\"https://example.com/\">Link</a>")
        .to_string();
    assert_eq!(
        cleaned,
        "<a target=\"_blank\" href=\"https://example.com/\">Link</a>",
    );
}
#[test]
fn unwhitelisted_set_tag_attribute_values() {
    // Forcing an attribute value for a tag does not allow that tag:
    // <my-elem> is still removed from the output.
    let mut builder = Builder::new();
    builder.set_tag_attribute_value("my-elem", "my-attr", "val");
    let cleaned = builder.clean("<span>hi</span><my-elem>").to_string();
    assert_eq!(cleaned, "<span>hi</span>");
}
#[test]
fn remove_entity_link() {
    // The href spells `javascript:alert('XSS')` entirely with hexadecimal
    // character references (without terminating semicolons). The HTML
    // parser decodes them, so the sanitizer must still recognise and strip
    // the `javascript:` URL; a plain-text href would not exercise that path.
    let fragment = "<a href=\"&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61\
                    &#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29\">Click me!</a>";
    let cleaned = clean(fragment);
    assert_eq!(
        cleaned.to_string(),
        "<a rel=\"noopener noreferrer\">Click me!</a>"
    );
}
#[test]
fn remove_relative_url_evaluate() {
    // Custom evaluator used for relative URLs:
    //   * absolute paths (`/a/b/c`) are re-rooted under `/root`;
    //     `//a/b/c` is protocol-relative ("a" is a hostname) and is not
    //     treated as an absolute path,
    //   * URLs starting with "ba" are rejected,
    //   * everything else is passed through unchanged.
    fn evaluate(url: &str) -> Option<Cow<'_, str>> {
        let is_absolute_path = url.starts_with('/') && !url.starts_with("//");
        if is_absolute_path {
            return Some(Cow::Owned(format!("/root{url}")));
        }
        if url.starts_with("ba") {
            return None;
        }
        Some(Cow::Borrowed(url))
    }

    let mut builder = Builder::new();
    builder.url_relative(UrlRelative::Custom(Box::new(evaluate)));
    let cleaned = builder
        .clean("<a href=banned>banned</a><a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>")
        .to_string();
    assert_eq!(cleaned, "<a rel=\"noopener noreferrer\">banned</a><a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>");
}
#[test]
fn remove_relative_url_evaluate_b() {
    // Same evaluator as in `remove_relative_url_evaluate`: re-root absolute
    // paths (but not protocol-relative `//host/...` URLs), reject URLs
    // starting with "ba", pass everything else through.
    fn evaluate(url: &str) -> Option<Cow<'_, str>> {
        let is_absolute_path = url.starts_with('/') && !url.starts_with("//");
        if is_absolute_path {
            return Some(Cow::Owned(format!("/root{url}")));
        }
        if url.starts_with("ba") {
            return None;
        }
        Some(Cow::Borrowed(url))
    }

    // A rejected href is removed whether it is the only attribute, comes
    // before another attribute, or comes after one.
    let mut builder = Builder::new();
    builder.url_relative(UrlRelative::Custom(Box::new(evaluate)));
    let cleaned = builder
        .clean("<a href=banned>banned</a><a href=banned title=test>banned</a><a title=test href=banned>banned</a>")
        .to_string();
    assert_eq!(cleaned, "<a rel=\"noopener noreferrer\">banned</a><a rel=\"noopener noreferrer\" title=\"test\">banned</a><a title=\"test\" rel=\"noopener noreferrer\">banned</a>");
}
#[test]
fn remove_relative_url_evaluate_c() {
    // The custom evaluator must not run on absolute URLs. It rewrites every
    // URL it is given to "invalid", so an untouched href in the output shows
    // that it was never consulted.
    fn evaluate(_: &str) -> Option<Cow<'_, str>> {
        Some(Cow::Owned(String::from("invalid")))
    }
    let a = Builder::new()
        .url_relative(UrlRelative::Custom(Box::new(evaluate)))
        .clean("<a href=\"https://www.google.com/\">google</a>")
        .to_string();
    assert_eq!(
        a,
        "<a href=\"https://www.google.com/\" rel=\"noopener noreferrer\">google</a>"
    );
}
#[test]
fn clean_children_of_bad_element() {
    // Disallowed elements are unwrapped at every nesting level; only their
    // text content remains.
    let cleaned = Builder::new().clean("<bad><evil>a</evil>b</bad>").to_string();
    assert_eq!(cleaned, "ab");
}
#[test]
fn reader_input() {
    // `clean_from_reader` sanitizes bytes read from an `io::Read` source.
    // The <script> element is removed together with its content, which
    // leaves the space that preceded it and the space that followed it side
    // by side - hence two spaces in the expected text.
    let fragment = b"an <script>evil()</script> example";
    let result = Builder::new().clean_from_reader(&fragment[..]);
    assert!(result.is_ok());
    assert_eq!(result.unwrap().to_string(), "an  example");
}
#[test]
fn reader_non_utf8() {
    // A truncated multi-byte sequence does not make reading fail; it is
    // replaced by U+FFFD in the output.
    let bytes: &[u8] = b"non-utf8 \xF0\x90\x80string";
    let outcome = Builder::new().clean_from_reader(bytes);
    assert!(outcome.is_ok());
    let cleaned = outcome.unwrap().to_string();
    assert_eq!(cleaned, "non-utf8 \u{fffd}string");
}
#[test]
fn display_impl() {
    // Formatting a cleaned document with `{}` yields the sanitized HTML.
    let document = Builder::new().link_rel(None).clean(r#"a <a>link</a>"#);
    let rendered = format!("{document}");
    assert_eq!(rendered, "a <a>link</a>");
}
#[test]
fn debug_impl() {
    // Formatting with `{:?}` wraps the sanitized HTML in `Document(...)`.
    let document = Builder::new().link_rel(None).clean(r#"a <a>link</a>"#);
    let rendered = format!("{document:?}");
    assert_eq!(rendered, "Document(a <a>link</a>)");
}
#[cfg(ammonia_unstable)]
#[test]
fn to_dom_node() {
    // Smoke test: only checks that `to_dom_node` can be called on a cleaned
    // document (compiled only with the `ammonia_unstable` cfg).
    let document = Builder::new().link_rel(None).clean(r#"a <a>link</a>"#);
    let _dom = document.to_dom_node();
}
#[test]
fn string_from_document() {
    // Converting a document into a `String` serializes it; the unclosed
    // <a> from the input comes out closed.
    let document = Builder::new().link_rel(None).clean(r#"a <a>link"#);
    let converted = String::from(document);
    assert_eq!(converted, "a <a>link</a>");
}
// Compile-time probes: a call only type-checks when the argument's type
// satisfies the bound, so the test below fails to build rather than fails
// at runtime if `Builder` loses `Sync` or `Send`.
fn require_sync<T>(_: T)
where
    T: Sync,
{
}
fn require_send<T>(_: T)
where
    T: Send,
{
}
#[test]
fn require_sync_and_send() {
    let for_sync = Builder::new();
    let for_send = Builder::new();
    require_sync(for_sync);
    require_send(for_send);
}
#[test]
fn id_prefixed() {
    // `id` is only allowed on <a>: there it gains the configured prefix,
    // while the <b> loses its id altogether.
    let mut builder = Builder::new();
    builder
        .tag_attributes(hashmap!["a" => hashset!["id"]])
        .id_prefix(Some("prefix-"));
    let cleaned = String::from(builder.clean("<a id=\"hello\"></a><b id=\"hello\"></a>"));
    assert_eq!(
        cleaned,
        "<a id=\"prefix-hello\" rel=\"noopener noreferrer\"></a><b></b>"
    );
}
#[test]
fn id_already_prefixed() {
    // An id that already starts with the prefix is not prefixed a second
    // time.
    let mut builder = Builder::new();
    builder
        .tag_attributes(hashmap!["a" => hashset!["id"]])
        .id_prefix(Some("prefix-"));
    let cleaned = String::from(builder.clean("<a id=\"prefix-hello\"></a>"));
    assert_eq!(
        cleaned,
        "<a id=\"prefix-hello\" rel=\"noopener noreferrer\"></a>"
    );
}
#[test]
fn clean_content_tags() {
    // A tag listed in `clean_content_tags` is removed together with
    // everything inside it, leaving nothing behind here.
    let mut builder = Builder::new();
    builder.clean_content_tags(hashset!["script"]);
    let cleaned =
        String::from(builder.clean("<script type=\"text/javascript\"><a>Hello!</a></script>"));
    assert_eq!(cleaned, "");
}
#[test]
fn only_clean_content_tags() {
    // Only the content-cleaned tag disappears; its allowed siblings are
    // left intact.
    let mut builder = Builder::new();
    builder.clean_content_tags(hashset!["script"]);
    let cleaned = String::from(
        builder.clean("<em>This is</em><script><a>Hello!</a></script><p>still here!</p>"),
    );
    assert_eq!(cleaned, "<em>This is</em><p>still here!</p>");
}
#[test]
fn clean_removed_default_tag() {
    // <a> is taken out of the allowed tags and its `href`/`hreflang`
    // attribute entries are removed as well; <script> content is still
    // dropped wholesale.
    let mut builder = Builder::new();
    builder
        .rm_tags(hashset!["a"])
        .rm_tag_attributes("a", hashset!["href", "hreflang"])
        .clean_content_tags(hashset!["script"]);
    let cleaned = String::from(
        builder.clean("<em>This is</em><script><a>Hello!</a></script><p>still here!</p>"),
    );
    assert_eq!(cleaned, "<em>This is</em><p>still here!</p>");
}
#[test]
#[should_panic]
fn panic_on_clean_content_tag_attribute() {
    // <a> is removed from the allowed tags and then marked as a
    // content-cleaned tag; cleaning with this configuration must panic.
    // NOTE(review): presumably the panic is triggered by the attribute
    // entries still registered for <a> - confirm against the builder's
    // assertions.
    let mut builder = Builder::new();
    builder
        .rm_tags(std::iter::once("a"))
        .clean_content_tags(hashset!["a"]);
    builder.clean("");
}
#[test]
#[should_panic]
fn panic_on_clean_content_tag() {
    // <a> is marked as a content-cleaned tag without being removed from
    // the builder's other settings first; cleaning must panic.
    let mut builder = Builder::new();
    builder.clean_content_tags(hashset!["a"]);
    builder.clean("");
}
3618 | | |
#[test]
fn clean_text_test() {
    // `clean_text` turns arbitrary text into something safe to embed in
    // HTML: angle brackets and spaces are replaced by character references,
    // so the output can never equal an input that contains them.
    assert_eq!(
        clean_text("<this> is <a test function"),
        "&lt;this&gt;&#32;is&#32;&lt;a&#32;test&#32;function"
    );
}
3626 | | |
#[test]
fn clean_text_spaces_test() {
    // TAB, LF, FF and SPACE are each emitted as a decimal character
    // reference rather than as raw whitespace.
    assert_eq!(clean_text("\x09\x0a\x0c\x20"), "&#9;&#10;&#12;&#32;");
}
3631 | | |
#[test]
fn ns_svg() {
    // Each case is (extra allowed tags, input fragment, expected output).
    // The first fragment comes from
    // https://github.com/cure53/DOMPurify/pull/495
    let cases: [(&[&str], &str, &str); 4] = [
        (
            &["iframe"],
            r##"<svg><iframe><a title="</iframe><img src onerror=alert(1)>">test"##,
            "",
        ),
        (
            &["iframe"],
            "<svg><iframe>remove me</iframe></svg><iframe>keep me</iframe>",
            "<iframe>keep me</iframe>",
        ),
        (
            &["iframe"],
            "<svg><a>remove me</a></svg><iframe>keep me</iframe>",
            "<iframe>keep me</iframe>",
        ),
        (
            &["iframe", "svg"],
            "<svg><a>keep me</a></svg><iframe>keep me</iframe>",
            "<svg><a rel=\"noopener noreferrer\">keep me</a></svg><iframe>keep me</iframe>",
        ),
    ];
    for (extra_tags, fragment, expected) in cases {
        let cleaned = String::from(Builder::new().add_tags(extra_tags).clean(fragment));
        assert_eq!(cleaned, expected);
    }
}
3654 | | |
#[test]
fn ns_svg_2() {
    // Comments are kept and every tag used in the fragment is allowed, yet
    // only the svg/foreignObject/table skeleton may survive sanitization.
    let mut builder = Builder::default();
    builder
        .strip_comments(false)
        .add_tags(&["svg","foreignObject","table","path","xmp"]);
    let cleaned = builder
        .clean("<svg><foreignObject><table><path><xmp><!--</xmp><img title'--><img src=1 onerror=alert(1)>'>")
        .to_string();
    assert_eq!(
        cleaned,
        "<svg><foreignObject><table></table></foreignObject></svg>"
    );
}
3667 | | |
#[test]
fn ns_mathml() {
    // Each case is (input fragment, expected output); all of them allow the
    // same extra MathML tags.
    // https://github.com/cure53/DOMPurify/pull/495
    let cases = [
        ("<mglyph></mglyph>", ""),
        (
            "<math><mtext><div><mglyph>",
            "<math><mtext><div></div></mtext></math>",
        ),
        (
            "<math><mtext><mglyph>",
            "<math><mtext><mglyph></mglyph></mtext></math>",
        ),
    ];
    for (fragment, expected) in cases {
        let cleaned = String::from(
            Builder::new()
                .add_tags(&["math", "mtext", "mglyph"])
                .clean(fragment),
        );
        assert_eq!(cleaned, expected);
    }
}
3699 | | |
#[test]
fn ns_mathml_2() {
    // Comments are kept and every tag used in the fragment is allowed, yet
    // only the math/mtext/table skeleton may survive sanitization.
    let mut builder = Builder::default();
    builder
        .strip_comments(false)
        .add_tags(&["math","mtext","table","mglyph","xmp"]);
    let cleaned = builder
        .clean("<math><mtext><table><mglyph><xmp><!--</xmp><img title='--><img src=1 onerror=alert(1)>'>")
        .to_string();
    assert_eq!(cleaned, "<math><mtext><table></table></mtext></math>");
}
3712 | | |
3713 | | |
#[test]
fn xml_processing_instruction() {
    // https://blog.slonser.info/posts/dompurify-node-type-confusion/
    //
    // With <svg> not allowed, nothing of the fragment survives.
    let fragment = r##"<svg><?xml-stylesheet src='slonser' ?></svg>"##;
    let result = String::from(Builder::new().clean(fragment));
    assert_eq!(result, "");

    // With <svg> allowed, the processing instruction is still dropped.
    let fragment = r##"<svg><?xml-stylesheet src='slonser' ?></svg>"##;
    let result = String::from(Builder::new().add_tags(&["svg"]).clean(fragment));
    assert_eq!(result, "<svg></svg>");

    // The processing instruction ends at the first `>`, so the <img> that
    // follows is real markup: it is sanitized (onerror removed) and the
    // trailing " ?>" becomes text, whose `>` is serialized as `&gt;`.
    let fragment = r##"<svg><?xml-stylesheet ><img src=x onerror="alert('Ammonia bypassed!!!')"> ?></svg>"##;
    let result = String::from(Builder::new().add_tags(&["svg"]).clean(fragment));
    assert_eq!(result, "<svg></svg><img src=\"x\"> ?&gt;");
}
3729 | | |
#[test]
fn generic_attribute_prefixes() {
    // Number of prefixes currently configured; panics while the field is
    // `None`.
    fn prefix_count(builder: &Builder<'_>) -> usize {
        builder.generic_attribute_prefixes.as_ref().unwrap().len()
    }

    let prefix_data = ["data-"];
    let prefix_code = ["code-"];
    let mut builder = Builder::new();
    let initial: HashSet<&'_ str> = HashSet::from(["data-"]);

    // Nothing is configured by default.
    assert!(builder.generic_attribute_prefixes.is_none());
    builder.generic_attribute_prefixes(initial);
    assert!(builder.generic_attribute_prefixes.is_some());
    assert_eq!(prefix_count(&builder), 1);

    // Adding a prefix that is already present changes nothing.
    builder.add_generic_attribute_prefixes(&prefix_data);
    assert_eq!(prefix_count(&builder), 1);
    builder.add_generic_attribute_prefixes(&prefix_code);
    assert_eq!(prefix_count(&builder), 2);

    // Removing works once; removing an absent prefix changes nothing.
    builder.rm_generic_attribute_prefixes(&prefix_code);
    assert_eq!(prefix_count(&builder), 1);
    builder.rm_generic_attribute_prefixes(&prefix_code);
    assert_eq!(prefix_count(&builder), 1);

    // Removing the last prefix resets the field to `None`.
    builder.rm_generic_attribute_prefixes(&prefix_data);
    assert!(builder.generic_attribute_prefixes.is_none());
}
3752 | | |
#[test]
fn generic_attribute_prefixes_clean() {
    let fragment = r#"<a data-1 data-2 code-1 code-2><a>Hello!</a></a>"#;

    // Without any prefix, only the explicitly allowed attribute survives.
    let mut explicit_only = Builder::new();
    explicit_only.add_tag_attributes("a", &["data-1"]);
    assert_eq!(
        String::from(explicit_only.clean(fragment)),
        r#"<a data-1="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
    );

    // The "data-" prefix additionally lets `data-2` through.
    let mut data_prefix = Builder::new();
    data_prefix
        .add_tag_attributes("a", &["data-1"])
        .add_generic_attribute_prefixes(&["data-"]);
    assert_eq!(
        String::from(data_prefix.clean(fragment)),
        r#"<a data-1="" data-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
    );

    // With both prefixes configured, all four attributes are kept.
    let mut both_prefixes = Builder::new();
    both_prefixes
        .add_tag_attributes("a", &["data-1", "code-1"])
        .add_generic_attribute_prefixes(&["data-", "code-"]);
    assert_eq!(
        String::from(both_prefixes.clean(fragment)),
        r#"<a data-1="" data-2="" code-1="" code-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
    );
}
#[test]
fn lesser_than_isnt_html() {
    // A `<` followed by a space does not make the text HTML.
    assert!(!is_html("1 < 2"));
}
#[test]
fn dense_lesser_than_isnt_html() {
    // A `<` directly followed by a digit does not make the text HTML.
    assert!(!is_html("1<2"));
}
#[test]
fn what_about_number_elements() {
    // `<2>` looks like a tag but is not detected as HTML.
    assert!(!is_html("foo<2>bar"));
}
#[test]
fn turbofish_is_html_sadly() {
    // `<u8>` inside a Rust turbofish is detected as HTML.
    assert!(is_html("Vec::<u8>::new()"));
}
#[test]
fn stop_grinning() {
    // A trailing `<g>` emoticon is detected as HTML.
    assert!(is_html("did you really believe me? <g>"));
}
#[test]
fn dont_be_bold() {
    // A lone opening tag is detected as HTML.
    assert!(is_html("<b>"));
}
3816 | | |
#[test]
fn rewrite_with_root() {
    // Each case is (root, path, url, expected): `url` is the href found in
    // the input and `expected` is the href after it has been rewritten with
    // `UrlRelative::RewriteWithRoot { root, path }`.
    let tests = [
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "README.md",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/master/README.md",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "README.md",
            "/",
            "https://github.com/rust-ammonia/ammonia/blob/master/",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "README.md",
            "/CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master",
            "README.md",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/README.md",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master",
            "README.md",
            "/",
            "https://github.com/rust-ammonia/ammonia/blob/",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master",
            "README.md",
            "/CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/master/",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "",
            "/",
            "https://github.com/rust-ammonia/ammonia/blob/master/",
        ),
        (
            "https://github.com/rust-ammonia/ammonia/blob/master/",
            "",
            "/CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
        ),
        (
            "https://github.com/",
            "rust-ammonia/ammonia/blob/master/README.md",
            "",
            "https://github.com/rust-ammonia/ammonia/blob/master/README.md",
        ),
        (
            "https://github.com/",
            "rust-ammonia/ammonia/blob/master/README.md",
            "/",
            "https://github.com/",
        ),
        (
            "https://github.com/",
            "rust-ammonia/ammonia/blob/master/README.md",
            "CONTRIBUTING.md",
            "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
        ),
        (
            "https://github.com/",
            "rust-ammonia/ammonia/blob/master/README.md",
            "/CONTRIBUTING.md",
            "https://github.com/CONTRIBUTING.md",
        ),
    ];
    for (root, path, url, result) in tests {
        let input = format!(r#"<a href="{url}">test</a>"#);
        let expected = format!(r#"<a href="{result}" rel="noopener noreferrer">test</a>"#);
        let actual = Builder::new()
            .url_relative(UrlRelative::RewriteWithRoot {
                root: Url::parse(root).unwrap(),
                path: path.to_string(),
            })
            .clean(&input)
            .to_string();
        // The failing case is named in the panic message itself, so it is
        // reported next to the left/right values.
        assert_eq!(
            expected, actual,
            "failed to check ({root}, {path}, {url}, {result})"
        );
    }
}
3918 | | } |