Coverage Report

Created: 2026-03-23 07:13

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/rust/registry/src/index.crates.io-1949cf8c6b5b557f/ammonia-4.1.2/src/lib.rs
Line
Count
Source
1
// Copyright (C) Michael Howell and others
2
// this library is released under the same terms as Rust itself.
3
4
#![deny(unsafe_code)]
5
#![deny(missing_docs)]
6
7
//! Ammonia is a whitelist-based HTML sanitization library. It is designed to
8
//! prevent cross-site scripting, layout breaking, and clickjacking caused
9
//! by untrusted user-provided HTML being mixed into a larger web page.
10
//!
11
//! Ammonia uses [html5ever] to parse and serialize document fragments the same way browsers do,
12
//! so it is extremely resilient to syntactic obfuscation.
13
//!
14
//! Ammonia parses its input exactly according to the HTML5 specification;
15
//! it will not linkify bare URLs, insert line or paragraph breaks, or convert `(C)` into ©.
16
//! If you want that, use a markup processor before running the sanitizer, like [pulldown-cmark].
17
//!
18
//! # Examples
19
//!
20
//! ```
21
//! let result = ammonia::clean(
22
//!     "<b><img src='' onerror=alert('hax')>I'm not trying to XSS you</b>"
23
//! );
24
//! assert_eq!(result, "<b><img src=\"\">I'm not trying to XSS you</b>");
25
//! ```
26
//!
27
//! [html5ever]: https://github.com/servo/html5ever "The HTML parser in Servo"
28
//! [pulldown-cmark]: https://github.com/google/pulldown-cmark "CommonMark parser"
29
30
#[cfg(ammonia_unstable)]
31
pub mod rcdom;
32
33
#[cfg(not(ammonia_unstable))]
34
mod rcdom;
35
36
mod style;
37
38
use html5ever::interface::Attribute;
39
use html5ever::serialize::{serialize, SerializeOpts};
40
use html5ever::tree_builder::{NodeOrText, TreeSink};
41
use html5ever::{driver as html, local_name, ns, QualName};
42
use maplit::{hashmap, hashset};
43
use std::sync::LazyLock;
44
use rcdom::{Handle, NodeData, RcDom, SerializableHandle};
45
use std::borrow::{Borrow, Cow};
46
use std::cell::Cell;
47
use std::cmp::max;
48
use std::collections::{HashMap, HashSet};
49
use std::fmt::{self, Display};
50
use std::io;
51
use std::iter::IntoIterator as IntoIter;
52
use std::mem;
53
use std::rc::Rc;
54
use std::str::FromStr;
55
use tendril::stream::TendrilSink;
56
use tendril::StrTendril;
57
use tendril::{format_tendril, ByteTendril};
58
pub use url::Url;
59
60
use html5ever::buffer_queue::BufferQueue;
61
use html5ever::tokenizer::{Token, TokenSink, TokenSinkResult, Tokenizer};
62
pub use url;
63
64
static AMMONIA: LazyLock<Builder<'static>> = LazyLock::new(Builder::default);
65
66
/// Clean HTML with a conservative set of defaults.
67
///
68
/// * [tags](struct.Builder.html#defaults)
69
/// * [`script` and `style` have their contents stripped](struct.Builder.html#defaults-1)
70
/// * [attributes on specific tags](struct.Builder.html#defaults-2)
71
/// * [attributes on all tags](struct.Builder.html#defaults-6)
72
/// * [url schemes](struct.Builder.html#defaults-7)
73
/// * [relative URLs are passed through, unchanged, by default](struct.Builder.html#defaults-8)
74
/// * [links are marked `noopener noreferrer` by default](struct.Builder.html#defaults-9)
75
/// * all `class=""` settings are blocked by default
76
/// * comments are stripped by default
77
/// * no generic attribute prefixes are turned on by default
78
/// * no specific tag-attribute-value settings are configured by default
79
///
80
/// [opener]: https://mathiasbynens.github.io/rel-noopener/
81
/// [referrer]: https://en.wikipedia.org/wiki/HTTP_referer
82
///
83
/// # Examples
84
///
85
///     assert_eq!(ammonia::clean("XSS<script>attack</script>"), "XSS")
86
0
pub fn clean(src: &str) -> String {
87
0
    AMMONIA.clean(src).to_string()
88
0
}
89
90
/// Turn an arbitrary string into unformatted HTML.
91
///
92
/// This function is roughly equivalent to PHP's `htmlspecialchars` and `htmlentities`.
93
/// It is as strict as possible, encoding every character that has special meaning to the
94
/// HTML parser.
95
///
96
/// # Warnings
97
///
98
/// This function cannot be used to package strings into a `<script>` or `<style>` tag;
99
/// you need a JavaScript or CSS escaper to do that.
100
///
101
///     // DO NOT DO THIS
102
///     # use ammonia::clean_text;
103
///     let untrusted = "Robert\"); abuse();//";
104
///     let html = format!("<script>invoke(\"{}\")</script>", clean_text(untrusted));
105
///
106
/// `<textarea>` tags will strip the first newline, if present, even if that newline is encoded.
107
/// If you want to build an editor that works the way most folks expect them to, you should put a
108
/// newline at the beginning of the tag, like this:
109
///
110
///     # use ammonia::{Builder, clean_text};
111
///     let untrusted = "\n\nhi!";
112
///     let mut b = Builder::new();
113
///     b.add_tags(&["textarea"]);
114
///     // This is the bad version
115
///     // The user put two newlines at the beginning, but the first one was removed
116
///     let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted))).to_string();
117
///     assert_eq!("<textarea>\nhi!</textarea>", sanitized);
118
///     // This is a good version
119
///     // The user put two newlines at the beginning, and we add a third one,
120
///     // so the result still has two
121
///     let sanitized = b.clean(&format!("<textarea>\n{}</textarea>", clean_text(untrusted))).to_string();
122
///     assert_eq!("<textarea>\n\nhi!</textarea>", sanitized);
123
///     // This version is also often considered good
124
///     // For many applications, leading and trailing whitespace is probably unwanted
125
///     let sanitized = b.clean(&format!("<textarea>{}</textarea>", clean_text(untrusted.trim()))).to_string();
126
///     assert_eq!("<textarea>hi!</textarea>", sanitized);
127
///
128
/// It also does not make user text safe for HTML attribute microsyntaxes such as `class` or `id`.
129
/// Only use this function for places where HTML accepts unrestricted text such as `title` attributes
130
/// and paragraph contents.
131
0
pub fn clean_text(src: &str) -> String {
    // Escaping never shrinks the text, so the input length is a lower bound
    // for the output; 4 is the minimum capacity reserved for tiny inputs.
    let mut escaped = String::with_capacity(max(4, src.len()));
    for ch in src.chars() {
        // `Some(entity)` for every character with special meaning to the HTML
        // parser, `None` for characters that are copied through untouched.
        let entity = match ch {
            // opens a tag
            '<' => Some("&lt;"),
            // ends the value of an unquoted attribute
            '>' => Some("&gt;"),
            // ends the value of a double-quoted attribute
            '"' => Some("&quot;"),
            // ends the value of a single-quoted attribute
            '\'' => Some("&apos;"),
            // bogus parse error in an unquoted HTML5 attribute; in SGML/HTML it
            // ends an attribute value surrounded by backquotes
            '`' => Some("&grave;"),
            // ends an unquoted attribute
            '/' => Some("&#47;"),
            // starts an entity reference
            '&' => Some("&amp;"),
            // ignored when it leads an unquoted attribute
            '=' => Some("&#61;"),
            // whitespace ends an unquoted attribute
            ' ' => Some("&#32;"),
            '\t' => Some("&#9;"),
            '\n' => Some("&#10;"),
            '\x0c' => Some("&#12;"),
            '\r' => Some("&#13;"),
            // a spec-compliant browser performs this replacement anyway, but
            // intermediate middleware might not
            '\0' => Some("&#65533;"),
            // every other character is passed through verbatim
            _ => None,
        };
        match entity {
            Some(text) => escaped.push_str(text),
            None => escaped.push(ch),
        }
    }
    escaped
}
169
170
/// Determine if a given string contains HTML
171
///
172
/// This function parses the full string as HTML and checks whether the input contained any
173
/// HTML syntax.
174
///
175
/// # Note
176
/// This function will return positively for strings that contain invalid HTML syntax like
177
/// `<g>` and even `Vec::<u8>::new()`.
178
0
pub fn is_html(input: &str) -> bool {
179
0
    let santok = SanitizationTokenizer::new();
180
0
    let mut chunk = ByteTendril::new();
181
0
    chunk.push_slice(input.as_bytes());
182
0
    let mut input = BufferQueue::default();
183
0
    input.push_back(chunk.try_reinterpret().unwrap());
184
185
0
    let tok = Tokenizer::new(santok, Default::default());
186
0
    let _ = tok.feed(&mut input);
187
0
    tok.end();
188
0
    tok.sink.was_sanitized.get()
189
0
}
190
191
/// Token sink used by `is_html` to detect whether tokenizing an input
/// produced anything besides character data, end-of-file, or parse errors.
#[derive(Clone)]
struct SanitizationTokenizer {
    /// Starts as `false`; `process_token` flips it to `true` the first time a
    /// token other than characters, EOF, or a parse error is seen. A `Cell`
    /// is used because `process_token` only receives `&self`.
    was_sanitized: Cell<bool>,
}
195
196
impl SanitizationTokenizer {
197
0
    pub fn new() -> SanitizationTokenizer {
198
0
        SanitizationTokenizer {
199
0
            was_sanitized: false.into(),
200
0
        }
201
0
    }
202
}
203
204
impl TokenSink for SanitizationTokenizer {
205
    type Handle = ();
206
0
    fn process_token(&self, token: Token, _line_number: u64) -> TokenSinkResult<()> {
207
0
        match token {
208
0
            Token::CharacterTokens(_) | Token::EOFToken | Token::ParseError(_) => {}
209
0
            _ => {
210
0
                self.was_sanitized.set(true);
211
0
            }
212
        }
213
0
        TokenSinkResult::Continue
214
0
    }
215
0
    fn end(&self) {}
216
}
217
218
/// An HTML sanitizer.
219
///
220
/// Given a fragment of HTML, Ammonia will parse it according to the HTML5
221
/// parsing algorithm and sanitize any disallowed tags or attributes. This
222
/// algorithm also takes care of things like unclosed and (some) misnested
223
/// tags.
224
///
225
/// # Examples
226
///
227
///     use ammonia::{Builder, UrlRelative};
228
///
229
///     let a = Builder::default()
230
///         .link_rel(None)
231
///         .url_relative(UrlRelative::PassThrough)
232
///         .clean("<a href=/>test")
233
///         .to_string();
234
///     assert_eq!(
235
///         a,
236
///         "<a href=\"/\">test</a>");
237
///
238
/// # Panics
239
///
240
/// Running [`clean`] or [`clean_from_reader`] may cause a panic if the builder is
241
/// configured with any of these (contradictory) settings:
242
///
243
///  * The `rel` attribute is added to [`generic_attributes`] or the
244
///    [`tag_attributes`] for the `<a>` tag, and [`link_rel`] is not set to `None`.
245
///
246
///    For example, this is going to panic, since [`link_rel`] is set  to
247
///    `Some("noopener noreferrer")` by default,
248
///    and it makes no sense to simultaneously say that the user is allowed to
249
///    set their own `rel` attribute while saying that every link shall be set to
250
///    a particular value:
251
///
252
///    ```should_panic
253
///    use ammonia::Builder;
254
///    use maplit::hashset;
255
///
256
///    # fn main() {
257
///    Builder::default()
258
///        .generic_attributes(hashset!["rel"])
259
///        .clean("");
260
///    # }
261
///    ```
262
///
263
///    This, however, is perfectly valid:
264
///
265
///    ```
266
///    use ammonia::Builder;
267
///    use maplit::hashset;
268
///
269
///    # fn main() {
270
///    Builder::default()
271
///        .generic_attributes(hashset!["rel"])
272
///        .link_rel(None)
273
///        .clean("");
274
///    # }
275
///    ```
276
///
277
///  * The `class` attribute is in [`allowed_classes`] and is in the
278
///    corresponding [`tag_attributes`] or in [`generic_attributes`].
279
///
280
///    This is done both to line up with the treatment of `rel`,
281
///    and to prevent people from accidentally allowing arbitrary
282
///    classes on a particular element.
283
///
284
///    This will panic:
285
///
286
///    ```should_panic
287
///    use ammonia::Builder;
288
///    use maplit::{hashmap, hashset};
289
///
290
///    # fn main() {
291
///    Builder::default()
292
///        .generic_attributes(hashset!["class"])
293
///        .allowed_classes(hashmap!["span" => hashset!["hidden"]])
294
///        .clean("");
295
///    # }
296
///    ```
297
///
298
///    This, however, is perfectly valid:
299
///
300
///    ```
301
///    use ammonia::Builder;
302
///    use maplit::{hashmap, hashset};
303
///
304
///    # fn main() {
305
///    Builder::default()
306
///        .allowed_classes(hashmap!["span" => hashset!["hidden"]])
307
///        .clean("");
308
///    # }
309
///    ```
310
///
311
///  * A tag is in either [`tags`] or [`tag_attributes`] while also
312
///    being in [`clean_content_tags`].
313
///
314
///    Both [`tags`] and [`tag_attributes`] are whitelists but
315
///    [`clean_content_tags`] is a blacklist, so it doesn't make sense
316
///    to have the same tag in both.
317
///
318
///    For example, this will panic, since the `aside` tag is in
319
///    [`tags`] by default:
320
///
321
///    ```should_panic
322
///    use ammonia::Builder;
323
///    use maplit::hashset;
324
///
325
///    # fn main() {
326
///    Builder::default()
327
///        .clean_content_tags(hashset!["aside"])
328
///        .clean("");
329
///    # }
330
///    ```
331
///
332
///    This, however, is valid:
333
///
334
///    ```
335
///    use ammonia::Builder;
336
///    use maplit::hashset;
337
///
338
///    # fn main() {
339
///    Builder::default()
340
///        .rm_tags(&["aside"])
341
///        .clean_content_tags(hashset!["aside"])
342
///        .clean("");
343
///    # }
344
///    ```
345
///
346
/// [`clean`]: #method.clean
347
/// [`clean_from_reader`]: #method.clean_from_reader
348
/// [`generic_attributes`]: #method.generic_attributes
349
/// [`tag_attributes`]: #method.tag_attributes
350
/// [`generic_attributes`]: #method.generic_attributes
351
/// [`link_rel`]: #method.link_rel
352
/// [`allowed_classes`]: #method.allowed_classes
353
/// [`id_prefix`]: #method.id_prefix
354
/// [`tags`]: #method.tags
355
/// [`clean_content_tags`]: #method.clean_content_tags
356
#[derive(Debug)]
357
pub struct Builder<'a> {
358
    tags: HashSet<&'a str>,
359
    clean_content_tags: HashSet<&'a str>,
360
    tag_attributes: HashMap<&'a str, HashSet<&'a str>>,
361
    tag_attribute_values: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
362
    set_tag_attribute_values: HashMap<&'a str, HashMap<&'a str, &'a str>>,
363
    generic_attributes: HashSet<&'a str>,
364
    url_schemes: HashSet<&'a str>,
365
    url_relative: UrlRelative<'a>,
366
    attribute_filter: Option<Box<dyn AttributeFilter>>,
367
    link_rel: Option<&'a str>,
368
    allowed_classes: HashMap<&'a str, HashSet<&'a str>>,
369
    strip_comments: bool,
370
    id_prefix: Option<&'a str>,
371
    generic_attribute_prefixes: Option<HashSet<&'a str>>,
372
    style_properties: Option<HashSet<&'a str>>,
373
}
374
375
impl<'a> Default for Builder<'a> {
376
0
    fn default() -> Self {
377
        #[rustfmt::skip]
378
0
        let tags = hashset![
379
0
            "a", "abbr", "acronym", "area", "article", "aside", "b", "bdi",
380
0
            "bdo", "blockquote", "br", "caption", "center", "cite", "code",
381
0
            "col", "colgroup", "data", "dd", "del", "details", "dfn", "div",
382
0
            "dl", "dt", "em", "figcaption", "figure", "footer", "h1", "h2",
383
0
            "h3", "h4", "h5", "h6", "header", "hgroup", "hr", "i", "img",
384
0
            "ins", "kbd", "li", "map", "mark", "nav", "ol", "p", "pre",
385
0
            "q", "rp", "rt", "rtc", "ruby", "s", "samp", "small", "span",
386
0
            "strike", "strong", "sub", "summary", "sup", "table", "tbody",
387
0
            "td", "th", "thead", "time", "tr", "tt", "u", "ul", "var", "wbr"
388
        ];
389
0
        let clean_content_tags = hashset!["script", "style"];
390
0
        let generic_attributes = hashset!["lang", "title"];
391
0
        let tag_attributes = hashmap![
392
0
            "a" => hashset![
393
0
                "href", "hreflang"
394
            ],
395
0
            "bdo" => hashset![
396
                "dir"
397
            ],
398
0
            "blockquote" => hashset![
399
                "cite"
400
            ],
401
0
            "col" => hashset![
402
0
                "align", "char", "charoff", "span"
403
            ],
404
0
            "colgroup" => hashset![
405
0
                "align", "char", "charoff", "span"
406
            ],
407
0
            "del" => hashset![
408
0
                "cite", "datetime"
409
            ],
410
0
            "hr" => hashset![
411
0
                "align", "size", "width"
412
            ],
413
0
            "img" => hashset![
414
0
                "align", "alt", "height", "src", "width"
415
            ],
416
0
            "ins" => hashset![
417
0
                "cite", "datetime"
418
            ],
419
0
            "ol" => hashset![
420
                "start"
421
            ],
422
0
            "q" => hashset![
423
                "cite"
424
            ],
425
0
            "table" => hashset![
426
0
                "align", "char", "charoff", "summary"
427
            ],
428
0
            "tbody" => hashset![
429
0
                "align", "char", "charoff"
430
            ],
431
0
            "td" => hashset![
432
0
                "align", "char", "charoff", "colspan", "headers", "rowspan"
433
            ],
434
0
            "tfoot" => hashset![
435
0
                "align", "char", "charoff"
436
            ],
437
0
            "th" => hashset![
438
0
                "align", "char", "charoff", "colspan", "headers", "rowspan", "scope"
439
            ],
440
0
            "thead" => hashset![
441
0
                "align", "char", "charoff"
442
            ],
443
0
            "tr" => hashset![
444
0
                "align", "char", "charoff"
445
            ],
446
        ];
447
0
        let tag_attribute_values = hashmap![];
448
0
        let set_tag_attribute_values = hashmap![];
449
0
        let url_schemes = hashset![
450
            "bitcoin",
451
0
            "ftp",
452
0
            "ftps",
453
0
            "geo",
454
0
            "http",
455
0
            "https",
456
0
            "im",
457
0
            "irc",
458
0
            "ircs",
459
0
            "magnet",
460
0
            "mailto",
461
0
            "mms",
462
0
            "mx",
463
0
            "news",
464
0
            "nntp",
465
0
            "openpgp4fpr",
466
0
            "sip",
467
0
            "sms",
468
0
            "smsto",
469
0
            "ssh",
470
0
            "tel",
471
0
            "url",
472
0
            "webcal",
473
0
            "wtai",
474
0
            "xmpp"
475
        ];
476
0
        let allowed_classes = hashmap![];
477
478
0
        Builder {
479
0
            tags,
480
0
            clean_content_tags,
481
0
            tag_attributes,
482
0
            tag_attribute_values,
483
0
            set_tag_attribute_values,
484
0
            generic_attributes,
485
0
            url_schemes,
486
0
            url_relative: UrlRelative::PassThrough,
487
0
            attribute_filter: None,
488
0
            link_rel: Some("noopener noreferrer"),
489
0
            allowed_classes,
490
0
            strip_comments: true,
491
0
            id_prefix: None,
492
0
            generic_attribute_prefixes: None,
493
0
            style_properties: None,
494
0
        }
495
0
    }
496
}
497
498
impl<'a> Builder<'a> {
499
    /// Sets the tags that are allowed.
500
    ///
501
    /// # Examples
502
    ///
503
    ///     use ammonia::Builder;
504
    ///     use maplit::hashset;
505
    ///
506
    ///     # fn main() {
507
    ///     let tags = hashset!["my-tag"];
508
    ///     let a = Builder::new()
509
    ///         .tags(tags)
510
    ///         .clean("<my-tag>")
511
    ///         .to_string();
512
    ///     assert_eq!(a, "<my-tag></my-tag>");
513
    ///     # }
514
    ///
515
    /// # Defaults
516
    ///
517
    /// ```notest
518
    /// a, abbr, acronym, area, article, aside, b, bdi,
519
    /// bdo, blockquote, br, caption, center, cite, code,
520
    /// col, colgroup, data, dd, del, details, dfn, div,
521
    /// dl, dt, em, figcaption, figure, footer, h1, h2,
522
    /// h3, h4, h5, h6, header, hgroup, hr, i, img,
523
    /// ins, kbd, li, map, mark, nav, ol, p, pre,
524
    /// q, rp, rt, rtc, ruby, s, samp, small, span,
525
    /// strike, strong, sub, summary, sup, table, tbody,
526
    /// td, th, thead, time, tr, tt, u, ul, var, wbr
527
    /// ```
528
0
    pub fn tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
        // Wholesale replacement: the previous allowlist is dropped, not merged.
        self.tags = value;
        // Hand the builder back so calls can be chained.
        self
    }
532
533
    /// Add additional whitelisted tags without overwriting old ones.
534
    ///
535
    /// Does nothing if the tag is already there.
536
    ///
537
    /// # Examples
538
    ///
539
    ///     let a = ammonia::Builder::default()
540
    ///         .add_tags(&["my-tag"])
541
    ///         .clean("<my-tag>test</my-tag> <span>mess</span>").to_string();
542
    ///     assert_eq!("<my-tag>test</my-tag> <span>mess</span>", a);
543
0
    pub fn add_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
544
0
        &mut self,
545
0
        it: I,
546
0
    ) -> &mut Self {
547
0
        self.tags.extend(it.into_iter().map(Borrow::borrow));
548
0
        self
549
0
    }
550
551
    /// Remove already-whitelisted tags.
552
    ///
553
    /// Does nothing if the tags are already gone.
554
    ///
555
    /// # Examples
556
    ///
557
    ///     let a = ammonia::Builder::default()
558
    ///         .rm_tags(&["span"])
559
    ///         .clean("<span></span>").to_string();
560
    ///     assert_eq!("", a);
561
0
    pub fn rm_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
562
0
        &mut self,
563
0
        it: I,
564
0
    ) -> &mut Self {
565
0
        for i in it {
566
0
            self.tags.remove(i.borrow());
567
0
        }
568
0
        self
569
0
    }
570
571
    /// Returns a copy of the set of whitelisted tags.
572
    ///
573
    /// # Examples
574
    ///
575
    ///     use maplit::hashset;
576
    ///
577
    ///     let tags = hashset!["my-tag-1", "my-tag-2"];
578
    ///
579
    ///     let mut b = ammonia::Builder::default();
580
    ///     b.tags(Clone::clone(&tags));
581
    ///     assert_eq!(tags, b.clone_tags());
582
0
    pub fn clone_tags(&self) -> HashSet<&'a str> {
        // Return an independent copy so callers cannot mutate the builder's set.
        HashSet::clone(&self.tags)
    }
585
586
    /// Sets the tags whose contents will be completely removed from the output.
587
    ///
588
    /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause
589
    /// a panic.
590
    ///
591
    /// # Examples
592
    ///
593
    ///     use ammonia::Builder;
594
    ///     use maplit::hashset;
595
    ///
596
    ///     # fn main() {
597
    ///     let tag_blacklist = hashset!["script", "style"];
598
    ///     let a = Builder::new()
599
    ///         .clean_content_tags(tag_blacklist)
600
    ///         .clean("<script>alert('hello')</script><style>a { background: #fff }</style>")
601
    ///         .to_string();
602
    ///     assert_eq!(a, "");
603
    ///     # }
604
    ///
605
    /// # Defaults
606
    ///
607
    /// ```notest
608
    /// script, style
609
    /// ```
610
0
    pub fn clean_content_tags(&mut self, value: HashSet<&'a str>) -> &mut Self {
        // Wholesale replacement of the clean-content set; nothing is merged.
        self.clean_content_tags = value;
        // Hand the builder back so calls can be chained.
        self
    }
614
615
    /// Add additional blacklisted clean-content tags without overwriting old ones.
616
    ///
617
    /// Does nothing if the tag is already there.
618
    ///
619
    /// Adding tags which are whitelisted in `tags` or `tag_attributes` will cause
620
    /// a panic.
621
    ///
622
    /// # Examples
623
    ///
624
    ///     let a = ammonia::Builder::default()
625
    ///         .add_clean_content_tags(&["my-tag"])
626
    ///         .clean("<my-tag>test</my-tag><span>mess</span>").to_string();
627
    ///     assert_eq!("<span>mess</span>", a);
628
0
    pub fn add_clean_content_tags<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
629
0
        &mut self,
630
0
        it: I,
631
0
    ) -> &mut Self {
632
0
        self.clean_content_tags
633
0
            .extend(it.into_iter().map(Borrow::borrow));
634
0
        self
635
0
    }
636
637
    /// Remove already-blacklisted clean-content tags.
638
    ///
639
    /// Does nothing if the tags aren't blacklisted.
640
    ///
641
    /// # Examples
642
    ///     use ammonia::Builder;
643
    ///     use maplit::hashset;
644
    ///
645
    ///     # fn main() {
646
    ///     let tag_blacklist = hashset!["script"];
647
    ///     let a = ammonia::Builder::default()
648
    ///         .clean_content_tags(tag_blacklist)
649
    ///         .rm_clean_content_tags(&["script"])
650
    ///         .clean("<script>XSS</script>").to_string();
651
    ///     assert_eq!("XSS", a);
652
    ///     # }
653
0
    pub fn rm_clean_content_tags<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
654
0
        &mut self,
655
0
        it: I,
656
0
    ) -> &mut Self {
657
0
        for i in it {
658
0
            self.clean_content_tags.remove(i.borrow());
659
0
        }
660
0
        self
661
0
    }
662
663
    /// Returns a copy of the set of blacklisted clean-content tags.
664
    ///
665
    /// # Examples
666
    ///     # use maplit::hashset;
667
    ///
668
    ///     let tags = hashset!["my-tag-1", "my-tag-2"];
669
    ///
670
    ///     let mut b = ammonia::Builder::default();
671
    ///     b.clean_content_tags(Clone::clone(&tags));
672
    ///     assert_eq!(tags, b.clone_clean_content_tags());
673
0
    pub fn clone_clean_content_tags(&self) -> HashSet<&'a str> {
        // Return an independent copy so callers cannot mutate the builder's set.
        HashSet::clone(&self.clean_content_tags)
    }
676
677
    /// Sets the HTML attributes that are allowed on specific tags.
678
    ///
679
    /// The value is structured as a map from tag names to a set of attribute names.
680
    ///
681
    /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
682
    ///
683
    /// # Examples
684
    ///
685
    ///     use ammonia::Builder;
686
    ///     use maplit::{hashmap, hashset};
687
    ///
688
    ///     # fn main() {
689
    ///     let tags = hashset!["my-tag"];
690
    ///     let tag_attributes = hashmap![
691
    ///         "my-tag" => hashset!["val"]
692
    ///     ];
693
    ///     let a = Builder::new().tags(tags).tag_attributes(tag_attributes)
694
    ///         .clean("<my-tag val=1>")
695
    ///         .to_string();
696
    ///     assert_eq!(a, "<my-tag val=\"1\"></my-tag>");
697
    ///     # }
698
    ///
699
    /// # Defaults
700
    ///
701
    /// ```notest
702
    /// a =>
703
    ///     href, hreflang
704
    /// bdo =>
705
    ///     dir
706
    /// blockquote =>
707
    ///     cite
708
    /// col =>
709
    ///     align, char, charoff, span
710
    /// colgroup =>
711
    ///     align, char, charoff, span
712
    /// del =>
713
    ///     cite, datetime
714
    /// hr =>
715
    ///     align, size, width
716
    /// img =>
717
    ///     align, alt, height, src, width
718
    /// ins =>
719
    ///     cite, datetime
720
    /// ol =>
721
    ///     start
722
    /// q =>
723
    ///     cite
724
    /// table =>
725
    ///     align, char, charoff, summary
726
    /// tbody =>
727
    ///     align, char, charoff
728
    /// td =>
729
    ///     align, char, charoff, colspan, headers, rowspan
730
    /// tfoot =>
731
    ///     align, char, charoff
732
    /// th =>
733
    ///     align, char, charoff, colspan, headers, rowspan, scope
734
    /// thead =>
735
    ///     align, char, charoff
736
    /// tr =>
737
    ///     align, char, charoff
738
    /// ```
739
0
    pub fn tag_attributes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self {
        // Wholesale replacement of the per-tag attribute map; nothing is merged.
        self.tag_attributes = value;
        // Hand the builder back so calls can be chained.
        self
    }
743
744
    /// Add additional whitelisted tag-specific attributes without overwriting old ones.
745
    ///
746
    /// # Examples
747
    ///
748
    ///     let a = ammonia::Builder::default()
749
    ///         .add_tags(&["my-tag"])
750
    ///         .add_tag_attributes("my-tag", &["my-attr"])
751
    ///         .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string();
752
    ///     assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a);
753
0
    pub fn add_tag_attributes<
754
0
        T: 'a + ?Sized + Borrow<str>,
755
0
        U: 'a + ?Sized + Borrow<str>,
756
0
        I: IntoIter<Item = &'a T>,
757
0
    >(
758
0
        &mut self,
759
0
        tag: &'a U,
760
0
        it: I,
761
0
    ) -> &mut Self {
762
0
        self.tag_attributes
763
0
            .entry(tag.borrow())
764
0
            .or_default()
765
0
            .extend(it.into_iter().map(Borrow::borrow));
766
0
        self
767
0
    }
768
769
    /// Remove already-whitelisted tag-specific attributes.
770
    ///
771
    /// Does nothing if the attribute is already gone.
772
    ///
773
    /// # Examples
774
    ///
775
    ///     let a = ammonia::Builder::default()
776
    ///         .rm_tag_attributes("a", &["href"])
777
    ///         .clean("<a href=\"/\"></a>").to_string();
778
    ///     assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
779
0
    pub fn rm_tag_attributes<
780
0
        'b,
781
0
        'c,
782
0
        T: 'b + ?Sized + Borrow<str>,
783
0
        U: 'c + ?Sized + Borrow<str>,
784
0
        I: IntoIter<Item = &'b T>,
785
0
    >(
786
0
        &mut self,
787
0
        tag: &'c U,
788
0
        it: I,
789
0
    ) -> &mut Self {
790
0
        if let Some(tag) = self.tag_attributes.get_mut(tag.borrow()) {
791
0
            for i in it {
792
0
                tag.remove(i.borrow());
793
0
            }
794
0
        }
795
0
        self
796
0
    }
797
798
    /// Returns a copy of the set of whitelisted tag-specific attributes.
799
    ///
800
    /// # Examples
801
    ///     use maplit::{hashmap, hashset};
802
    ///
803
    ///     let tag_attributes = hashmap![
804
    ///         "my-tag" => hashset!["my-attr-1", "my-attr-2"]
805
    ///     ];
806
    ///
807
    ///     let mut b = ammonia::Builder::default();
808
    ///     b.tag_attributes(Clone::clone(&tag_attributes));
809
    ///     assert_eq!(tag_attributes, b.clone_tag_attributes());
810
0
    pub fn clone_tag_attributes(&self) -> HashMap<&'a str, HashSet<&'a str>> {
        // Return an independent copy so callers cannot mutate the builder's map.
        HashMap::clone(&self.tag_attributes)
    }
813
814
    /// Sets the values of HTML attributes that are allowed on specific tags.
815
    ///
816
    /// The value is structured as a map from tag names to a map from attribute names to a set of
817
    /// attribute values.
818
    ///
819
    /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
820
    ///
821
    /// # Examples
822
    ///
823
    ///     use ammonia::Builder;
824
    ///     use maplit::{hashmap, hashset};
825
    ///
826
    ///     # fn main() {
827
    ///     let tags = hashset!["my-tag"];
828
    ///     let tag_attribute_values = hashmap![
829
    ///         "my-tag" => hashmap![
830
    ///             "my-attr" => hashset!["val"],
831
    ///         ],
832
    ///     ];
833
    ///     let a = Builder::new().tags(tags).tag_attribute_values(tag_attribute_values)
834
    ///         .clean("<my-tag my-attr=val>")
835
    ///         .to_string();
836
    ///     assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>");
837
    ///     # }
838
    ///
839
    /// # Defaults
840
    ///
841
    /// None.
842
0
    pub fn tag_attribute_values(
843
0
        &mut self,
844
0
        value: HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>>,
845
0
    ) -> &mut Self {
846
0
        self.tag_attribute_values = value;
847
0
        self
848
0
    }
849
850
    /// Add additional whitelisted tag-specific attribute values without overwriting old ones.
851
    ///
852
    /// # Examples
853
    ///
854
    ///     let a = ammonia::Builder::default()
855
    ///         .add_tags(&["my-tag"])
856
    ///         .add_tag_attribute_values("my-tag", "my-attr", &[""])
857
    ///         .clean("<my-tag my-attr>test</my-tag> <span>mess</span>").to_string();
858
    ///     assert_eq!("<my-tag my-attr=\"\">test</my-tag> <span>mess</span>", a);
859
0
    pub fn add_tag_attribute_values<
860
0
        T: 'a + ?Sized + Borrow<str>,
861
0
        U: 'a + ?Sized + Borrow<str>,
862
0
        V: 'a + ?Sized + Borrow<str>,
863
0
        I: IntoIter<Item = &'a T>,
864
0
    >(
865
0
        &mut self,
866
0
        tag: &'a U,
867
0
        attribute: &'a V,
868
0
        it: I,
869
0
    ) -> &mut Self {
870
0
        self.tag_attribute_values
871
0
            .entry(tag.borrow())
872
0
            .or_default()
873
0
            .entry(attribute.borrow())
874
0
            .or_default()
875
0
            .extend(it.into_iter().map(Borrow::borrow));
876
877
0
        self
878
0
    }
879
880
    /// Remove already-whitelisted tag-specific attribute values.
881
    ///
882
    /// Does nothing if the attribute or the value is already gone.
883
    ///
884
    /// # Examples
885
    ///
886
    ///     let a = ammonia::Builder::default()
887
    ///         .rm_tag_attributes("a", &["href"])
888
    ///         .add_tag_attribute_values("a", "href", &["/"])
889
    ///         .rm_tag_attribute_values("a", "href", &["/"])
890
    ///         .clean("<a href=\"/\"></a>").to_string();
891
    ///     assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
892
0
    pub fn rm_tag_attribute_values<
893
0
        'b,
894
0
        'c,
895
0
        T: 'b + ?Sized + Borrow<str>,
896
0
        U: 'c + ?Sized + Borrow<str>,
897
0
        V: 'c + ?Sized + Borrow<str>,
898
0
        I: IntoIter<Item = &'b T>,
899
0
    >(
900
0
        &mut self,
901
0
        tag: &'c U,
902
0
        attribute: &'c V,
903
0
        it: I,
904
0
    ) -> &mut Self {
905
0
        if let Some(attrs) = self
906
0
            .tag_attribute_values
907
0
            .get_mut(tag.borrow())
908
0
            .and_then(|map| map.get_mut(attribute.borrow()))
909
        {
910
0
            for i in it {
911
0
                attrs.remove(i.borrow());
912
0
            }
913
0
        }
914
0
        self
915
0
    }
916
917
    /// Returns a copy of the set of whitelisted tag-specific attribute values.
918
    ///
919
    /// # Examples
920
    ///
921
    ///     use maplit::{hashmap, hashset};
922
    ///
923
    ///     let attribute_values = hashmap![
924
    ///         "my-attr-1" => hashset!["foo"],
925
    ///         "my-attr-2" => hashset!["baz", "bar"],
926
    ///     ];
927
    ///     let tag_attribute_values = hashmap![
928
    ///         "my-tag" => attribute_values
929
    ///     ];
930
    ///
931
    ///     let mut b = ammonia::Builder::default();
932
    ///     b.tag_attribute_values(Clone::clone(&tag_attribute_values));
933
    ///     assert_eq!(tag_attribute_values, b.clone_tag_attribute_values());
934
0
    pub fn clone_tag_attribute_values(
935
0
        &self,
936
0
    ) -> HashMap<&'a str, HashMap<&'a str, HashSet<&'a str>>> {
937
0
        self.tag_attribute_values.clone()
938
0
    }
939
940
    /// Sets the values of HTML attributes that are to be set on specific tags.
941
    ///
942
    /// The value is structured as a map from tag names to a map from attribute names to an
943
    /// attribute value.
944
    ///
945
    /// If a tag is not itself whitelisted, adding entries to this map will do nothing.
946
    ///
947
    /// # Examples
948
    ///
949
    ///     use ammonia::Builder;
950
    ///     use maplit::{hashmap, hashset};
951
    ///
952
    ///     # fn main() {
953
    ///     let tags = hashset!["my-tag"];
954
    ///     let set_tag_attribute_values = hashmap![
955
    ///         "my-tag" => hashmap![
956
    ///             "my-attr" => "val",
957
    ///         ],
958
    ///     ];
959
    ///     let a = Builder::new().tags(tags).set_tag_attribute_values(set_tag_attribute_values)
960
    ///         .clean("<my-tag>")
961
    ///         .to_string();
962
    ///     assert_eq!(a, "<my-tag my-attr=\"val\"></my-tag>");
963
    ///     # }
964
    ///
965
    /// # Defaults
966
    ///
967
    /// None.
968
0
    pub fn set_tag_attribute_values(
969
0
        &mut self,
970
0
        value: HashMap<&'a str, HashMap<&'a str, &'a str>>,
971
0
    ) -> &mut Self {
972
0
        self.set_tag_attribute_values = value;
973
0
        self
974
0
    }
975
976
    /// Add an attribute value to set on a specific element.
977
    ///
978
    /// # Examples
979
    ///
980
    ///     let a = ammonia::Builder::default()
981
    ///         .add_tags(&["my-tag"])
982
    ///         .set_tag_attribute_value("my-tag", "my-attr", "val")
983
    ///         .clean("<my-tag>test</my-tag> <span>mess</span>").to_string();
984
    ///     assert_eq!("<my-tag my-attr=\"val\">test</my-tag> <span>mess</span>", a);
985
0
    pub fn set_tag_attribute_value<
986
0
        T: 'a + ?Sized + Borrow<str>,
987
0
        A: 'a + ?Sized + Borrow<str>,
988
0
        V: 'a + ?Sized + Borrow<str>,
989
0
    >(
990
0
        &mut self,
991
0
        tag: &'a T,
992
0
        attribute: &'a A,
993
0
        value: &'a V,
994
0
    ) -> &mut Self {
995
0
        self.set_tag_attribute_values
996
0
            .entry(tag.borrow())
997
0
            .or_default()
998
0
            .insert(attribute.borrow(), value.borrow());
999
0
        self
1000
0
    }
1001
1002
    /// Remove existing tag-specific attribute values to be set.
1003
    ///
1004
    /// Does nothing if the attribute is already gone.
1005
    ///
1006
    /// # Examples
1007
    ///
1008
    ///     let a = ammonia::Builder::default()
1009
    ///         // this does nothing, since no value is set for this tag attribute yet
1010
    ///         .rm_set_tag_attribute_value("a", "target")
1011
    ///         .set_tag_attribute_value("a", "target", "_blank")
1012
    ///         .rm_set_tag_attribute_value("a", "target")
1013
    ///         .clean("<a href=\"/\"></a>").to_string();
1014
    ///     assert_eq!("<a href=\"/\" rel=\"noopener noreferrer\"></a>", a);
1015
0
    pub fn rm_set_tag_attribute_value<
1016
0
        T: 'a + ?Sized + Borrow<str>,
1017
0
        A: 'a + ?Sized + Borrow<str>,
1018
0
    >(
1019
0
        &mut self,
1020
0
        tag: &'a T,
1021
0
        attribute: &'a A,
1022
0
    ) -> &mut Self {
1023
0
        if let Some(attributes) = self.set_tag_attribute_values.get_mut(tag.borrow()) {
1024
0
            attributes.remove(attribute.borrow());
1025
0
        }
1026
0
        self
1027
0
    }
1028
1029
    /// Returns the value that will be set for the attribute on the element, if any.
1030
    ///
1031
    /// # Examples
1032
    ///
1033
    ///     let mut b = ammonia::Builder::default();
1034
    ///     b.set_tag_attribute_value("a", "target", "_blank");
1035
    ///     let value = b.get_set_tag_attribute_value("a", "target");
1036
    ///     assert_eq!(value, Some("_blank"));
1037
0
    pub fn get_set_tag_attribute_value<
1038
0
        T: 'a + ?Sized + Borrow<str>,
1039
0
        A: 'a + ?Sized + Borrow<str>,
1040
0
    >(
1041
0
        &self,
1042
0
        tag: &'a T,
1043
0
        attribute: &'a A,
1044
0
    ) -> Option<&'a str> {
1045
0
        self.set_tag_attribute_values
1046
0
            .get(tag.borrow())
1047
0
            .and_then(|map| map.get(attribute.borrow()))
1048
0
            .copied()
1049
0
    }
1050
1051
    /// Returns a copy of the set of tag-specific attribute values to be set.
1052
    ///
1053
    /// # Examples
1054
    ///
1055
    ///     use maplit::{hashmap, hashset};
1056
    ///
1057
    ///     let attribute_values = hashmap![
1058
    ///         "my-attr-1" => "foo",
1059
    ///         "my-attr-2" => "bar",
1060
    ///     ];
1061
    ///     let set_tag_attribute_values = hashmap![
1062
    ///         "my-tag" => attribute_values,
1063
    ///     ];
1064
    ///
1065
    ///     let mut b = ammonia::Builder::default();
1066
    ///     b.set_tag_attribute_values(Clone::clone(&set_tag_attribute_values));
1067
    ///     assert_eq!(set_tag_attribute_values, b.clone_set_tag_attribute_values());
1068
0
    pub fn clone_set_tag_attribute_values(&self) -> HashMap<&'a str, HashMap<&'a str, &'a str>> {
        // Owned copy of the tag -> attribute -> value map.
        HashMap::clone(&self.set_tag_attribute_values)
    }
1071
1072
    /// Sets the prefix of attributes that are allowed on any tag.
1073
    ///
1074
    /// # Examples
1075
    ///
1076
    ///     use ammonia::Builder;
1077
    ///     use maplit::hashset;
1078
    ///
1079
    ///     # fn main() {
1080
    ///     let prefixes = hashset!["data-"];
1081
    ///     let a = Builder::new()
1082
    ///         .generic_attribute_prefixes(prefixes)
1083
    ///         .clean("<b data-val=1>")
1084
    ///         .to_string();
1085
    ///     assert_eq!(a, "<b data-val=\"1\"></b>");
1086
    ///     # }
1087
    ///
1088
    /// # Defaults
1089
    ///
1090
    /// No attribute prefixes are allowed by default.
1091
0
    pub fn generic_attribute_prefixes(&mut self, value: HashSet<&'a str>) -> &mut Self {
        // The field is optional; supplying a set always stores `Some`, even if the set is empty.
        let builder = self;
        builder.generic_attribute_prefixes = Some(value);
        builder
    }
1095
1096
    /// Add additional whitelisted attribute prefixes without overwriting old ones.
1097
    ///
1098
    /// # Examples
1099
    ///
1100
    ///     let a = ammonia::Builder::default()
1101
    ///         .add_generic_attribute_prefixes(&["my-"])
1102
    ///         .clean("<span my-attr>mess</span>").to_string();
1103
    ///     assert_eq!("<span my-attr=\"\">mess</span>", a);
1104
0
    pub fn add_generic_attribute_prefixes<
1105
0
        T: 'a + ?Sized + Borrow<str>,
1106
0
        I: IntoIter<Item = &'a T>,
1107
0
    >(
1108
0
        &mut self,
1109
0
        it: I,
1110
0
    ) -> &mut Self {
1111
0
        self.generic_attribute_prefixes
1112
0
            .get_or_insert_with(HashSet::new)
1113
0
            .extend(it.into_iter().map(Borrow::borrow));
1114
0
        self
1115
0
    }
1116
1117
    /// Remove already-whitelisted attribute prefixes.
1118
    ///
1119
    /// Does nothing if the attribute prefix is already gone.
1120
    ///
1121
    /// # Examples
1122
    ///
1123
    ///     let a = ammonia::Builder::default()
1124
    ///         .add_generic_attribute_prefixes(&["data-", "code-"])
1125
    ///         .rm_generic_attribute_prefixes(&["data-"])
1126
    ///         .clean("<span code-test=\"foo\" data-test=\"cool\"></span>").to_string();
1127
    ///     assert_eq!("<span code-test=\"foo\"></span>", a);
1128
0
    pub fn rm_generic_attribute_prefixes<
1129
0
        'b,
1130
0
        T: 'b + ?Sized + Borrow<str>,
1131
0
        I: IntoIter<Item = &'b T>,
1132
0
    >(
1133
0
        &mut self,
1134
0
        it: I,
1135
0
    ) -> &mut Self {
1136
0
        if let Some(true) = self.generic_attribute_prefixes.as_mut().map(|prefixes| {
1137
0
            for i in it {
1138
0
                let _ = prefixes.remove(i.borrow());
1139
0
            }
1140
0
            prefixes.is_empty()
1141
0
        }) {
1142
0
            self.generic_attribute_prefixes = None;
1143
0
        }
1144
0
        self
1145
0
    }
1146
1147
    /// Returns a copy of the set of whitelisted attribute prefixes.
1148
    ///
1149
    /// # Examples
1150
    ///
1151
    ///     use maplit::hashset;
1152
    ///
1153
    ///     let generic_attribute_prefixes = hashset!["my-prfx-1-", "my-prfx-2-"];
1154
    ///
1155
    ///     let mut b = ammonia::Builder::default();
1156
    ///     b.generic_attribute_prefixes(Clone::clone(&generic_attribute_prefixes));
1157
    ///     assert_eq!(Some(generic_attribute_prefixes), b.clone_generic_attribute_prefixes());
1158
0
    pub fn clone_generic_attribute_prefixes(&self) -> Option<HashSet<&'a str>> {
        // `None` stays `None`; a configured set is cloned.
        self.generic_attribute_prefixes.as_ref().cloned()
    }
1161
1162
    /// Sets the attributes that are allowed on any tag.
1163
    ///
1164
    /// # Examples
1165
    ///
1166
    ///     use ammonia::Builder;
1167
    ///     use maplit::hashset;
1168
    ///
1169
    ///     # fn main() {
1170
    ///     let attributes = hashset!["data-val"];
1171
    ///     let a = Builder::new()
1172
    ///         .generic_attributes(attributes)
1173
    ///         .clean("<b data-val=1>")
1174
    ///         .to_string();
1175
    ///     assert_eq!(a, "<b data-val=\"1\"></b>");
1176
    ///     # }
1177
    ///
1178
    /// # Defaults
1179
    ///
1180
    /// ```notest
1181
    /// lang, title
1182
    /// ```
1183
0
    pub fn generic_attributes(&mut self, value: HashSet<&'a str>) -> &mut Self {
        // Replaces the whole set; use `add_generic_attributes` to extend it instead.
        let builder = self;
        builder.generic_attributes = value;
        builder
    }
1187
1188
    /// Add additional whitelisted attributes without overwriting old ones.
1189
    ///
1190
    /// # Examples
1191
    ///
1192
    ///     let a = ammonia::Builder::default()
1193
    ///         .add_generic_attributes(&["my-attr"])
1194
    ///         .clean("<span my-attr>mess</span>").to_string();
1195
    ///     assert_eq!("<span my-attr=\"\">mess</span>", a);
1196
0
    pub fn add_generic_attributes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
1197
0
        &mut self,
1198
0
        it: I,
1199
0
    ) -> &mut Self {
1200
0
        self.generic_attributes
1201
0
            .extend(it.into_iter().map(Borrow::borrow));
1202
0
        self
1203
0
    }
1204
1205
    /// Remove already-whitelisted attributes.
1206
    ///
1207
    /// Does nothing if the attribute is already gone.
1208
    ///
1209
    /// # Examples
1210
    ///
1211
    ///     let a = ammonia::Builder::default()
1212
    ///         .rm_generic_attributes(&["title"])
1213
    ///         .clean("<span title=\"cool\"></span>").to_string();
1214
    ///     assert_eq!("<span></span>", a);
1215
0
    pub fn rm_generic_attributes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
1216
0
        &mut self,
1217
0
        it: I,
1218
0
    ) -> &mut Self {
1219
0
        for i in it {
1220
0
            self.generic_attributes.remove(i.borrow());
1221
0
        }
1222
0
        self
1223
0
    }
1224
1225
    /// Returns a copy of the set of whitelisted attributes.
1226
    ///
1227
    /// # Examples
1228
    ///
1229
    ///     use maplit::hashset;
1230
    ///
1231
    ///     let generic_attributes = hashset!["my-attr-1", "my-attr-2"];
1232
    ///
1233
    ///     let mut b = ammonia::Builder::default();
1234
    ///     b.generic_attributes(Clone::clone(&generic_attributes));
1235
    ///     assert_eq!(generic_attributes, b.clone_generic_attributes());
1236
0
    pub fn clone_generic_attributes(&self) -> HashSet<&'a str> {
        // Owned copy of the generic attribute whitelist.
        HashSet::clone(&self.generic_attributes)
    }
1239
1240
    /// Sets the URL schemes permitted on `href` and `src` attributes.
1241
    ///
1242
    /// # Examples
1243
    ///
1244
    ///     use ammonia::Builder;
1245
    ///     use maplit::hashset;
1246
    ///
1247
    ///     # fn main() {
1248
    ///     let url_schemes = hashset![
1249
    ///         "http", "https", "mailto", "magnet"
1250
    ///     ];
1251
    ///     let a = Builder::new().url_schemes(url_schemes)
1252
    ///         .clean("<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&xl=0&dn=zero_len.fil&xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\">zero-length file</a>")
1253
    ///         .to_string();
1254
    ///
1255
    ///     // See `link_rel` for information on the rel="noopener noreferrer" attribute
1256
    ///     // in the cleaned HTML.
1257
    ///     assert_eq!(a,
1258
    ///       "<a href=\"magnet:?xt=urn:ed2k:31D6CFE0D16AE931B73C59D7E0C089C0&amp;xl=0&amp;dn=zero_len.fil&amp;xt=urn:bitprint:3I42H3S6NNFQ2MSVX7XZKYAYSCX5QBYJ.LWPNACQDBZRYXW3VHJVCJ64QBZNGHOHHHZWCLNQ&amp;xt=urn:md5:D41D8CD98F00B204E9800998ECF8427E\" rel=\"noopener noreferrer\">zero-length file</a>");
1259
    ///     # }
1260
    ///
1261
    /// # Defaults
1262
    ///
1263
    /// ```notest
1264
    /// bitcoin, ftp, ftps, geo, http, https, im, irc,
1265
    /// ircs, magnet, mailto, mms, mx, news, nntp,
1266
    /// openpgp4fpr, sip, sms, smsto, ssh, tel, url,
1267
    /// webcal, wtai, xmpp
1268
    /// ```
1269
0
    pub fn url_schemes(&mut self, value: HashSet<&'a str>) -> &mut Self {
        // Replaces the whole set; use `add_url_schemes` to extend it instead.
        let builder = self;
        builder.url_schemes = value;
        builder
    }
1273
1274
    /// Add additional whitelisted URL schemes without overwriting old ones.
1275
    ///
1276
    /// # Examples
1277
    ///
1278
    ///     let a = ammonia::Builder::default()
1279
    ///         .add_url_schemes(&["my-scheme"])
1280
    ///         .clean("<a href=my-scheme:home>mess</span>").to_string();
1281
    ///     assert_eq!("<a href=\"my-scheme:home\" rel=\"noopener noreferrer\">mess</a>", a);
1282
0
    pub fn add_url_schemes<T: 'a + ?Sized + Borrow<str>, I: IntoIter<Item = &'a T>>(
1283
0
        &mut self,
1284
0
        it: I,
1285
0
    ) -> &mut Self {
1286
0
        self.url_schemes.extend(it.into_iter().map(Borrow::borrow));
1287
0
        self
1288
0
    }
1289
1290
    /// Remove already-whitelisted URL schemes.
1291
    ///
1292
    /// Does nothing if the URL scheme is already gone.
1293
    ///
1294
    /// # Examples
1295
    ///
1296
    ///     let a = ammonia::Builder::default()
1297
    ///         .rm_url_schemes(&["ftp"])
1298
    ///         .clean("<a href=\"ftp://ftp.mozilla.org/\"></a>").to_string();
1299
    ///     assert_eq!("<a rel=\"noopener noreferrer\"></a>", a);
1300
0
    pub fn rm_url_schemes<'b, T: 'b + ?Sized + Borrow<str>, I: IntoIter<Item = &'b T>>(
1301
0
        &mut self,
1302
0
        it: I,
1303
0
    ) -> &mut Self {
1304
0
        for i in it {
1305
0
            self.url_schemes.remove(i.borrow());
1306
0
        }
1307
0
        self
1308
0
    }
1309
1310
    /// Returns a copy of the set of whitelisted URL schemes.
1311
    ///
1312
    /// # Examples
1313
    ///     use maplit::hashset;
1314
    ///
1315
    ///     let url_schemes = hashset!["my-scheme-1", "my-scheme-2"];
1316
    ///
1317
    ///     let mut b = ammonia::Builder::default();
1318
    ///     b.url_schemes(Clone::clone(&url_schemes));
1319
    ///     assert_eq!(url_schemes, b.clone_url_schemes());
1320
0
    pub fn clone_url_schemes(&self) -> HashSet<&'a str> {
        // Owned copy of the URL scheme whitelist.
        HashSet::clone(&self.url_schemes)
    }
1323
1324
    /// Configures the behavior for relative URLs: pass-through, resolve-with-base, or deny.
1325
    ///
1326
    /// # Examples
1327
    ///
1328
    ///     use ammonia::{Builder, UrlRelative};
1329
    ///
1330
    ///     let a = Builder::new().url_relative(UrlRelative::PassThrough)
1331
    ///         .clean("<a href=/>Home</a>")
1332
    ///         .to_string();
1333
    ///
1334
    ///     // See `link_rel` for information on the rel="noopener noreferrer" attribute
1335
    ///     // in the cleaned HTML.
1336
    ///     assert_eq!(
1337
    ///       a,
1338
    ///       "<a href=\"/\" rel=\"noopener noreferrer\">Home</a>");
1339
    ///
1340
    /// # Defaults
1341
    ///
1342
    /// ```notest
1343
    /// UrlRelative::PassThrough
1344
    /// ```
1345
0
    pub fn url_relative(&mut self, value: UrlRelative<'a>) -> &mut Self {
        // The previous policy is dropped and replaced by `value`.
        let builder = self;
        builder.url_relative = value;
        builder
    }
1349
1350
    /// Allows rewriting of all attributes using a callback.
1351
    ///
1352
    /// The callback takes the name of the element, the name of the attribute, and its value.
1353
    /// Returns `None` to remove the attribute, or a value to use.
1354
    ///
1355
    /// Rewriting of attributes with URLs is done before `url_relative()`.
1356
    ///
1357
    /// # Panics
1358
    ///
1359
    /// If more than one callback is set.
1360
    ///
1361
    /// # Examples
1362
    ///
1363
    /// ```rust
1364
    /// use ammonia::Builder;
1365
    /// let a = Builder::new()
1366
    ///     .attribute_filter(|element, attribute, value| {
1367
    ///         match (element, attribute) {
1368
    ///             ("img", "src") => None,
1369
    ///             _ => Some(value.into())
1370
    ///         }
1371
    ///     })
1372
    ///     .link_rel(None)
1373
    ///     .clean("<a href=/><img alt=Home src=foo></a>")
1374
    ///     .to_string();
1375
    /// assert_eq!(a,
1376
    ///     r#"<a href="/"><img alt="Home"></a>"#);
1377
    /// ```
1378
0
    pub fn attribute_filter<'cb, CallbackFn>(&mut self, callback: CallbackFn) -> &mut Self
1379
0
    where
1380
0
        CallbackFn: for<'u> Fn(&str, &str, &'u str) -> Option<Cow<'u, str>> + Send + Sync + 'static,
1381
    {
1382
0
        assert!(
1383
0
            self.attribute_filter.is_none(),
1384
            "attribute_filter can be set only once"
1385
        );
1386
0
        self.attribute_filter = Some(Box::new(callback));
1387
0
        self
1388
0
    }
1389
1390
    /// Returns `true` if the relative URL resolver is set to `Deny`.
1391
    ///
1392
    /// # Examples
1393
    ///
1394
    ///     use ammonia::{Builder, UrlRelative};
1395
    ///     let mut a = Builder::default();
1396
    ///     a.url_relative(UrlRelative::Deny);
1397
    ///     assert!(a.is_url_relative_deny());
1398
    ///     a.url_relative(UrlRelative::PassThrough);
1399
    ///     assert!(!a.is_url_relative_deny());
1400
0
    pub fn is_url_relative_deny(&self) -> bool {
1401
0
        matches!(self.url_relative, UrlRelative::Deny)
1402
0
    }
1403
1404
    /// Returns `true` if the relative URL resolver is set to `PassThrough`.
1405
    ///
1406
    /// # Examples
1407
    ///
1408
    ///     use ammonia::{Builder, UrlRelative};
1409
    ///     let mut a = Builder::default();
1410
    ///     a.url_relative(UrlRelative::Deny);
1411
    ///     assert!(!a.is_url_relative_pass_through());
1412
    ///     a.url_relative(UrlRelative::PassThrough);
1413
    ///     assert!(a.is_url_relative_pass_through());
1414
0
    pub fn is_url_relative_pass_through(&self) -> bool {
1415
0
        matches!(self.url_relative, UrlRelative::PassThrough)
1416
0
    }
1417
1418
    /// Returns `true` if the relative URL resolver is set to `Custom`.
1419
    ///
1420
    /// # Examples
1421
    ///
1422
    ///     use ammonia::{Builder, UrlRelative};
1423
    ///     use std::borrow::Cow;
1424
    ///     fn test(a: &str) -> Option<Cow<str>> { None }
1425
    ///     # fn main() {
1426
    ///     let mut a = Builder::default();
1427
    ///     a.url_relative(UrlRelative::Custom(Box::new(test)));
1428
    ///     assert!(a.is_url_relative_custom());
1429
    ///     a.url_relative(UrlRelative::PassThrough);
1430
    ///     assert!(!a.is_url_relative_custom());
1431
    ///     a.url_relative(UrlRelative::Deny);
1432
    ///     assert!(!a.is_url_relative_custom());
1433
    ///     # }
1434
0
    pub fn is_url_relative_custom(&self) -> bool {
1435
0
        matches!(self.url_relative, UrlRelative::Custom(_))
1436
0
    }
1437
1438
    /// Configures a `rel` attribute that will be added on links.
1439
    ///
1440
    /// If `rel` is in the generic or tag attributes, this must be set to `None`.
1441
    /// Common `rel` values to include:
1442
    ///
1443
    /// * `noopener`: This prevents [a particular type of XSS attack],
1444
    ///   and should usually be turned on for untrusted HTML.
1445
    /// * `noreferrer`: This prevents the browser from [sending the source URL]
1446
    ///   to the website that is linked to.
1447
    /// * `nofollow`: This prevents search engines from [using this link for
1448
    ///   ranking], which disincentivizes spammers.
1449
    ///
1450
    /// To turn on rel-insertion, call this function with a space-separated list.
1451
    /// Ammonia does not parse rel-attributes;
1452
    /// it just puts the given string into the attribute directly.
1453
    ///
1454
    /// [a particular type of XSS attack]: https://mathiasbynens.github.io/rel-noopener/
1455
    /// [sending the source URL]: https://en.wikipedia.org/wiki/HTTP_referer
1456
    /// [using this link for ranking]: https://en.wikipedia.org/wiki/Nofollow
1457
    ///
1458
    /// # Examples
1459
    ///
1460
    ///     use ammonia::Builder;
1461
    ///
1462
    ///     let a = Builder::new().link_rel(None)
1463
    ///         .clean("<a href=https://rust-lang.org/>Rust</a>")
1464
    ///         .to_string();
1465
    ///     assert_eq!(
1466
    ///       a,
1467
    ///       "<a href=\"https://rust-lang.org/\">Rust</a>");
1468
    ///
1469
    /// # Defaults
1470
    ///
1471
    /// ```notest
1472
    /// Some("noopener noreferrer")
1473
    /// ```
1474
0
    pub fn link_rel(&mut self, value: Option<&'a str>) -> &mut Self {
        // The string is stored as given; `None` clears the setting.
        let builder = self;
        builder.link_rel = value;
        builder
    }
1478
1479
    /// Returns the settings for links' `rel` attribute, if one is set.
1480
    ///
1481
    /// # Examples
1482
    ///
1483
    ///     use ammonia::{Builder, UrlRelative};
1484
    ///     let mut a = Builder::default();
1485
    ///     a.link_rel(Some("a b"));
1486
    ///     assert_eq!(a.get_link_rel(), Some("a b"));
1487
0
    pub fn get_link_rel(&self) -> Option<&str> {
1488
0
        self.link_rel
1489
0
    }
1490
1491
    /// Sets the CSS classes that are allowed on specific tags.
1492
    ///
1493
    /// The value is structured as a map from tag names to a set of class names.
1494
    ///
1495
    /// If the `class` attribute is itself whitelisted for a tag, then adding entries to
1496
    /// this map will cause a panic.
1497
    ///
1498
    /// # Examples
1499
    ///
1500
    ///     use ammonia::Builder;
1501
    ///     use maplit::{hashmap, hashset};
1502
    ///
1503
    ///     # fn main() {
1504
    ///     let allowed_classes = hashmap![
1505
    ///         "code" => hashset!["rs", "ex", "c", "cxx", "js"]
1506
    ///     ];
1507
    ///     let a = Builder::new()
1508
    ///         .allowed_classes(allowed_classes)
1509
    ///         .clean("<code class=rs>fn main() {}</code>")
1510
    ///         .to_string();
1511
    ///     assert_eq!(
1512
    ///       a,
1513
    ///       "<code class=\"rs\">fn main() {}</code>");
1514
    ///     # }
1515
    ///
1516
    /// # Defaults
1517
    ///
1518
    /// The set of allowed classes is empty by default.
1519
0
    pub fn allowed_classes(&mut self, value: HashMap<&'a str, HashSet<&'a str>>) -> &mut Self {
        // Replaces the whole map; use `add_allowed_classes` to extend it instead.
        let builder = self;
        builder.allowed_classes = value;
        builder
    }
1523
1524
    /// Add additional whitelisted classes without overwriting old ones.
1525
    ///
1526
    /// # Examples
1527
    ///
1528
    ///     let a = ammonia::Builder::default()
1529
    ///         .add_allowed_classes("a", &["onebox"])
1530
    ///         .clean("<a href=/ class=onebox>mess</span>").to_string();
1531
    ///     assert_eq!("<a href=\"/\" class=\"onebox\" rel=\"noopener noreferrer\">mess</a>", a);
1532
0
    pub fn add_allowed_classes<
1533
0
        T: 'a + ?Sized + Borrow<str>,
1534
0
        U: 'a + ?Sized + Borrow<str>,
1535
0
        I: IntoIter<Item = &'a T>,
1536
0
    >(
1537
0
        &mut self,
1538
0
        tag: &'a U,
1539
0
        it: I,
1540
0
    ) -> &mut Self {
1541
0
        self.allowed_classes
1542
0
            .entry(tag.borrow())
1543
0
            .or_default()
1544
0
            .extend(it.into_iter().map(Borrow::borrow));
1545
0
        self
1546
0
    }
1547
1548
    /// Remove already-whitelisted classes.
1549
    ///
1550
    /// Does nothing if the class is already gone.
1551
    ///
1552
    /// # Examples
1553
    ///
1554
    ///     let a = ammonia::Builder::default()
1555
    ///         .add_allowed_classes("span", &["active"])
1556
    ///         .rm_allowed_classes("span", &["active"])
1557
    ///         .clean("<span class=active>").to_string();
1558
    ///     assert_eq!("<span class=\"\"></span>", a);
1559
0
    pub fn rm_allowed_classes<
1560
0
        'b,
1561
0
        'c,
1562
0
        T: 'b + ?Sized + Borrow<str>,
1563
0
        U: 'c + ?Sized + Borrow<str>,
1564
0
        I: IntoIter<Item = &'b T>,
1565
0
    >(
1566
0
        &mut self,
1567
0
        tag: &'c U,
1568
0
        it: I,
1569
0
    ) -> &mut Self {
1570
0
        if let Some(tag) = self.allowed_classes.get_mut(tag.borrow()) {
1571
0
            for i in it {
1572
0
                tag.remove(i.borrow());
1573
0
            }
1574
0
        }
1575
0
        self
1576
0
    }
1577
1578
    /// Returns a copy of the set of whitelisted class attributes.
1579
    ///
1580
    /// # Examples
1581
    ///
1582
    ///     use maplit::{hashmap, hashset};
1583
    ///
1584
    ///     let allowed_classes = hashmap![
1585
    ///         "my-tag" => hashset!["my-class-1", "my-class-2"]
1586
    ///     ];
1587
    ///
1588
    ///     let mut b = ammonia::Builder::default();
1589
    ///     b.allowed_classes(Clone::clone(&allowed_classes));
1590
    ///     assert_eq!(allowed_classes, b.clone_allowed_classes());
1591
0
    pub fn clone_allowed_classes(&self) -> HashMap<&'a str, HashSet<&'a str>> {
        // Owned copy of the tag -> classes map.
        HashMap::clone(&self.allowed_classes)
    }
1594
1595
    /// Configures the handling of HTML comments.
1596
    ///
1597
    /// If this option is false, comments will be preserved.
1598
    ///
1599
    /// # Examples
1600
    ///
1601
    ///     use ammonia::Builder;
1602
    ///
1603
    ///     let a = Builder::new().strip_comments(false)
1604
    ///         .clean("<!-- yes -->")
1605
    ///         .to_string();
1606
    ///     assert_eq!(
1607
    ///       a,
1608
    ///       "<!-- yes -->");
1609
    ///
1610
    /// # Defaults
1611
    ///
1612
    /// `true`
1613
0
    pub fn strip_comments(&mut self, value: bool) -> &mut Self {
        // Plain flag store; read back through `will_strip_comments`.
        let builder = self;
        builder.strip_comments = value;
        builder
    }
1617
1618
    /// Returns `true` if comment stripping is turned on.
1619
    ///
1620
    /// # Examples
1621
    ///
1622
    ///     let mut a = ammonia::Builder::new();
1623
    ///     a.strip_comments(true);
1624
    ///     assert!(a.will_strip_comments());
1625
    ///     a.strip_comments(false);
1626
    ///     assert!(!a.will_strip_comments());
1627
0
    pub fn will_strip_comments(&self) -> bool {
1628
0
        self.strip_comments
1629
0
    }
1630
1631
    /// Prefixes all "id" attribute values with a given string.  Note that the tag and
1632
    /// attribute themselves must still be whitelisted.
1633
    ///
1634
    /// # Examples
1635
    ///
1636
    ///     use ammonia::Builder;
1637
    ///     use maplit::hashset;
1638
    ///
1639
    ///     # fn main() {
1640
    ///     let attributes = hashset!["id"];
1641
    ///     let a = Builder::new()
1642
    ///         .generic_attributes(attributes)
1643
    ///         .id_prefix(Some("safe-"))
1644
    ///         .clean("<b id=42>")
1645
    ///         .to_string();
1646
    ///     assert_eq!(a, "<b id=\"safe-42\"></b>");
1647
    ///     # }
1648
1649
    ///
1650
    /// # Defaults
1651
    ///
1652
    /// `None`
1653
0
    pub fn id_prefix(&mut self, value: Option<&'a str>) -> &mut Self {
        // `None` clears any previously configured prefix.
        let builder = self;
        builder.id_prefix = value;
        builder
    }
1657
1658
    /// Only allows the specified properties in `style` attributes.
1659
    ///
1660
    /// Irrelevant if `style` is not an allowed attribute.
1661
    ///
1662
    /// Note that if style filtering is enabled style properties will be normalised e.g.
1663
    /// invalid declarations and @rules will be removed, with only syntactically valid
1664
    /// declarations kept.
1665
    ///
1666
    /// # Examples
1667
    ///
1668
    ///     use ammonia::Builder;
1669
    ///     use maplit::hashset;
1670
    ///
1671
    ///     # fn main() {
1672
    ///     let attributes = hashset!["style"];
1673
    ///     let properties = hashset!["color"];
1674
    ///     let a = Builder::new()
1675
    ///         .generic_attributes(attributes)
1676
    ///         .filter_style_properties(properties)
1677
    ///         .clean("<p style=\"font-weight: heavy; color: red\">my html</p>")
1678
    ///         .to_string();
1679
    ///     assert_eq!(a, "<p style=\"color:red\">my html</p>");
1680
    ///     # }
1681
0
    pub fn filter_style_properties(&mut self, value: HashSet<&'a str>) -> &mut Self {
1682
0
        self.style_properties = Some(value);
1683
0
        self
1684
0
    }
1685
1686
    /// Constructs a [`Builder`] instance configured with the [default options].
1687
    ///
1688
    /// # Examples
1689
    ///
1690
    ///     use ammonia::{Builder, Url, UrlRelative};
1691
    ///     # use std::error::Error;
1692
    ///
1693
    ///     # fn do_main() -> Result<(), Box<dyn Error>> {
1694
    ///     let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>.";
1695
    ///     let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>.";
1696
    ///
1697
    ///     let result = Builder::new() // <--
1698
    ///         .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
1699
    ///         .clean(input)
1700
    ///         .to_string();
1701
    ///     assert_eq!(result, output);
1702
    ///     # Ok(())
1703
    ///     # }
1704
    ///     # fn main() { do_main().unwrap() }
1705
    ///
1706
    /// [default options]: fn.clean.html
1707
    /// [`Builder`]: struct.Builder.html
1708
0
    pub fn new() -> Self {
1709
0
        Self::default()
1710
0
    }
1711
1712
    /// Constructs a [`Builder`] instance configured with no allowed tags.
1713
    ///
1714
    /// # Examples
1715
    ///
1716
    ///     use ammonia::{Builder, Url, UrlRelative};
1717
    ///     # use std::error::Error;
1718
    ///
1719
    ///     # fn do_main() -> Result<(), Box<dyn Error>> {
1720
    ///     let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>empty()</code> function</a>.";
1721
    ///     let output = "This is an Ammonia example using the empty() function.";
1722
    ///
1723
    ///     let result = Builder::empty() // <--
1724
    ///         .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
1725
    ///         .clean(input)
1726
    ///         .to_string();
1727
    ///     assert_eq!(result, output);
1728
    ///     # Ok(())
1729
    ///     # }
1730
    ///     # fn main() { do_main().unwrap() }
1731
    ///
1732
    /// [default options]: fn.clean.html
1733
    /// [`Builder`]: struct.Builder.html
1734
0
    pub fn empty() -> Self {
1735
0
        Self {
1736
0
            tags: hashset![],
1737
0
            ..Self::default()
1738
0
        }
1739
0
    }
1740
1741
    /// Sanitizes an HTML fragment in a string according to the configured options.
1742
    ///
1743
    /// # Examples
1744
    ///
1745
    ///     use ammonia::{Builder, Url, UrlRelative};
1746
    ///     # use std::error::Error;
1747
    ///
1748
    ///     # fn do_main() -> Result<(), Box<dyn Error>> {
1749
    ///     let input = "<!-- comments will be stripped -->This is an <a href=.>Ammonia</a> example using <a href=struct.Builder.html#method.new onclick=xss>the <code onmouseover=xss>new()</code> function</a>.";
1750
    ///     let output = "This is an <a href=\"https://docs.rs/ammonia/1.0/ammonia/\" rel=\"noopener noreferrer\">Ammonia</a> example using <a href=\"https://docs.rs/ammonia/1.0/ammonia/struct.Builder.html#method.new\" rel=\"noopener noreferrer\">the <code>new()</code> function</a>.";
1751
    ///
1752
    ///     let result = Builder::new()
1753
    ///         .url_relative(UrlRelative::RewriteWithBase(Url::parse("https://docs.rs/ammonia/1.0/ammonia/")?))
1754
    ///         .clean(input)
1755
    ///         .to_string(); // <--
1756
    ///     assert_eq!(result, output);
1757
    ///     # Ok(())
1758
    ///     # }
1759
    ///     # fn main() { do_main().unwrap() }
1760
0
    pub fn clean(&self, src: &str) -> Document {
        // Parse the whole string as one fragment, then sanitize the resulting DOM.
        self.clean_dom(Self::make_parser().one(src))
    }
1765
1766
    /// Sanitizes an HTML fragment from a reader according to the configured options.
1767
    ///
1768
    /// The input should be in UTF-8 encoding, otherwise the decoding is lossy, just
1769
    /// like when using [`String::from_utf8_lossy`].
1770
    ///
1771
    /// To avoid consuming the reader, a mutable reference can be passed to this method.
1772
    ///
1773
    /// # Examples
1774
    ///
1775
    ///     use ammonia::Builder;
1776
    ///     # use std::error::Error;
1777
    ///
1778
    ///     # fn do_main() -> Result<(), Box<dyn Error>> {
1779
    ///     let a = Builder::new()
1780
    ///         .clean_from_reader(&b"<!-- no -->"[..])? // notice the `b`
1781
    ///         .to_string();
1782
    ///     assert_eq!(a, "");
1783
    ///     # Ok(()) }
1784
    ///     # fn main() { do_main().unwrap() }
1785
    ///
1786
    /// [`String::from_utf8_lossy`]: https://doc.rust-lang.org/std/string/struct.String.html#method.from_utf8_lossy
1787
0
    pub fn clean_from_reader<R>(&self, mut src: R) -> io::Result<Document>
1788
0
    where
1789
0
        R: io::Read,
1790
    {
1791
0
        let parser = Self::make_parser().from_utf8();
1792
0
        let dom = parser.read_from(&mut src)?;
1793
0
        Ok(self.clean_dom(dom))
1794
0
    }
1795
1796
    /// Clean a post-parsing DOM.
1797
    ///
1798
    /// This is not a public API because RcDom isn't really stable.
1799
    /// We want to be able to take breaking changes to html5ever itself
1800
    /// without having to break Ammonia's API.
1801
0
    fn clean_dom(&self, dom: RcDom) -> Document {
1802
0
        let mut stack = Vec::new();
1803
0
        let mut removed = Vec::new();
1804
0
        let link_rel = self
1805
0
            .link_rel
1806
0
            .map(|link_rel| format_tendril!("{}", link_rel));
1807
0
        if link_rel.is_some() {
1808
0
            assert!(self.generic_attributes.get("rel").is_none());
1809
0
            assert!(self
1810
0
                .tag_attributes
1811
0
                .get("a")
1812
0
                .and_then(|a| a.get("rel"))
1813
0
                .is_none());
1814
0
        }
1815
0
        assert!(self.allowed_classes.is_empty() || !self.generic_attributes.contains("class"));
1816
0
        for tag_name in self.allowed_classes.keys() {
1817
0
            assert!(self
1818
0
                .tag_attributes
1819
0
                .get(tag_name)
1820
0
                .and_then(|a| a.get("class"))
1821
0
                .is_none());
1822
        }
1823
0
        for tag_name in &self.clean_content_tags {
1824
0
            assert!(!self.tags.contains(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tags` at the same time");
1825
0
            assert!(!self.tag_attributes.contains_key(tag_name), "`{tag_name}` appears in `clean_content_tags` and in `tag_attributes` at the same time");
1826
        }
1827
0
        let body = {
1828
0
            let children = dom.document.children.borrow();
1829
0
            children[0].clone()
1830
        };
1831
0
        stack.extend(
1832
0
            mem::take(&mut *body.children.borrow_mut())
1833
0
                .into_iter()
1834
0
                .rev(),
1835
        );
1836
        // This design approach is used to prevent pathological content from producing
1837
        // a stack overflow. The `stack` contains to-be-cleaned nodes, while `remove`,
1838
        // of course, contains nodes that need to be dropped (we can't just drop them,
1839
        // because they could have a very deep child tree).
1840
0
        while let Some(mut node) = stack.pop() {
1841
0
            let parent = node.parent
1842
0
                .replace(None).expect("a node in the DOM will have a parent, except the root, which is not processed")
1843
0
                .upgrade().expect("a node's parent will be pointed to by its parent (or the root pointer), and will not be dropped");
1844
0
            if self.clean_node_content(&node) || !self.check_expected_namespace(&parent, &node) {
1845
0
                removed.push(node);
1846
0
                continue;
1847
0
            }
1848
0
            let pass = self.clean_child(&mut node);
1849
0
            if pass {
1850
0
                self.adjust_node_attributes(&mut node, &link_rel, self.id_prefix);
1851
0
                dom.append(&parent.clone(), NodeOrText::AppendNode(node.clone()));
1852
0
            } else {
1853
0
                for sub in node.children.borrow_mut().iter_mut() {
1854
0
                    sub.parent.replace(Some(Rc::downgrade(&parent)));
1855
0
                }
1856
            }
1857
0
            stack.extend(
1858
0
                mem::take(&mut *node.children.borrow_mut())
1859
0
                    .into_iter()
1860
0
                    .rev(),
1861
            );
1862
0
            if !pass {
1863
0
                removed.push(node);
1864
0
            }
1865
        }
1866
        // Now, imperatively clean up all of the child nodes.
1867
        // Otherwise, we could wind up with a DoS, either caused by a memory leak,
1868
        // or caused by a stack overflow.
1869
0
        while let Some(node) = removed.pop() {
1870
0
            removed.extend_from_slice(&mem::take(&mut *node.children.borrow_mut())[..]);
1871
0
        }
1872
0
        Document(dom)
1873
0
    }
1874
1875
    /// Returns `true` if a node and all its content should be removed.
1876
0
    fn clean_node_content(&self, node: &Handle) -> bool {
1877
0
        match node.data {
1878
            NodeData::Text { .. }
1879
            | NodeData::Comment { .. }
1880
            | NodeData::Doctype { .. }
1881
            | NodeData::Document
1882
0
            | NodeData::ProcessingInstruction { .. } => false,
1883
0
            NodeData::Element { ref name, .. } => self.clean_content_tags.contains(&*name.local),
1884
        }
1885
0
    }
1886
1887
    /// Remove unwanted attributes, and check if the node should be kept or not.
1888
    ///
1889
    /// The root node doesn't need cleaning because we create the root node ourselves,
1890
    /// and it doesn't get serialized, and ... it just exists to give the parser
1891
    /// a context (in this case, a div-like block context).
1892
0
    fn clean_child(&self, child: &mut Handle) -> bool {
1893
0
        match child.data {
1894
0
            NodeData::Text { .. } => true,
1895
0
            NodeData::Comment { .. } => !self.strip_comments,
1896
            NodeData::Doctype { .. }
1897
            | NodeData::Document
1898
0
            | NodeData::ProcessingInstruction { .. } => false,
1899
            NodeData::Element {
1900
0
                ref name,
1901
0
                ref attrs,
1902
                ..
1903
            } => {
1904
0
                if self.tags.contains(&*name.local) {
1905
0
                    let attr_filter = |attr: &html5ever::Attribute| {
1906
0
                        let whitelisted = self.generic_attributes.contains(&*attr.name.local)
1907
0
                            || self.generic_attribute_prefixes.as_ref().map(|prefixes| {
1908
0
                                prefixes.iter().any(|&p| attr.name.local.starts_with(p))
1909
0
                            }) == Some(true)
1910
0
                            || self
1911
0
                                .tag_attributes
1912
0
                                .get(&*name.local)
1913
0
                                .map(|ta| ta.contains(&*attr.name.local))
1914
0
                                == Some(true)
1915
0
                            || self
1916
0
                                .tag_attribute_values
1917
0
                                .get(&*name.local)
1918
0
                                .and_then(|tav| tav.get(&*attr.name.local))
1919
0
                                .map(|vs| {
1920
0
                                    let attr_val = attr.value.to_lowercase();
1921
0
                                    vs.iter().any(|v| v.to_lowercase() == attr_val)
1922
0
                                })
1923
0
                                == Some(true);
1924
0
                        if !whitelisted {
1925
                            // If the class attribute is not whitelisted,
1926
                            // but there is a whitelisted set of allowed_classes,
1927
                            // do not strip out the class attribute.
1928
                            // Banned classes will be filtered later.
1929
0
                            &*attr.name.local == "class"
1930
0
                                && self.allowed_classes.contains_key(&*name.local)
1931
0
                        } else if is_url_attr(&name.local, &attr.name.local) {
1932
0
                            let url = Url::parse(&attr.value);
1933
0
                            if let Ok(url) = url {
1934
0
                                self.url_schemes.contains(url.scheme())
1935
0
                            } else if url == Err(url::ParseError::RelativeUrlWithoutBase) {
1936
0
                                !matches!(self.url_relative, UrlRelative::Deny)
1937
                            } else {
1938
0
                                false
1939
                            }
1940
                        } else {
1941
0
                            true
1942
                        }
1943
0
                    };
1944
0
                    attrs.borrow_mut().retain(attr_filter);
1945
0
                    true
1946
                } else {
1947
0
                    false
1948
                }
1949
            }
1950
        }
1951
0
    }
1952
1953
    // Check for unexpected namespace changes.
1954
    //
1955
    // The issue happens if developers added to the list of allowed tags any
1956
    // tag which is parsed in RCDATA state, PLAINTEXT state or RAWTEXT state,
1957
    // that is:
1958
    //
1959
    // * title
1960
    // * textarea
1961
    // * xmp
1962
    // * iframe
1963
    // * noembed
1964
    // * noframes
1965
    // * plaintext
1966
    // * noscript
1967
    // * style
1968
    // * script
1969
    //
1970
    // An example in the wild is Plume, which allows iframe [1].  So in the next
1971
    // examples I'll assume the following policy:
1972
    //
1973
    //     Builder::new()
1974
    //        .add_tags(&["iframe"])
1975
    //
1976
    // In HTML namespace `<iframe>` is parsed specially; that is, its content is
1977
    // treated as text. For instance, the following html:
1978
    //
1979
    //     <iframe><a>test
1980
    //
1981
    // Is parsed into the following DOM tree:
1982
    //
1983
    //     iframe
1984
    //     └─ #text: <a>test
1985
    //
1986
    // So iframe cannot have any children other than a text node.
1987
    //
1988
    // The same is not true, though, in "foreign content"; that is, within
1989
    // <svg> or <math> tags. The following html:
1990
    //
1991
    //     <svg><iframe><a>test
1992
    //
1993
    // is parsed differently:
1994
    //
1995
    //    svg
1996
    //    └─ iframe
1997
    //       └─ a
1998
    //          └─ #text: test
1999
    //
2000
    // So in SVG namespace iframe can have children.
2001
    //
2002
    // Ammonia disallows <svg> but it keeps its content after deleting it. And
2003
    // the parser internally keeps track of the namespace of the element. So
2004
    // assume we have the following snippet:
2005
    //
2006
    //     <svg><iframe><a title="</iframe><img src onerror=alert(1)>">test
2007
    //
2008
    // It is parsed into:
2009
    //
2010
    //     svg
2011
    //     └─ iframe
2012
    //        └─ a title="</iframe><img src onerror=alert(1)>"
2013
    //           └─ #text: test
2014
    //
2015
    // This DOM tree is harmless from ammonia's point of view because the piece
2016
    // of code that looks like XSS is in a title attribute. Hence, the
2017
    // resulting "safe" HTML from ammonia would be:
2018
    //
2019
    //     <iframe><a title="</iframe><img src onerror=alert(1)>" rel="noopener
2020
    // noreferrer">test</a></iframe>
2021
    //
2022
    // However, at this point, the information about namespace is lost, which
2023
    // means that the browser will parse this snippet into:
2024
    //
2025
    //     ├─ iframe
2026
    //     │  └─ #text: <a title="
2027
    //     ├─ img src="" onerror="alert(1)"
2028
    //     └─ #text: " rel="noopener noreferrer">test
2029
    //
2030
    // Leading to XSS.
2031
    //
2032
    // To solve this issue, check for unexpected namespace switches after cleanup.
2033
    // Elements which change namespace at an unexpected point are removed.
2034
    // This function returns `true` if `child` should be kept, and `false` if it
2035
    // should be removed.
2036
    //
2037
    // [1]: https://github.com/Plume-org/Plume/blob/main/plume-models/src/safe_string.rs#L21
2038
0
    fn check_expected_namespace(&self, parent: &Handle, child: &Handle) -> bool {
2039
0
        let (parent, child) = match (&parent.data, &child.data) {
2040
0
            (NodeData::Element { name: pn, .. }, NodeData::Element { name: cn, .. }) => (pn, cn),
2041
0
            _ => return true,
2042
        };
2043
        // The only way to switch from html to svg is with the <svg> tag
2044
0
        if parent.ns == ns!(html) && child.ns == ns!(svg) {
2045
0
            child.local == local_name!("svg")
2046
        // The only way to switch from html to mathml is with the <math> tag
2047
0
        } else if parent.ns == ns!(html) && child.ns == ns!(mathml) {
2048
0
            child.local == local_name!("math")
2049
        // The only way to switch from mathml to svg/html is with a text integration point
2050
0
        } else if parent.ns == ns!(mathml) && child.ns != ns!(mathml) {
2051
            // https://html.spec.whatwg.org/#mathml
2052
0
            matches!(
2053
0
                &*parent.local,
2054
0
                "mi" | "mo" | "mn" | "ms" | "mtext" | "annotation-xml"
2055
0
            ) && if child.ns == ns!(html) { is_html_tag(&child.local) } else { true }
2056
        // The only way to switch from svg to mathml/html is with an html integration point
2057
0
        } else if parent.ns == ns!(svg) && child.ns != ns!(svg) {
2058
            // https://html.spec.whatwg.org/#svg-0
2059
0
            matches!(&*parent.local, "foreignObject")
2060
0
                && if child.ns == ns!(html) { is_html_tag(&child.local) } else { true }
2061
0
        } else if child.ns == ns!(svg) {
2062
0
            is_svg_tag(&child.local)
2063
0
        } else if child.ns == ns!(mathml) {
2064
0
            is_mathml_tag(&child.local)
2065
0
        } else if child.ns == ns!(html) {
2066
0
            is_html_tag(&child.local)
2067
        } else {
2068
            // There are no other supported ways to switch namespace
2069
0
            parent.ns == child.ns
2070
        }
2071
0
    }
2072
2073
    /// Add and transform special-cased attributes and elements.
2074
    ///
2075
    /// This function handles:
2076
    ///
2077
    /// * relative URL rewriting
2078
    /// * adding `<a rel>` attributes
2079
    /// * filtering out banned style properties
2080
    /// * filtering out banned classes
2081
0
    fn adjust_node_attributes(
2082
0
        &self,
2083
0
        child: &mut Handle,
2084
0
        link_rel: &Option<StrTendril>,
2085
0
        id_prefix: Option<&'a str>,
2086
0
    ) {
2087
        if let NodeData::Element {
2088
0
            ref name,
2089
0
            ref attrs,
2090
            ..
2091
0
        } = child.data
2092
        {
2093
0
            if let Some(set_attrs) = self.set_tag_attribute_values.get(&*name.local) {
2094
0
                let mut attrs = attrs.borrow_mut();
2095
0
                for (&set_name, &set_value) in set_attrs {
2096
                    // set the value of the attribute if the attribute is already present
2097
0
                    if let Some(attr) = attrs.iter_mut().find(|attr| &*attr.name.local == set_name)
2098
                    {
2099
0
                        if &*attr.value != set_value {
2100
0
                            attr.value = set_value.into();
2101
0
                        }
2102
0
                    } else {
2103
0
                        // otherwise, add the attribute
2104
0
                        let attr = Attribute {
2105
0
                            name: QualName::new(None, ns!(), set_name.into()),
2106
0
                            value: set_value.into(),
2107
0
                        };
2108
0
                        attrs.push(attr);
2109
0
                    }
2110
                }
2111
0
            }
2112
0
            if let Some(ref link_rel) = *link_rel {
2113
0
                if &*name.local == "a" {
2114
0
                    attrs.borrow_mut().push(Attribute {
2115
0
                        name: QualName::new(None, ns!(), local_name!("rel")),
2116
0
                        value: link_rel.clone(),
2117
0
                    })
2118
0
                }
2119
0
            }
2120
0
            if let Some(ref id_prefix) = id_prefix {
2121
0
                for attr in &mut *attrs.borrow_mut() {
2122
0
                    if &attr.name.local == "id" && !attr.value.starts_with(id_prefix) {
2123
0
                        attr.value = format_tendril!("{}{}", id_prefix, attr.value);
2124
0
                    }
2125
                }
2126
0
            }
2127
0
            if let Some(ref attr_filter) = self.attribute_filter {
2128
0
                let mut drop_attrs = Vec::new();
2129
0
                let mut attrs = attrs.borrow_mut();
2130
0
                for (i, attr) in &mut attrs.iter_mut().enumerate() {
2131
0
                    let replace_with = if let Some(new) =
2132
0
                        attr_filter.filter(&name.local, &attr.name.local, &attr.value)
2133
                    {
2134
0
                        if *new != *attr.value {
2135
0
                            Some(format_tendril!("{}", new))
2136
                        } else {
2137
0
                            None // no need to replace the attr if filter returned the same value
2138
                        }
2139
                    } else {
2140
0
                        drop_attrs.push(i);
2141
0
                        None
2142
                    };
2143
0
                    if let Some(replace_with) = replace_with {
2144
0
                        attr.value = replace_with;
2145
0
                    }
2146
                }
2147
0
                for i in drop_attrs.into_iter().rev() {
2148
0
                    attrs.swap_remove(i);
2149
0
                }
2150
0
            }
2151
            {
2152
0
                let mut drop_attrs = Vec::new();
2153
0
                let mut attrs = attrs.borrow_mut();
2154
0
                for (i, attr) in attrs.iter_mut().enumerate() {
2155
0
                    if is_url_attr(&name.local, &attr.name.local) && is_url_relative(&attr.value) {
2156
0
                        let new_value = self.url_relative.evaluate(&attr.value);
2157
0
                        if let Some(new_value) = new_value {
2158
0
                            attr.value = new_value;
2159
0
                        } else {
2160
0
                            drop_attrs.push(i);
2161
0
                        }
2162
0
                    }
2163
                }
2164
                // Swap remove scrambles the vector after the current point.
2165
                // We will not do anything except with items before the current point.
2166
                // The `rev()` is, as such, necessary for correctness.
2167
                // We could use regular `remove(usize)` and a forward iterator,
2168
                // but that's slower.
2169
0
                for i in drop_attrs.into_iter().rev() {
2170
0
                    attrs.swap_remove(i);
2171
0
                }
2172
            }
2173
0
            if let Some(allowed_values) = &self.style_properties {
2174
0
                for attr in &mut *attrs.borrow_mut() {
2175
0
                    if &attr.name.local == "style" {
2176
0
                        attr.value = style::filter_style_attribute(&attr.value, allowed_values).into();
2177
0
                    }
2178
                }
2179
0
            }
2180
0
            if let Some(allowed_values) = self.allowed_classes.get(&*name.local) {
2181
0
                for attr in &mut *attrs.borrow_mut() {
2182
0
                    if &attr.name.local == "class" {
2183
0
                        let mut classes = vec![];
2184
                        // https://html.spec.whatwg.org/#global-attributes:classes-2
2185
0
                        for class in attr.value.split_ascii_whitespace() {
2186
0
                            if allowed_values.contains(class) {
2187
0
                                classes.push(class.to_owned());
2188
0
                            }
2189
                        }
2190
0
                        attr.value = format_tendril!("{}", classes.join(" "));
2191
0
                    }
2192
                }
2193
0
            }
2194
0
        }
2195
0
    }
2196
2197
    /// Initializes an HTML fragment parser.
2198
    ///
2199
    /// Ammonia conforms to the HTML5 fragment parsing rules,
2200
    /// by parsing the given fragment as if it were included in a <div> tag.
2201
0
    fn make_parser() -> html::Parser<RcDom> {
2202
0
        html::parse_fragment(
2203
0
            RcDom::default(),
2204
0
            html::ParseOpts::default(),
2205
0
            QualName::new(None, ns!(html), local_name!("div")),
2206
0
            vec![],
2207
            false,
2208
        )
2209
0
    }
2210
}
2211
2212
/// Reports whether `attr`, when found on `element`, holds a URL.
///
/// `href` and `src` count on every element; the remaining attributes only count
/// on the specific elements listed below. Matching is case-sensitive.
fn is_url_attr(element: &str, attr: &str) -> bool {
    match (element, attr) {
        (_, "href" | "src") => true,
        ("form", "action")
        | ("object", "data")
        | ("button" | "input", "formaction")
        | ("a", "ping")
        | ("video", "poster") => true,
        _ => false,
    }
}
2222
2223
0
fn is_html_tag(element: &str) -> bool {
2224
0
    (!is_svg_tag(element) && !is_mathml_tag(element))
2225
0
        || matches!(
2226
0
            element,
2227
0
            "title" | "style" | "font" | "a" | "script" | "span"
2228
        )
2229
0
}
2230
2231
/// Given an element name, check whether it is one of the known SVG element names.
///
/// The comparison is exact and case-sensitive (`foreignObject` matches,
/// `foreignobject` does not).
fn is_svg_tag(element: &str) -> bool {
    // https://svgwg.org/svg2-draft/eltindex.html
    const SVG_ELEMENTS: &[&str] = &[
        "a",
        "animate",
        "animateMotion",
        "animateTransform",
        "circle",
        "clipPath",
        "defs",
        "desc",
        "discard",
        "ellipse",
        "feBlend",
        "feColorMatrix",
        "feComponentTransfer",
        "feComposite",
        "feConvolveMatrix",
        "feDiffuseLighting",
        "feDisplacementMap",
        "feDistantLight",
        "feDropShadow",
        "feFlood",
        "feFuncA",
        "feFuncB",
        "feFuncG",
        "feFuncR",
        "feGaussianBlur",
        "feImage",
        "feMerge",
        "feMergeNode",
        "feMorphology",
        "feOffset",
        "fePointLight",
        "feSpecularLighting",
        "feSpotLight",
        "feTile",
        "feTurbulence",
        "filter",
        "foreignObject",
        "g",
        "image",
        "line",
        "linearGradient",
        "marker",
        "mask",
        "metadata",
        "mpath",
        "path",
        "pattern",
        "polygon",
        "polyline",
        "radialGradient",
        "rect",
        "script",
        "set",
        "stop",
        "style",
        "svg",
        "switch",
        "symbol",
        "text",
        "textPath",
        "title",
        "tspan",
        "use",
        "view",
    ];
    SVG_ELEMENTS.iter().any(|&known| known == element)
}
2301
2302
/// Returns `true` when `element` is exactly one of the MathML element names in the table below.
///
/// The comparison is a plain, case-sensitive string equality; the input is not trimmed or
/// otherwise normalized.
fn is_mathml_tag(element: &str) -> bool {
    // NOTE(review): the comment that used to sit here pointed at the SVG element index
    // (https://svgwg.org/svg2-draft/eltindex.html), presumably copied from the SVG helper.
    // TODO: cite the MathML element index this list was taken from.
    const MATHML_TAGS: &[&str] = &[
        // a
        "abs", "and", "annotation", "annotation-xml", "apply", "approx", "arccos", "arccosh",
        "arccot", "arccoth", "arccsc", "arccsch", "arcsec", "arcsech", "arcsin", "arcsinh",
        "arctan", "arctanh", "arg",
        // b
        "bind", "bvar",
        // c
        "card", "cartesianproduct", "cbytes", "ceiling", "cerror", "ci", "cn", "codomain",
        "complexes", "compose", "condition", "conjugate", "cos", "cosh", "cot", "coth", "cs",
        "csc", "csch", "csymbol", "curl",
        // d
        "declare", "degree", "determinant", "diff", "divergence", "divide", "domain",
        "domainofapplication",
        // e
        "emptyset", "eq", "equivalent", "eulergamma", "exists", "exp", "exponentiale",
        // f
        "factorial", "factorof", "false", "floor", "fn", "forall",
        // g
        "gcd", "geq", "grad", "gt",
        // i
        "ident", "image", "imaginary", "imaginaryi", "implies", "in", "infinity", "int",
        "integers", "intersect", "interval", "inverse",
        // l
        "lambda", "laplacian", "lcm", "leq", "limit", "list", "ln", "log", "logbase",
        "lowlimit", "lt",
        // m
        "maction", "maligngroup", "malignmark", "math", "matrix", "matrixrow", "max", "mean",
        "median", "menclose", "merror", "mfenced", "mfrac", "mglyph", "mi", "min", "minus",
        "mlabeledtr", "mlongdiv", "mmultiscripts", "mn", "mo", "mode", "moment", "momentabout",
        "mover", "mpadded", "mphantom", "mprescripts", "mroot", "mrow", "ms", "mscarries",
        "mscarry", "msgroup", "msline", "mspace", "msqrt", "msrow", "mstack", "mstyle", "msub",
        "msubsup", "msup", "mtable", "mtd", "mtext", "mtr", "munder", "munderover",
        // n
        "naturalnumbers", "neq", "none", "not", "notanumber", "notin", "notprsubset",
        "notsubset",
        // o
        "or", "otherwise", "outerproduct",
        // p
        "partialdiff", "pi", "piece", "piecewise", "plus", "power", "primes", "product",
        "prsubset",
        // q
        "quotient",
        // r
        "rationals", "real", "reals", "reln", "rem", "root",
        // s
        "scalarproduct", "sdev", "sec", "sech", "selector", "semantics", "sep", "set",
        "setdiff", "share", "sin", "sinh", "span", "subset", "sum",
        // t
        "tan", "tanh", "tendsto", "times", "transpose", "true",
        // u
        "union", "uplimit",
        // v
        "variance", "vector", "vectorproduct",
        // x
        "xor",
    ];

    MATHML_TAGS.contains(&element)
}
2503
2504
0
fn is_url_relative(url: &str) -> bool {
2505
0
    matches!(
2506
0
        Url::parse(url),
2507
        Err(url::ParseError::RelativeUrlWithoutBase)
2508
    )
2509
0
}
2510
2511
/// Policy for [relative URLs], that is, URLs that do not specify the scheme in full.
///
/// This policy kicks in, if set, for any attribute named `src` or `href`,
/// as well as the `data` attribute of an `object` tag.
///
/// [relative URLs]: Builder::url_relative
///
/// # Examples
///
/// ## `Deny`
///
/// * `<a href="test">` is a file-relative URL, and will be removed
/// * `<a href="/test">` is a domain-relative URL, and will be removed
/// * `<a href="//example.com/test">` is a scheme-relative URL, and will be removed
/// * `<a href="http://example.com/test">` is an absolute URL, and will be kept
///
/// ## `PassThrough`
///
/// No changes will be made to any URLs, except if a disallowed scheme is used.
///
/// ## `RewriteWithBase`
///
/// If the base is set to `http://notriddle.com/some-directory/some-file`
///
/// * `<a href="test">` will be rewritten to `<a href="http://notriddle.com/some-directory/test">`
/// * `<a href="/test">` will be rewritten to `<a href="http://notriddle.com/test">`
/// * `<a href="//example.com/test">` will be rewritten to `<a href="http://example.com/test">`
/// * `<a href="http://example.com/test">` is an absolute URL, so it will be kept as-is
///
/// ## `Custom`
///
/// Pass the relative URL to a function.
/// If it returns `Some(string)`, then that one gets used.
/// Otherwise, it will remove the attribute (like `Deny` does).
///
///     use std::borrow::Cow;
///     fn is_absolute_path(url: &str) -> bool {
///         let u = url.as_bytes();
///         // `//a/b/c` is "protocol-relative", meaning "a" is a hostname
///         // `/a/b/c` is an absolute path, and what we want to do stuff to.
///         u.get(0) == Some(&b'/') && u.get(1) != Some(&b'/')
///     }
///     fn evaluate(url: &str) -> Option<Cow<str>> {
///         if is_absolute_path(url) {
///             Some(Cow::Owned(String::from("/root") + url))
///         } else {
///             Some(Cow::Borrowed(url))
///         }
///     }
///     fn main() {
///         let a = ammonia::Builder::new()
///             .url_relative(ammonia::UrlRelative::Custom(Box::new(evaluate)))
///             .clean("<a href=/test/path>fixed</a><a href=path>passed</a><a href=http://google.com/>skipped</a>")
///             .to_string();
///         assert_eq!(a, "<a href=\"/root/test/path\" rel=\"noopener noreferrer\">fixed</a><a href=\"path\" rel=\"noopener noreferrer\">passed</a><a href=\"http://google.com/\" rel=\"noopener noreferrer\">skipped</a>");
///     }
///
/// This function is only applied to relative URLs.
/// To filter all of the URLs,
/// use the not-yet-implemented Content Security Policy.
#[non_exhaustive]
pub enum UrlRelative<'a> {
    /// Relative URLs will be completely stripped from the document.
    Deny,
    /// Relative URLs will be passed through unchanged.
    PassThrough,
    /// Relative URLs will be changed into absolute URLs, based on this base URL.
    RewriteWithBase(Url),
    /// Force absolute and relative paths into a particular directory.
    ///
    /// Since the resolver does not affect fully-qualified URLs, it doesn't
    /// prevent users from linking wherever they want. This feature only
    /// serves to make content more portable.
    ///
    /// # Examples
    ///
    /// In the table below, `root` and `path` are the two fields of this variant,
    /// `url` is the relative URL found in the document (an empty cell means the
    /// empty string), and `result` is the rewritten URL.
    ///
    /// <table>
    /// <thead>
    /// <tr>
    ///     <th>root</th>
    ///     <th>path</th>
    ///     <th>url</th>
    ///     <th>result</th>
    /// </tr>
    /// </thead>
    /// <tbody>
    /// <tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    ///     <td>README.md</td>
    ///     <td></td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/README.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    ///     <td>README.md</td>
    ///     <td>/</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    ///     <td>README.md</td>
    ///     <td>/CONTRIBUTING.md</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
    ///     <td>README.md</td>
    ///     <td></td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/README.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
    ///     <td>README.md</td>
    ///     <td>/</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master</td>
    ///     <td>README.md</td>
    ///     <td>/CONTRIBUTING.md</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    ///     <td></td>
    ///     <td></td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    ///     <td></td>
    ///     <td>/</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    /// </tr><tr>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/</td>
    ///     <td></td>
    ///     <td>/CONTRIBUTING.md</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/</td>
    ///     <td>rust-ammonia/ammonia/blob/master/README.md</td>
    ///     <td></td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/README.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/</td>
    ///     <td>rust-ammonia/ammonia/blob/master/README.md</td>
    ///     <td>/</td>
    ///     <td>https://github.com/</td>
    /// </tr><tr>
    ///     <td>https://github.com/</td>
    ///     <td>rust-ammonia/ammonia/blob/master/README.md</td>
    ///     <td>CONTRIBUTING.md</td>
    ///     <td>https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md</td>
    /// </tr><tr>
    ///     <td>https://github.com/</td>
    ///     <td>rust-ammonia/ammonia/blob/master/README.md</td>
    ///     <td>/CONTRIBUTING.md</td>
    ///     <td>https://github.com/CONTRIBUTING.md</td>
    /// </tr>
    /// </tbody>
    /// </table>
    RewriteWithRoot {
        /// The URL that is treated as the root by the resolver.
        root: Url,
        /// The "current path" used to resolve relative paths.
        path: String,
    },
    /// Rewrite URLs with a custom function.
    ///
    /// The boxed evaluator receives each relative URL; returning `None` removes
    /// the attribute, returning `Some(..)` replaces its value.
    Custom(Box<dyn UrlRelativeEvaluate<'a>>),
}
2674
2675
impl<'a> UrlRelative<'a> {
2676
0
    fn evaluate(&self, url: &str) -> Option<tendril::StrTendril> {
2677
0
        match self {
2678
0
            UrlRelative::RewriteWithBase(ref url_base) => url_base
2679
0
                .join(url)
2680
0
                .ok()
2681
0
                .and_then(|x| StrTendril::from_str(x.as_str()).ok()),
2682
0
            UrlRelative::RewriteWithRoot { ref root, ref path } => {
2683
0
                (match url.as_bytes() {
2684
                    // Scheme-relative URL
2685
0
                    [b'/', b'/', ..] => root.join(url),
2686
                    // Path-absolute URL
2687
0
                    b"/" => root.join("."),
2688
0
                    [b'/', ..] => root.join(&url[1..]),
2689
                    // Path-relative URL
2690
0
                    _ => root.join(path).and_then(|r| r.join(url)),
2691
                })
2692
0
                .ok()
2693
0
                .and_then(|x| StrTendril::from_str(x.as_str()).ok())
2694
            }
2695
0
            UrlRelative::Custom(ref evaluate) => evaluate
2696
0
                .evaluate(url)
2697
0
                .as_ref()
2698
0
                .map(Cow::as_ref)
2699
0
                .map(StrTendril::from_str)
2700
0
                .and_then(Result::ok),
2701
0
            UrlRelative::PassThrough => StrTendril::from_str(url).ok(),
2702
0
            UrlRelative::Deny => None,
2703
        }
2704
0
    }
2705
}
2706
2707
impl<'a> fmt::Debug for UrlRelative<'a> {
2708
0
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2709
0
        match *self {
2710
0
            UrlRelative::Deny => write!(f, "UrlRelative::Deny"),
2711
0
            UrlRelative::PassThrough => write!(f, "UrlRelative::PassThrough"),
2712
0
            UrlRelative::RewriteWithBase(ref base) => {
2713
0
                write!(f, "UrlRelative::RewriteWithBase({})", base)
2714
            }
2715
0
            UrlRelative::RewriteWithRoot { ref root, ref path } => {
2716
0
                write!(
2717
0
                    f,
2718
                    "UrlRelative::RewriteWithRoot {{ root: {root}, path: {path} }}"
2719
                )
2720
            }
2721
0
            UrlRelative::Custom(_) => write!(f, "UrlRelative::Custom"),
2722
        }
2723
0
    }
2724
}
2725
2726
/// Types that implement this trait can be used to convert a relative URL into an absolute URL.
///
/// This evaluator is only called when the URL is relative; absolute URLs are not evaluated.
///
/// Implementors must be `Send + Sync` and valid for the lifetime `'a`.
///
/// See [`url_relative`][url_relative] for more details.
///
/// [url_relative]: struct.Builder.html#method.url_relative
pub trait UrlRelativeEvaluate<'a>: Send + Sync + 'a {
    /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string.
    ///
    /// The single argument is the relative URL. The returned [`Cow`] may borrow from that
    /// argument (both share the `'url` lifetime) or own a freshly built string.
    fn evaluate<'url>(&self, _: &'url str) -> Option<Cow<'url, str>>;
}
2737
/// Blanket implementation: any `Send + Sync` callable of the shape
/// `Fn(&str) -> Option<Cow<'_, str>>` that lives for `'a` is itself an evaluator.
impl<'a, T> UrlRelativeEvaluate<'a> for T
where
    T: Fn(&str) -> Option<Cow<'_, str>> + Send + Sync + 'a,
{
    /// Forwards the URL to the wrapped callable and returns its result unchanged.
    fn evaluate<'url>(&self, url: &'url str) -> Option<Cow<'url, str>> {
        self(url)
    }
}
2745
2746
impl fmt::Debug for dyn AttributeFilter {
2747
0
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2748
0
        f.write_str("AttributeFilter")
2749
0
    }
2750
}
2751
2752
/// Types that implement this trait can be used to remove or rewrite arbitrary attributes.
///
/// Implementors must be `Send + Sync`.
///
/// See [`attribute_filter`][attribute_filter] for more details.
///
/// [attribute_filter]: struct.Builder.html#method.attribute_filter
pub trait AttributeFilter: Send + Sync {
    /// Return `None` to remove the attribute. Return `Some(str)` to replace it with a new string.
    ///
    /// The three arguments are, in order, the element name, the attribute name and the
    /// attribute value. The returned [`Cow`] may borrow from the value (they share the
    /// `'a` lifetime) or own a new string.
    fn filter<'a>(&self, _: &str, _: &str, _: &'a str) -> Option<Cow<'a, str>>;
}
2761
2762
/// Blanket implementation: any `'static`, `Send + Sync` callable taking
/// `(element, attribute, value)` and returning `Option<Cow<str>>` is itself a filter.
impl<T> AttributeFilter for T
where
    T: for<'a> Fn(&str, &str, &'a str) -> Option<Cow<'a, str>> + Send + Sync + 'static,
{
    /// Forwards the three arguments to the wrapped callable and returns its result unchanged.
    fn filter<'a>(&self, element: &str, attribute: &str, value: &'a str) -> Option<Cow<'a, str>> {
        self(element, attribute, value)
    }
}
2770
2771
/// A sanitized HTML document.
///
/// The `Document` type is an opaque struct representing an HTML fragment that was sanitized by
/// `ammonia`. It can be converted to a [`String`] or written to a [`Write`] instance. This allows
/// users to avoid buffering the serialized representation to a [`String`] when desired.
///
/// This type is opaque to insulate the caller from breaking changes in the `html5ever` interface.
///
/// Note that this type wraps an `html5ever` DOM tree. `ammonia` does not support streaming, so
/// the complete fragment needs to be stored in memory during processing.
///
/// [`String`]: https://doc.rust-lang.org/nightly/std/string/struct.String.html
/// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html
///
/// # Examples
///
///     use ammonia::Builder;
///
///     let input = "<!-- comments will be stripped -->This is an Ammonia example.";
///     let output = "This is an Ammonia example.";
///
///     let document = Builder::new()
///         .clean(input);
///     assert_eq!(document.to_string(), output);
// The single private field is the parsed DOM tree that all serialization reads from.
pub struct Document(RcDom);
2796
2797
impl Document {
2798
    /// Serializes a `Document` instance to a writer.
2799
    ///
2800
    /// This method writes the sanitized HTML to a [`Write`] instance, avoiding a buffering step.
2801
    ///
2802
    /// To avoid consuming the writer, a mutable reference can be passed, like in the example below.
2803
    ///
2804
    /// Note that the in-memory representation of `Document` is larger than the serialized
2805
    /// `String`.
2806
    ///
2807
    /// [`Write`]: https://doc.rust-lang.org/nightly/std/io/trait.Write.html
2808
    ///
2809
    /// # Examples
2810
    ///
2811
    ///     use ammonia::Builder;
2812
    ///
2813
    ///     let input = "Some <style></style>HTML here";
2814
    ///     let expected = b"Some HTML here";
2815
    ///
2816
    ///     let document = Builder::new()
2817
    ///         .clean(input);
2818
    ///
2819
    ///     let mut sanitized = Vec::new();
2820
    ///     document.write_to(&mut sanitized)
2821
    ///         .expect("Writing to a string should not fail (except on OOM)");
2822
    ///     assert_eq!(sanitized, expected);
2823
0
    pub fn write_to<W>(&self, writer: W) -> io::Result<()>
2824
0
    where
2825
0
        W: io::Write,
2826
    {
2827
0
        let opts = Self::serialize_opts();
2828
0
        let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into();
2829
0
        serialize(writer, &inner, opts)
2830
0
    }
2831
2832
    /// Exposes the `Document` instance as an [`rcdom::Handle`].
2833
    ///
2834
    /// This method returns the inner object backing the `Document` instance. This allows
2835
    /// making further changes to the DOM without introducing redundant serialization and
2836
    /// parsing.
2837
    ///
2838
    /// Note that this method should be considered unstable and sits outside of the semver
2839
    /// stability guarantees. It may change, break, or go away at any time, either because
2840
    /// of `html5ever` changes or `ammonia` implementation changes.
2841
    ///
2842
    /// For this method to be accessible, a `cfg` flag is required. The easiest way is to
2843
    /// use the `RUSTFLAGS` environment variable:
2844
    ///
2845
    /// ```text
2846
    /// RUSTFLAGS='--cfg ammonia_unstable' cargo build
2847
    /// ```
2848
    ///
2849
    /// on Unix-like platforms, or
2850
    ///
2851
    /// ```text
2852
    /// set RUSTFLAGS=--cfg ammonia_unstable
2853
    /// cargo build
2854
    /// ```
2855
    ///
2856
    /// on Windows.
2857
    ///
2858
    /// This requirement also applies to crates that transitively depend on crates that use
2859
    /// this flag.
2860
    ///
2861
    /// # Examples
2862
    ///
2863
    ///     use ammonia::Builder;
2864
    ///     use maplit::hashset;
2865
    ///     use html5ever::serialize::{serialize, SerializeOpts};
2866
    ///
2867
    ///     # use std::error::Error;
2868
    ///     # fn do_main() -> Result<(), Box<dyn Error>> {
2869
    ///     let input = "<a>one link</a> and <a>one more</a>";
2870
    ///     let expected = "<a>one more</a> and <a>one link</a>";
2871
    ///
2872
    ///     let document = Builder::new()
2873
    ///         .link_rel(None)
2874
    ///         .clean(input);
2875
    ///
2876
    ///     let mut node = document.to_dom_node();
2877
    ///     node.children.borrow_mut().reverse();
2878
    ///
2879
    ///     let mut buf = Vec::new();
2880
    ///     serialize(&mut buf, &node, SerializeOpts::default())?;
2881
    ///     let output = String::from_utf8(buf)?;
2882
    ///
2883
    ///     assert_eq!(output, expected);
2884
    ///     # Ok(())
2885
    ///     # }
2886
    ///     # fn main() { do_main().unwrap() }
2887
    #[cfg(ammonia_unstable)]
2888
    pub fn to_dom_node(&self) -> Handle {
2889
        self.0.document.children.borrow()[0].clone()
2890
    }
2891
2892
0
    fn serialize_opts() -> SerializeOpts {
2893
0
        SerializeOpts::default()
2894
0
    }
2895
}
2896
2897
impl Clone for Document {
2898
0
    fn clone(&self) -> Self {
2899
0
        let parser = Builder::make_parser();
2900
0
        let dom = parser.one(&self.to_string()[..]);
2901
0
        Document(dom)
2902
0
    }
2903
}
2904
2905
/// Convert a `Document` to stringified HTML.
2906
///
2907
/// Since [`Document`] implements [`Display`], it can be converted to a [`String`] using the
2908
/// standard [`ToString::to_string`] method. This is the simplest way to use `ammonia`.
2909
///
2910
/// [`Document`]: ammonia::Document
2911
/// [`Display`]: std::fmt::Display
2912
/// [`ToString::to_string`]: std::string::ToString
2913
///
2914
/// # Examples
2915
///
2916
///     use ammonia::Builder;
2917
///
2918
///     let input = "Some <style></style>HTML here";
2919
///     let output = "Some HTML here";
2920
///
2921
///     let document = Builder::new()
2922
///         .clean(input);
2923
///     assert_eq!(document.to_string(), output);
2924
impl Display for Document {
2925
0
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2926
0
        let opts = Self::serialize_opts();
2927
0
        let mut ret_val = Vec::new();
2928
0
        let inner: SerializableHandle = self.0.document.children.borrow()[0].clone().into();
2929
0
        serialize(&mut ret_val, &inner, opts)
2930
0
            .expect("Writing to a string shouldn't fail (expect on OOM)");
2931
0
        String::from_utf8(ret_val)
2932
0
            .expect("html5ever only supports UTF8")
2933
0
            .fmt(f)
2934
0
    }
2935
}
2936
2937
impl fmt::Debug for Document {
2938
0
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2939
0
        write!(f, "Document({})", self)
2940
0
    }
2941
}
2942
2943
impl From<Document> for String {
    /// Consumes the document and returns its serialized HTML, exactly as `to_string` would.
    fn from(doc: Document) -> String {
        ToString::to_string(&doc)
    }
}
2948
2949
#[cfg(test)]
2950
mod test {
2951
    use super::*;
2952
    #[test]
2953
    fn deeply_nested_whitelisted() {
2954
        clean(&"<b>".repeat(60_000));
2955
    }
2956
    #[test]
2957
    fn deeply_nested_blacklisted() {
2958
        clean(&"<b-b>".repeat(60_000));
2959
    }
2960
    #[test]
2961
    fn deeply_nested_alternating() {
2962
        clean(&"<b-b>".repeat(35_000));
2963
    }
2964
    #[test]
2965
    fn included_angles() {
2966
        let fragment = "1 < 2";
2967
        let result = clean(fragment);
2968
        assert_eq!(result, "1 &lt; 2");
2969
    }
2970
    #[test]
2971
    fn remove_script() {
2972
        let fragment = "an <script>evil()</script> example";
2973
        let result = clean(fragment);
2974
        assert_eq!(result, "an  example");
2975
    }
2976
    #[test]
2977
    fn ignore_link() {
2978
        let fragment = "a <a href=\"http://www.google.com\">good</a> example";
2979
        let expected = "a <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">\
2980
                        good</a> example";
2981
        let result = clean(fragment);
2982
        assert_eq!(result, expected);
2983
    }
2984
    #[test]
2985
    fn remove_unsafe_link() {
2986
        let fragment = "an <a onclick=\"evil()\" href=\"http://www.google.com\">evil</a> example";
2987
        let result = clean(fragment);
2988
        assert_eq!(
2989
            result,
2990
            "an <a href=\"http://www.google.com\" rel=\"noopener noreferrer\">evil</a> example"
2991
        );
2992
    }
2993
    #[test]
2994
    fn remove_js_link() {
2995
        let fragment = "an <a href=\"javascript:evil()\">evil</a> example";
2996
        let result = clean(fragment);
2997
        assert_eq!(result, "an <a rel=\"noopener noreferrer\">evil</a> example");
2998
    }
2999
    #[test]
3000
    fn tag_rebalance() {
3001
        let fragment = "<b>AWESOME!";
3002
        let result = clean(fragment);
3003
        assert_eq!(result, "<b>AWESOME!</b>");
3004
    }
3005
    #[test]
3006
    fn allow_url_relative() {
3007
        let fragment = "<a href=test>Test</a>";
3008
        let result = Builder::new()
3009
            .url_relative(UrlRelative::PassThrough)
3010
            .clean(fragment)
3011
            .to_string();
3012
        assert_eq!(
3013
            result,
3014
            "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
3015
        );
3016
    }
3017
    #[test]
3018
    fn rewrite_url_relative() {
3019
        let fragment = "<a href=test>Test</a>";
3020
        let result = Builder::new()
3021
            .url_relative(UrlRelative::RewriteWithBase(
3022
                Url::parse("http://example.com/").unwrap(),
3023
            ))
3024
            .clean(fragment)
3025
            .to_string();
3026
        assert_eq!(
3027
            result,
3028
            "<a href=\"http://example.com/test\" rel=\"noopener noreferrer\">Test</a>"
3029
        );
3030
    }
3031
    #[test]
3032
    fn rewrite_url_relative_with_invalid_url() {
3033
        // Reduced from https://github.com/Bauke/ammonia-crash-test
3034
        let fragment = r##"<a href="\\"https://example.com\\"">test</a>"##;
3035
        let result = Builder::new()
3036
            .url_relative(UrlRelative::RewriteWithBase(
3037
                Url::parse("http://example.com/").unwrap(),
3038
            ))
3039
            .clean(fragment)
3040
            .to_string();
3041
        assert_eq!(result, r##"<a rel="noopener noreferrer">test</a>"##);
3042
    }
3043
    #[test]
3044
    fn attribute_filter_nop() {
3045
        let fragment = "<a href=test>Test</a>";
3046
        let result = Builder::new()
3047
            .attribute_filter(|elem, attr, value| {
3048
                assert_eq!("a", elem);
3049
                assert!(
3050
                    matches!(
3051
                        (attr, value),
3052
                        ("href", "test") | ("rel", "noopener noreferrer")
3053
                    ),
3054
                    "{}",
3055
                    value.to_string()
3056
                );
3057
                Some(value.into())
3058
            })
3059
            .clean(fragment)
3060
            .to_string();
3061
        assert_eq!(
3062
            result,
3063
            "<a href=\"test\" rel=\"noopener noreferrer\">Test</a>"
3064
        );
3065
    }
3066
3067
    #[test]
3068
    fn attribute_filter_drop() {
3069
        let fragment = "Test<img alt=test src=imgtest>";
3070
        let result = Builder::new()
3071
            .attribute_filter(|elem, attr, value| {
3072
                assert_eq!("img", elem);
3073
                match (attr, value) {
3074
                    ("src", "imgtest") => None,
3075
                    ("alt", "test") => Some(value.into()),
3076
                    _ => panic!("unexpected"),
3077
                }
3078
            })
3079
            .clean(fragment)
3080
            .to_string();
3081
        assert_eq!(result, r#"Test<img alt="test">"#);
3082
    }
3083
3084
    #[test]
3085
    fn url_filter_absolute() {
3086
        let fragment = "Test<img alt=test src=imgtest>";
3087
        let result = Builder::new()
3088
            .attribute_filter(|elem, attr, value| {
3089
                assert_eq!("img", elem);
3090
                match (attr, value) {
3091
                    ("src", "imgtest") => {
3092
                        Some(format!("https://example.com/images/{}", value).into())
3093
                    }
3094
                    ("alt", "test") => None,
3095
                    _ => panic!("unexpected"),
3096
                }
3097
            })
3098
            .url_relative(UrlRelative::RewriteWithBase(
3099
                Url::parse("http://wrong.invalid/").unwrap(),
3100
            ))
3101
            .clean(fragment)
3102
            .to_string();
3103
        assert_eq!(
3104
            result,
3105
            r#"Test<img src="https://example.com/images/imgtest">"#
3106
        );
3107
    }
3108
3109
    #[test]
3110
    fn url_filter_relative() {
3111
        let fragment = "Test<img alt=test src=imgtest>";
3112
        let result = Builder::new()
3113
            .attribute_filter(|elem, attr, value| {
3114
                assert_eq!("img", elem);
3115
                match (attr, value) {
3116
                    ("src", "imgtest") => Some("rewrite".into()),
3117
                    ("alt", "test") => Some("altalt".into()),
3118
                    _ => panic!("unexpected"),
3119
                }
3120
            })
3121
            .url_relative(UrlRelative::RewriteWithBase(
3122
                Url::parse("https://example.com/base/#").unwrap(),
3123
            ))
3124
            .clean(fragment)
3125
            .to_string();
3126
        assert_eq!(
3127
            result,
3128
            r#"Test<img alt="altalt" src="https://example.com/base/rewrite">"#
3129
        );
3130
    }
3131
3132
    /// With `link_rel(None)` and a `RewriteWithBase` policy, a relative `href` is
    /// resolved against the base URL and no `rel` attribute appears in the output.
    #[test]
    fn rewrite_url_relative_no_rel() {
        let base = Url::parse("http://example.com/").unwrap();
        let mut builder = Builder::new();
        builder
            .url_relative(UrlRelative::RewriteWithBase(base))
            .link_rel(None);
        let cleaned = builder.clean("<a href=test>Test</a>").to_string();
        assert_eq!(cleaned, r#"<a href="http://example.com/test">Test</a>"#);
    }
3144
    /// Under `UrlRelative::Deny` the relative `href` is dropped from the link; the
    /// default `rel` value is still present in the output.
    #[test]
    fn deny_url_relative() {
        let mut builder = Builder::new();
        builder.url_relative(UrlRelative::Deny);
        let cleaned = builder.clean("<a href=test>Test</a>").to_string();
        assert_eq!(cleaned, r#"<a rel="noopener noreferrer">Test</a>"#);
    }
3153
    /// A user-supplied `rel` value is replaced by the default `rel`, and with
    /// `UrlRelative::PassThrough` the relative `href` is kept as written.
    #[test]
    fn replace_rel() {
        let mut builder = Builder::new();
        builder.url_relative(UrlRelative::PassThrough);
        let cleaned = builder
            .clean(r#"<a href=test rel="garbage">Test</a>"#)
            .to_string();
        assert_eq!(
            cleaned,
            r#"<a href="test" rel="noopener noreferrer">Test</a>"#
        );
    }
3165
    /// Turning off `link_rel` does not allow `rel` through: the user-supplied
    /// `rel` is still removed, leaving only the passed-through `href`.
    #[test]
    fn consider_rel_still_banned() {
        let mut builder = Builder::new();
        builder.url_relative(UrlRelative::PassThrough).link_rel(None);
        let cleaned = builder
            .clean(r#"<a href=test rel="garbage">Test</a>"#)
            .to_string();
        assert_eq!(cleaned, r#"<a href="test">Test</a>"#);
    }
3175
    /// With `data` allowed as a generic attribute, a `javascript:` value is kept
    /// verbatim on `<span>` but the attribute is removed from `<object>`.
    #[test]
    fn object_data() {
        let input = concat!(
            r#"<span data="javascript:evil()">Test</span>"#,
            r#"<object data="javascript:evil()"></object>M"#,
        );
        let mut builder = Builder::new();
        builder
            .tags(hashset!["span", "object"])
            .generic_attributes(hashset!["data"]);
        let cleaned = builder.clean(input).to_string();
        assert_eq!(
            cleaned,
            "<span data=\"javascript:evil()\">Test</span><object></object>M"
        );
    }
3187
    /// The `border` attribute is stripped from `<table>` by the default builder;
    /// the serialized output also contains the `<tbody>` inserted during parsing.
    #[test]
    fn remove_attributes() {
        let cleaned = Builder::new()
            .clean(r#"<table border="1"><tr></tr></table>"#)
            .to_string();
        assert_eq!(cleaned, "<table><tbody><tr></tr></tbody></table>");
    }
3196
    /// A double quote inside a single-quoted attribute value is serialized as
    /// `&quot;` inside a double-quoted attribute.
    #[test]
    fn quotes_in_attrs() {
        let cleaned = clean(r#"<b title='"'>contents</b>"#);
        assert_eq!(cleaned, r#"<b title="&quot;">contents</b>"#);
    }
3202
    /// Setting `link_rel` while also allowing `rel` as a generic attribute is
    /// expected to panic.
    #[test]
    #[should_panic]
    fn panic_if_rel_is_allowed_and_replaced_generic() {
        let mut builder = Builder::new();
        builder.link_rel(Some("noopener noreferrer"));
        builder.generic_attributes(hashset!["rel"]);
        builder.clean("something");
    }
3210
    /// Setting `link_rel` while also allowing `rel` on `<a>` via tag attributes
    /// is expected to panic.
    #[test]
    #[should_panic]
    fn panic_if_rel_is_allowed_and_replaced_a() {
        let mut builder = Builder::new();
        builder.link_rel(Some("noopener noreferrer"));
        builder.tag_attributes(hashmap![
            "a" => hashset!["rel"],
        ]);
        builder.clean("something");
    }
3220
    /// Allowing `rel` on `<span>` while `link_rel` is set must not panic.
    #[test]
    fn no_panic_if_rel_is_allowed_and_replaced_span() {
        let mut builder = Builder::new();
        builder.link_rel(Some("noopener noreferrer"));
        builder.tag_attributes(hashmap![
            "span" => hashset!["rel"],
        ]);
        builder.clean(r#"<span rel="what">s</span>"#);
    }
3229
    /// Allowing `rel` as a generic attribute must not panic when `link_rel` is
    /// disabled.
    #[test]
    fn no_panic_if_rel_is_allowed_and_not_replaced_generic() {
        let mut builder = Builder::new();
        builder.link_rel(None);
        builder.generic_attributes(hashset!["rel"]);
        builder.clean(r#"<a rel="what">s</a>"#);
    }
3236
    /// Allowing `rel` on `<a>` via tag attributes must not panic when `link_rel`
    /// is disabled.
    #[test]
    fn no_panic_if_rel_is_allowed_and_not_replaced_a() {
        let mut builder = Builder::new();
        builder.link_rel(None);
        builder.tag_attributes(hashmap![
            "a" => hashset!["rel"],
        ]);
        builder.clean(r#"<a rel="what">s</a>"#);
    }
3245
    /// A void element such as `<br>` is serialized without a closing tag.
    #[test]
    fn dont_close_void_elements() {
        // `clean` already yields a `String` (see `quotes_in_attrs`), so it is
        // compared directly instead of being copied through `.to_string()`.
        let cleaned = clean("<br>");
        assert_eq!(cleaned, "<br>");
    }
3251
    /// Allowing `class` through `tag_attributes` for a tag that also has an
    /// `allowed_classes` entry is expected to panic.
    #[test]
    #[should_panic]
    fn panic_on_allowed_classes_tag_attributes() {
        let input = r#"<p class="foo bar"><a class="baz bleh">Hey</a></p>"#;
        let mut builder = Builder::new();
        builder.link_rel(None);
        builder.tag_attributes(hashmap![
            "p" => hashset!["class"],
            "a" => hashset!["class"],
        ]);
        builder.allowed_classes(hashmap![
            "p" => hashset!["foo", "bar"],
            "a" => hashset!["baz"],
        ]);
        builder.clean(input);
    }
3267
    /// Allowing `class` as a generic attribute together with `allowed_classes`
    /// is expected to panic.
    #[test]
    #[should_panic]
    fn panic_on_allowed_classes_generic_attributes() {
        let input = r#"<p class="foo bar"><a class="baz bleh">Hey</a></p>"#;
        let mut builder = Builder::new();
        builder.link_rel(None);
        builder.generic_attributes(hashset!["class", "href", "some-foo"]);
        builder.allowed_classes(hashmap![
            "p" => hashset!["foo", "bar"],
            "a" => hashset!["baz"],
        ]);
        builder.clean(input);
    }
3280
    /// Only class names listed in `allowed_classes` for the element survive:
    /// `bleh` is removed from `<a>` while `<p>` keeps both of its classes.
    #[test]
    fn remove_non_allowed_classes() {
        let input = r#"<p class="foo bar"><a class="baz bleh">Hey</a></p>"#;
        let mut builder = Builder::new();
        builder.link_rel(None).allowed_classes(hashmap![
            "p" => hashset!["foo", "bar"],
            "a" => hashset!["baz"],
        ]);
        let cleaned = builder.clean(input).to_string();
        assert_eq!(cleaned, r#"<p class="foo bar"><a class="baz">Hey</a></p>"#);
    }
3295
    /// Class filtering for `<p>` and `<a>` gives the same result when `class`
    /// is additionally allowed as a tag attribute on an unrelated tag (`div`).
    #[test]
    fn remove_non_allowed_classes_with_tag_class() {
        let input = r#"<p class="foo bar"><a class="baz bleh">Hey</a></p>"#;
        let mut builder = Builder::new();
        builder.link_rel(None);
        builder.tag_attributes(hashmap![
            "div" => hashset!["class"],
        ]);
        builder.allowed_classes(hashmap![
            "p" => hashset!["foo", "bar"],
            "a" => hashset!["baz"],
        ]);
        let cleaned = builder.clean(input).to_string();
        assert_eq!(cleaned, r#"<p class="foo bar"><a class="baz">Hey</a></p>"#);
    }
3313
    /// Class lists are split on ASCII whitespace only, so `f\x0B` and
    /// `g\u{2000}` do not match the allowed `f` / `g` and are dropped.
    #[test]
    fn allowed_classes_ascii_whitespace() {
        // Per https://infra.spec.whatwg.org/#ascii-whitespace, the ASCII
        // whitespace characters are TAB (\t), LF (\n), FF (\x0C), CR (\x0D)
        // and SPACE (\x20). VT (\x0B) and Unicode whitespace characters are
        // not ASCII whitespace.
        let input = "<p class=\"a\tb\nc\x0Cd\re f\x0B g\u{2000}\">";
        let mut builder = Builder::new();
        builder.allowed_classes(hashmap![
            "p" => hashset!["a", "b", "c", "d", "e", "f", "g"],
        ]);
        let cleaned = builder.clean(input).to_string();
        assert_eq!(cleaned, "<p class=\"a b c d e\"></p>");
    }
3327
    /// An attribute restricted by `tag_attribute_values` is removed when its
    /// value (`baz`) is not in the allowed set; the plainly allowed `name` stays.
    #[test]
    fn remove_non_allowed_attributes_with_tag_attribute_values() {
        let mut builder = Builder::new();
        builder.tag_attribute_values(hashmap![
            "p" => hashmap![
                "data-label" => hashset!["bar"],
            ],
        ]);
        builder.tag_attributes(hashmap![
            "p" => hashset!["name"],
        ]);
        let cleaned = builder
            .clean(r#"<p data-label="baz" name="foo"></p>"#)
            .to_string();
        assert_eq!(cleaned, r#"<p name="foo"></p>"#);
    }
3342
    /// An attribute restricted by `tag_attribute_values` is kept when its value
    /// (`bar`) is in the allowed set.
    #[test]
    fn keep_allowed_attributes_with_tag_attribute_values() {
        let input = r#"<p data-label="bar" name="foo"></p>"#;
        let mut builder = Builder::new();
        builder.tag_attribute_values(hashmap![
            "p" => hashmap![
                "data-label" => hashset!["bar"],
            ],
        ]);
        builder.tag_attributes(hashmap![
            "p" => hashset!["name"],
        ]);
        let cleaned = builder.clean(input).to_string();
        assert_eq!(cleaned, r#"<p data-label="bar" name="foo"></p>"#);
    }
3360
    /// Matching against `tag_attribute_values` ignores case: `CHECKBOX` is
    /// accepted for the allowed value `checkbox` and is emitted with its
    /// original casing.
    #[test]
    fn tag_attribute_values_case_insensitive() {
        let input = r#"<input type="CHECKBOX" name="foo">"#;
        let mut builder = Builder::new();
        builder.tags(hashset!["input"]);
        builder.tag_attribute_values(hashmap![
            "input" => hashmap![
                "type" => hashset!["checkbox"],
            ],
        ]);
        builder.tag_attributes(hashmap![
            "input" => hashset!["name"],
        ]);
        let cleaned = builder.clean(input).to_string();
        assert_eq!(cleaned, r#"<input type="CHECKBOX" name="foo">"#);
    }
3376
    /// `set_tag_attribute_value` adds the configured attribute to a matching
    /// element that did not have it.
    #[test]
    fn set_tag_attribute_values() {
        let mut builder = Builder::new();
        builder
            .link_rel(None)
            .add_tag_attributes("a", &["target"])
            .set_tag_attribute_value("a", "target", "_blank");
        let cleaned = builder
            .clean(r#"<a href="https://example.com/">Link</a>"#)
            .to_string();
        assert_eq!(
            cleaned,
            r#"<a href="https://example.com/" target="_blank">Link</a>"#
        );
    }
3389
    /// `set_tag_attribute_value` overwrites an existing attribute value in
    /// place, keeping the attribute's original position.
    #[test]
    fn update_existing_set_tag_attribute_values() {
        let mut builder = Builder::new();
        builder
            .link_rel(None)
            .add_tag_attributes("a", &["target"])
            .set_tag_attribute_value("a", "target", "_blank");
        let cleaned = builder
            .clean(r#"<a target="bad" href="https://example.com/">Link</a>"#)
            .to_string();
        assert_eq!(
            cleaned,
            r#"<a target="_blank" href="https://example.com/">Link</a>"#
        );
    }
3402
    /// Configuring a set-attribute value for a tag does not allow that tag:
    /// `<my-elem>` is still removed from the output.
    #[test]
    fn unwhitelisted_set_tag_attribute_values() {
        let mut builder = Builder::new();
        builder.set_tag_attribute_value("my-elem", "my-attr", "val");
        let cleaned = builder.clean("<span>hi</span><my-elem>").to_string();
        assert_eq!(cleaned, "<span>hi</span>");
    }
3410
    /// An `href` written entirely as hexadecimal character references is
    /// removed by the default cleaner, leaving only the default `rel`.
    #[test]
    fn remove_entity_link() {
        let input = concat!(
            "<a href=\"&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61",
            "&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29\">Click me!</a>",
        );
        // `clean` already yields a `String` (see `quotes_in_attrs`); the extra
        // `.to_string()` copy in the assertion was redundant.
        let cleaned = clean(input);
        assert_eq!(cleaned, r#"<a rel="noopener noreferrer">Click me!</a>"#);
    }
3420
    /// Exercises `UrlRelative::Custom` with an evaluator that prefixes absolute
    /// paths with `/root`, rejects URLs starting with `ba`, and passes
    /// everything else through. The absolute `http://` link is emitted
    /// unchanged.
    #[test]
    fn remove_relative_url_evaluate() {
        fn is_absolute_path(url: &str) -> bool {
            // `/a/b/c` is an absolute path (what we want to rewrite), whereas
            // `//a/b/c` is "protocol-relative": there `a` is a hostname.
            url.starts_with('/') && !url.starts_with("//")
        }
        fn is_banned(url: &str) -> bool {
            url.starts_with("ba")
        }
        fn evaluate(url: &str) -> Option<Cow<'_, str>> {
            if is_absolute_path(url) {
                return Some(Cow::Owned(format!("/root{url}")));
            }
            if is_banned(url) {
                return None;
            }
            Some(Cow::Borrowed(url))
        }
        let input = concat!(
            "<a href=banned>banned</a>",
            "<a href=/test/path>fixed</a>",
            "<a href=path>passed</a>",
            "<a href=http://google.com/>skipped</a>",
        );
        let expected = concat!(
            r#"<a rel="noopener noreferrer">banned</a>"#,
            r#"<a href="/root/test/path" rel="noopener noreferrer">fixed</a>"#,
            r#"<a href="path" rel="noopener noreferrer">passed</a>"#,
            r#"<a href="http://google.com/" rel="noopener noreferrer">skipped</a>"#,
        );
        let mut builder = Builder::new();
        builder.url_relative(UrlRelative::Custom(Box::new(evaluate)));
        let cleaned = builder.clean(input).to_string();
        assert_eq!(cleaned, expected);
    }
3447
    /// When the custom evaluator rejects an `href`, the attribute is removed
    /// whether it is alone, before, or after a `title`; the expected strings
    /// pin where `rel` ends up relative to `title` in each case.
    #[test]
    fn remove_relative_url_evaluate_b() {
        fn is_absolute_path(url: &str) -> bool {
            // `/a/b/c` is an absolute path (what we want to rewrite), whereas
            // `//a/b/c` is "protocol-relative": there `a` is a hostname.
            url.starts_with('/') && !url.starts_with("//")
        }
        fn is_banned(url: &str) -> bool {
            url.starts_with("ba")
        }
        fn evaluate(url: &str) -> Option<Cow<'_, str>> {
            if is_absolute_path(url) {
                return Some(Cow::Owned(format!("/root{url}")));
            }
            if is_banned(url) {
                return None;
            }
            Some(Cow::Borrowed(url))
        }
        let input = concat!(
            "<a href=banned>banned</a>",
            "<a href=banned title=test>banned</a>",
            "<a title=test href=banned>banned</a>",
        );
        let expected = concat!(
            r#"<a rel="noopener noreferrer">banned</a>"#,
            r#"<a rel="noopener noreferrer" title="test">banned</a>"#,
            r#"<a title="test" rel="noopener noreferrer">banned</a>"#,
        );
        let mut builder = Builder::new();
        builder.url_relative(UrlRelative::Custom(Box::new(evaluate)));
        let cleaned = builder.clean(input).to_string();
        assert_eq!(cleaned, expected);
    }
3474
    /// A custom relative-URL evaluator is not applied to absolute URLs: the
    /// evaluator below would turn any input into `invalid`, yet the absolute
    /// `href` is emitted unchanged.
    #[test]
    fn remove_relative_url_evaluate_c() {
        // Don't run on absolute URLs.
        fn evaluate(_: &str) -> Option<Cow<'_, str>> {
            // Tail expression instead of an explicit `return` (clippy::needless_return).
            Some(Cow::Owned(String::from("invalid")))
        }
        let cleaned = Builder::new()
            .url_relative(UrlRelative::Custom(Box::new(evaluate)))
            .clean("<a href=\"https://www.google.com/\">google</a>")
            .to_string();
        assert_eq!(
            cleaned,
            "<a href=\"https://www.google.com/\" rel=\"noopener noreferrer\">google</a>"
        );
    }
3489
    /// Disallowed elements are removed but their text content, including text
    /// inside nested disallowed elements, is kept.
    #[test]
    fn clean_children_of_bad_element() {
        let cleaned = Builder::new().clean("<bad><evil>a</evil>b</bad>").to_string();
        assert_eq!(cleaned, "ab");
    }
3495
    /// `clean_from_reader` accepts a byte reader and cleans its content; the
    /// `<script>` element and its text are absent from the output.
    #[test]
    fn reader_input() {
        let bytes: &[u8] = b"an <script>evil()</script> example";
        let outcome = Builder::new().clean_from_reader(bytes);
        assert!(outcome.is_ok());
        let cleaned = outcome.unwrap().to_string();
        assert_eq!(cleaned, "an  example");
    }
3502
    /// Input that is not valid UTF-8 does not make `clean_from_reader` fail:
    /// the truncated sequence comes out as U+FFFD.
    #[test]
    fn reader_non_utf8() {
        let bytes: &[u8] = b"non-utf8 \xF0\x90\x80string";
        let outcome = Builder::new().clean_from_reader(bytes);
        assert!(outcome.is_ok());
        let cleaned = outcome.unwrap().to_string();
        assert_eq!(cleaned, "non-utf8 \u{fffd}string");
    }
3509
    /// Formatting the cleaned document with `{}` yields the sanitized HTML.
    #[test]
    fn display_impl() {
        let document = Builder::new().link_rel(None).clean("a <a>link</a>");
        let shown = format!("{document}");
        assert_eq!(shown, "a <a>link</a>");
    }
3515
    /// Formatting the cleaned document with `{:?}` wraps the sanitized HTML in
    /// `Document(...)`.
    #[test]
    fn debug_impl() {
        let document = Builder::new().link_rel(None).clean("a <a>link</a>");
        let shown = format!("{document:?}");
        assert_eq!(shown, "Document(a <a>link</a>)");
    }
3521
    /// Smoke test, compiled only under `cfg(ammonia_unstable)`: calling
    /// `to_dom_node` on a cleaned document completes without panicking.
    #[cfg(ammonia_unstable)]
    #[test]
    fn to_dom_node() {
        let document = Builder::new().link_rel(None).clean("a <a>link</a>");
        let _node = document.to_dom_node();
    }
3528
    /// Converting a cleaned document with `String::from` yields the sanitized
    /// HTML; the unclosed `<a>` in the input is closed in the output.
    #[test]
    fn string_from_document() {
        let fragment = r#"a <a>link"#;
        let result = String::from(Builder::new().link_rel(None).clean(fragment));
        // `result` is already a `String`; routing it through `format!("{}", ..)`
        // only made a needless copy (clippy::useless_format).
        assert_eq!(result, "a <a>link</a>");
    }
3534
    /// Compile-time probe: accepts (and discards) any value whose type is `Sync`.
    fn require_sync<T>(_: T)
    where
        T: Sync,
    {
    }
3535
    /// Compile-time probe: accepts (and discards) any value whose type is `Send`.
    fn require_send<T>(_: T)
    where
        T: Send,
    {
    }
3536
    /// Compile-time check that `Builder` is both `Sync` and `Send`; the test
    /// body only has to type-check.
    #[test]
    fn require_sync_and_send() {
        let for_sync = Builder::new();
        require_sync(for_sync);
        let for_send = Builder::new();
        require_send(for_send);
    }
3541
    /// With an `id_prefix` configured, an allowed `id` on `<a>` gets the
    /// prefix, while `id` on `<b>` (not allowed there) is removed.
    #[test]
    fn id_prefixed() {
        // The stray `</a>` after `<b ...>` is part of the input under test.
        let fragment = "<a id=\"hello\"></a><b id=\"hello\"></a>";
        let result = String::from(
            Builder::new()
                .tag_attributes(hashmap![
                    "a" => hashset!["id"],
                ])
                .id_prefix(Some("prefix-"))
                .clean(fragment),
        );
        // `result` is already a `String`; compare it directly instead of
        // copying it with `.to_string()`.
        assert_eq!(
            result,
            "<a id=\"prefix-hello\" rel=\"noopener noreferrer\"></a><b></b>"
        );
    }
3557
    /// An `id` that already starts with the configured prefix is left as-is
    /// rather than being prefixed a second time.
    #[test]
    fn id_already_prefixed() {
        let fragment = "<a id=\"prefix-hello\"></a>";
        let result = String::from(
            Builder::new()
                .tag_attributes(hashmap![
                    "a" => hashset!["id"],
                ])
                .id_prefix(Some("prefix-"))
                .clean(fragment),
        );
        // `result` is already a `String`; compare it directly instead of
        // copying it with `.to_string()`.
        assert_eq!(
            result,
            "<a id=\"prefix-hello\" rel=\"noopener noreferrer\"></a>"
        );
    }
3573
    /// A tag listed in `clean_content_tags` is removed together with all of
    /// its content, so the output here is empty.
    #[test]
    fn clean_content_tags() {
        let fragment = "<script type=\"text/javascript\"><a>Hello!</a></script>";
        let result = String::from(
            Builder::new()
                .clean_content_tags(hashset!["script"])
                .clean(fragment),
        );
        // `result` is already a `String`; no `.to_string()` copy is needed.
        assert_eq!(result, "");
    }
3583
    /// Removing the content of `clean_content_tags` elements leaves the
    /// surrounding allowed markup untouched.
    #[test]
    fn only_clean_content_tags() {
        let fragment = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>";
        let result = String::from(
            Builder::new()
                .clean_content_tags(hashset!["script"])
                .clean(fragment),
        );
        // `result` is already a `String`; no `.to_string()` copy is needed.
        assert_eq!(result, "<em>This is</em><p>still here!</p>");
    }
3593
    /// Removing the default tag `a` and some of its default attributes before
    /// cleaning gives the same output as `only_clean_content_tags` for this
    /// input.
    #[test]
    fn clean_removed_default_tag() {
        let fragment = "<em>This is</em><script><a>Hello!</a></script><p>still here!</p>";
        let result = String::from(
            Builder::new()
                .rm_tags(hashset!["a"])
                .rm_tag_attributes("a", hashset!["href", "hreflang"])
                .clean_content_tags(hashset!["script"])
                .clean(fragment),
        );
        // `result` is already a `String`; no `.to_string()` copy is needed.
        assert_eq!(result, "<em>This is</em><p>still here!</p>");
    }
3605
    /// Expected to panic: `a` is removed from the allowed tags and then listed
    /// in `clean_content_tags` before cleaning.
    // NOTE(review): the name mentions "attribute" — presumably the panic comes
    // from the default `a` attribute allow-list that remains; confirm.
    #[test]
    #[should_panic]
    fn panic_on_clean_content_tag_attribute() {
        let mut builder = Builder::new();
        builder.rm_tags(std::iter::once("a"));
        builder.clean_content_tags(hashset!["a"]);
        builder.clean("");
    }
3613
    /// Expected to panic: `a` is an allowed tag by default and is also listed
    /// in `clean_content_tags`.
    #[test]
    #[should_panic]
    fn panic_on_clean_content_tag() {
        let mut builder = Builder::new();
        builder.clean_content_tags(hashset!["a"]);
        builder.clean("");
    }
3618
3619
    /// `clean_text` escapes angle brackets and spaces as character references.
    #[test]
    fn clean_text_test() {
        let escaped = clean_text("<this> is <a test function");
        assert_eq!(
            escaped,
            "&lt;this&gt;&#32;is&#32;&lt;a&#32;test&#32;function"
        );
    }
3626
3627
    /// `clean_text` encodes TAB, LF, FF and SPACE as decimal character
    /// references.
    #[test]
    fn clean_text_spaces_test() {
        let escaped = clean_text("\t\n\x0C ");
        assert_eq!(escaped, "&#9;&#10;&#12;&#32;");
    }
3631
3632
    /// Namespace handling around `<svg>`: with only `iframe` added to the
    /// allowed tags, an `<svg>` subtree and its text are removed while a
    /// top-level `<iframe>` is kept; once `svg` is allowed as well, an `<a>`
    /// inside it is kept.
    #[test]
    fn ns_svg() {
        // https://github.com/cure53/DOMPurify/pull/495
        // Each case: (extra allowed tags, input, expected output).
        let cases: [(&[&str], &str, &str); 4] = [
            (
                &["iframe"][..],
                r##"<svg><iframe><a title="</iframe><img src onerror=alert(1)>">test"##,
                "",
            ),
            (
                &["iframe"][..],
                "<svg><iframe>remove me</iframe></svg><iframe>keep me</iframe>",
                "<iframe>keep me</iframe>",
            ),
            (
                &["iframe"][..],
                "<svg><a>remove me</a></svg><iframe>keep me</iframe>",
                "<iframe>keep me</iframe>",
            ),
            (
                &["iframe", "svg"][..],
                "<svg><a>keep me</a></svg><iframe>keep me</iframe>",
                "<svg><a rel=\"noopener noreferrer\">keep me</a></svg><iframe>keep me</iframe>",
            ),
        ];
        for (tags, fragment, expected) in cases {
            let result = String::from(Builder::new().add_tags(tags).clean(fragment));
            // `result` is already a `String`; compare it directly instead of
            // copying it with `.to_string()`.
            assert_eq!(result, expected);
        }
    }
3654
3655
    /// With comments preserved and the SVG-related tags allowed, the nested
    /// `<path>`/`<xmp>`/`<img>` payload is removed and only the
    /// `<svg><foreignObject><table>` skeleton remains.
    #[test]
    fn ns_svg_2() {
        let input = "<svg><foreignObject><table><path><xmp><!--</xmp><img title'--&gt;&lt;img src=1 onerror=alert(1)&gt;'>";
        let mut builder = Builder::default();
        builder
            .strip_comments(false)
            .add_tags(&["svg", "foreignObject", "table", "path", "xmp"]);
        let cleaned = builder.clean(input).to_string();
        assert_eq!(
            cleaned,
            "<svg><foreignObject><table></table></foreignObject></svg>"
        );
    }
3667
3668
    /// Namespace handling for MathML: with `math`, `mtext` and `mglyph`
    /// allowed, `<mglyph>` is removed at top level and inside a `<div>`, but
    /// kept as a direct child of `<mtext>`.
    #[test]
    fn ns_mathml() {
        // https://github.com/cure53/DOMPurify/pull/495
        let tags = ["math", "mtext", "mglyph"];
        // Each case: (input, expected output).
        let cases = [
            ("<mglyph></mglyph>", ""),
            (
                "<math><mtext><div><mglyph>",
                "<math><mtext><div></div></mtext></math>",
            ),
            (
                "<math><mtext><mglyph>",
                "<math><mtext><mglyph></mglyph></mtext></math>",
            ),
        ];
        for (fragment, expected) in cases {
            let result = String::from(Builder::new().add_tags(&tags).clean(fragment));
            // `result` is already a `String`; compare it directly instead of
            // copying it with `.to_string()`.
            assert_eq!(result, expected);
        }
    }
3699
3700
    /// With comments preserved and the MathML-related tags allowed, the nested
    /// `<mglyph>`/`<xmp>`/`<img>` payload is removed and only the
    /// `<math><mtext><table>` skeleton remains.
    #[test]
    fn ns_mathml_2() {
        let input = "<math><mtext><table><mglyph><xmp><!--</xmp><img title='--&gt;&lt;img src=1 onerror=alert(1)&gt;'>";
        let mut builder = Builder::default();
        builder
            .strip_comments(false)
            .add_tags(&["math", "mtext", "table", "mglyph", "xmp"]);
        let cleaned = builder.clean(input).to_string();
        assert_eq!(cleaned, "<math><mtext><table></table></mtext></math>");
    }
3712
3713
3714
    /// An XML processing instruction inside `<svg>` never reaches the output,
    /// and markup that follows a prematurely closed instruction is sanitized
    /// like ordinary HTML (`onerror` is stripped from the `<img>`).
    #[test]
    fn xml_processing_instruction() {
        // https://blog.slonser.info/posts/dompurify-node-type-confusion/
        // In every case `result` is already a `String`, so it is compared
        // directly rather than copied through `.to_string()`.
        let fragment = r##"<svg><?xml-stylesheet src='slonser' ?></svg>"##;
        let result = String::from(Builder::new().clean(fragment));
        assert_eq!(result, "");

        let fragment = r##"<svg><?xml-stylesheet src='slonser' ?></svg>"##;
        let result = String::from(Builder::new().add_tags(&["svg"]).clean(fragment));
        assert_eq!(result, "<svg></svg>");

        let fragment = r##"<svg><?xml-stylesheet ><img src=x onerror="alert('Ammonia bypassed!!!')"> ?></svg>"##;
        let result = String::from(Builder::new().add_tags(&["svg"]).clean(fragment));
        assert_eq!(result, "<svg></svg><img src=\"x\"> ?&gt;");
    }
3729
3730
    /// Walks the prefix set through set / add / remove and checks its size at
    /// each step: adding a duplicate or removing an absent prefix changes
    /// nothing, and removing the last prefix resets the field to `None`.
    #[test]
    fn generic_attribute_prefixes() {
        /// Number of prefixes currently configured; panics if none are set.
        fn prefix_count(builder: &Builder<'_>) -> usize {
            builder.generic_attribute_prefixes.as_ref().unwrap().len()
        }

        let prefix_data = ["data-"];
        let prefix_code = ["code-"];
        let mut b = Builder::new();
        let initial: HashSet<&str> = ["data-"].into_iter().collect();

        assert!(b.generic_attribute_prefixes.is_none());
        b.generic_attribute_prefixes(initial);
        assert!(b.generic_attribute_prefixes.is_some());
        assert_eq!(prefix_count(&b), 1);

        // Adding a prefix that is already present leaves the set unchanged.
        b.add_generic_attribute_prefixes(&prefix_data);
        assert_eq!(prefix_count(&b), 1);
        b.add_generic_attribute_prefixes(&prefix_code);
        assert_eq!(prefix_count(&b), 2);

        // The second removal of the same prefix changes nothing.
        b.rm_generic_attribute_prefixes(&prefix_code);
        assert_eq!(prefix_count(&b), 1);
        b.rm_generic_attribute_prefixes(&prefix_code);
        assert_eq!(prefix_count(&b), 1);

        b.rm_generic_attribute_prefixes(&prefix_data);
        assert!(b.generic_attribute_prefixes.is_none());
    }
3752
3753
    /// Attributes are kept when they are explicitly allowed on the tag or
    /// match a configured generic prefix; all other attributes are removed.
    #[test]
    fn generic_attribute_prefixes_clean() {
        let input = r#"<a data-1 data-2 code-1 code-2><a>Hello!</a></a>"#;

        // No prefixes: only the explicitly allowed `data-1` survives.
        let mut explicit_only = Builder::new();
        explicit_only.add_tag_attributes("a", &["data-1"]);
        let cleaned = String::from(explicit_only.clean(input));
        assert_eq!(
            cleaned,
            r#"<a data-1="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
        );

        // `data-` prefix: both `data-*` attributes survive.
        let mut data_prefix = Builder::new();
        data_prefix
            .add_tag_attributes("a", &["data-1"])
            .add_generic_attribute_prefixes(&["data-"]);
        let cleaned = String::from(data_prefix.clean(input));
        assert_eq!(
            cleaned,
            r#"<a data-1="" data-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
        );

        // `data-` and `code-` prefixes: all four attributes survive.
        let mut both_prefixes = Builder::new();
        both_prefixes
            .add_tag_attributes("a", &["data-1", "code-1"])
            .add_generic_attribute_prefixes(&["data-", "code-"]);
        let cleaned = String::from(both_prefixes.clean(input));
        assert_eq!(
            cleaned,
            r#"<a data-1="" data-2="" code-1="" code-2="" rel="noopener noreferrer"></a><a rel="noopener noreferrer">Hello!</a>"#
        );
    }
3786
    /// A `<` followed by a space is not reported as HTML.
    #[test]
    fn lesser_than_isnt_html() {
        let detected = is_html("1 < 2");
        assert!(!detected);
    }
3791
    /// A `<` immediately followed by a digit is not reported as HTML.
    #[test]
    fn dense_lesser_than_isnt_html() {
        let detected = is_html("1<2");
        assert!(!detected);
    }
3796
    /// A tag-like `<2>` whose name is a digit is not reported as HTML.
    #[test]
    fn what_about_number_elements() {
        let detected = is_html("foo<2>bar");
        assert!(!detected);
    }
3801
    /// Rust turbofish syntax contains `<u8>`, which is reported as HTML.
    #[test]
    fn turbofish_is_html_sadly() {
        let detected = is_html("Vec::<u8>::new()");
        assert!(detected);
    }
3806
    /// A trailing `<g>` makes otherwise plain text count as HTML.
    #[test]
    fn stop_grinning() {
        let detected = is_html("did you really believe me? <g>");
        assert!(detected);
    }
3811
    /// A lone opening tag is reported as HTML.
    #[test]
    fn dont_be_bold() {
        let detected = is_html("<b>");
        assert!(detected);
    }
3816
3817
    /// Table-driven check of `UrlRelative::RewriteWithRoot`.
    ///
    /// Each row is `(root, path, url, result)`: cleaning `<a href="{url}">`
    /// with the given `root`/`path` must produce `<a href="{result}" ...>`.
    /// The rows cover a root with and without a trailing slash, an empty
    /// `path`, and `url` values that are empty, `/`, relative, or
    /// root-relative.
    #[test]
    fn rewrite_with_root() {
        let tests = [
            (
                "https://github.com/rust-ammonia/ammonia/blob/master/",
                "README.md",
                "",
                "https://github.com/rust-ammonia/ammonia/blob/master/README.md",
            ),
            (
                "https://github.com/rust-ammonia/ammonia/blob/master/",
                "README.md",
                "/",
                "https://github.com/rust-ammonia/ammonia/blob/master/",
            ),
            (
                "https://github.com/rust-ammonia/ammonia/blob/master/",
                "README.md",
                "/CONTRIBUTING.md",
                "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
            ),
            (
                "https://github.com/rust-ammonia/ammonia/blob/master",
                "README.md",
                "",
                "https://github.com/rust-ammonia/ammonia/blob/README.md",
            ),
            (
                "https://github.com/rust-ammonia/ammonia/blob/master",
                "README.md",
                "/",
                "https://github.com/rust-ammonia/ammonia/blob/",
            ),
            (
                "https://github.com/rust-ammonia/ammonia/blob/master",
                "README.md",
                "/CONTRIBUTING.md",
                "https://github.com/rust-ammonia/ammonia/blob/CONTRIBUTING.md",
            ),
            (
                "https://github.com/rust-ammonia/ammonia/blob/master/",
                "",
                "",
                "https://github.com/rust-ammonia/ammonia/blob/master/",
            ),
            (
                "https://github.com/rust-ammonia/ammonia/blob/master/",
                "",
                "/",
                "https://github.com/rust-ammonia/ammonia/blob/master/",
            ),
            (
                "https://github.com/rust-ammonia/ammonia/blob/master/",
                "",
                "/CONTRIBUTING.md",
                "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
            ),
            (
                "https://github.com/",
                "rust-ammonia/ammonia/blob/master/README.md",
                "",
                "https://github.com/rust-ammonia/ammonia/blob/master/README.md",
            ),
            (
                "https://github.com/",
                "rust-ammonia/ammonia/blob/master/README.md",
                "/",
                "https://github.com/",
            ),
            (
                "https://github.com/",
                "rust-ammonia/ammonia/blob/master/README.md",
                "CONTRIBUTING.md",
                "https://github.com/rust-ammonia/ammonia/blob/master/CONTRIBUTING.md",
            ),
            (
                "https://github.com/",
                "rust-ammonia/ammonia/blob/master/README.md",
                "/CONTRIBUTING.md",
                "https://github.com/CONTRIBUTING.md",
            ),
        ];
        for (root, path, url, result) in tests {
            let input = format!(r#"<a href="{url}">test</a>"#);
            let expected = format!(r#"<a href="{result}" rel="noopener noreferrer">test</a>"#);
            let actual = Builder::new()
                .url_relative(UrlRelative::RewriteWithRoot {
                    root: Url::parse(root).unwrap(),
                    path: path.to_string(),
                })
                .clean(&input)
                .to_string();
            // Put the failing row in the assertion message itself, so it is
            // part of the panic output rather than a separate stdout line.
            assert_eq!(
                expected, actual,
                "failed to check ({root}, {path}, {url}, {result})"
            );
        }
    }
3918
}