Coverage Report

Created: 2026-03-25 07:07

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/pulldown-cmark/fuzz/src/lib.rs
Line
Count
Source
1
//! Fuzzing helper functions.
2
3
use std::convert::TryInto;
4
use std::ptr;
5
6
use anyhow::anyhow;
7
use mozjs::conversions::ToJSValConvertible;
8
use mozjs::jsapi::{
9
    EnterRealm, HandleValueArray, JS_NewGlobalObject, LeaveRealm, OnNewGlobalHookOption,
10
};
11
use mozjs::jsval::UndefinedValue;
12
use mozjs::rooted;
13
use mozjs::rust::wrappers::JS_CallFunctionName;
14
use mozjs::rust::SIMPLE_GLOBAL_CLASS;
15
use mozjs::rust::{JSEngine, RealmOptions, Runtime};
16
use pulldown_cmark::{CodeBlockKind, Event, LinkType, Parser, Tag, TagEnd};
17
use quick_xml::escape::unescape;
18
use quick_xml::events::Event as XmlEvent;
19
use quick_xml::reader::Reader;
20
21
369k
fn urldecode(data: &str) -> String {
22
369k
    let decoded = urlencoding::decode_binary(data.as_bytes());
23
369k
    urlencoding::encode_binary(&decoded[..]).to_string()
24
369k
}
25
26
/// Send Markdown `text` to `pulldown-cmark` and return Markdown
27
/// events.
28
960
pub fn pulldown_cmark(text: &str) -> Vec<Event<'_>> {
29
960
    Parser::new(text).collect()
30
960
}
31
32
/// Send Markdown `text` to `commonmark.js` and return XML.
33
960
pub fn commonmark_js(text: &str) -> anyhow::Result<String> {
34
    const COMMONMARK_MIN_JS: &str =
35
        include_str!("../../pulldown-cmark/third_party/commonmark.js/commonmark.min.js");
36
37
    thread_local! {
38
        static ENGINE: JSEngine = {
39
            JSEngine::init().expect("failed to initalize JS engine")
40
        }
41
    }
42
43
960
    ENGINE.with(|engine| {
44
960
        let rt = Runtime::new(engine.handle());
45
46
960
        let options = RealmOptions::default();
47
960
        rooted!(in(rt.cx()) let global = unsafe {
48
960
            JS_NewGlobalObject(rt.cx(), &SIMPLE_GLOBAL_CLASS, ptr::null_mut(),
49
960
                                OnNewGlobalHookOption::FireOnNewGlobalHook,
50
960
                                &*options)
51
        });
52
960
        let realm = unsafe { EnterRealm(rt.cx(), global.get()) };
53
54
        // The return value comes back here. If it could be a GC thing, you must add it to the
55
        // GC's "root set" with the rooted! macro.
56
960
        rooted!(in(rt.cx()) let mut rval = UndefinedValue());
57
58
        // These should indicate source location for diagnostics.
59
960
        let filename: &'static str = "commonmark.min.js";
60
960
        let lineno: u32 = 1;
61
960
        let res = rt.evaluate_script(
62
960
            global.handle(),
63
960
            COMMONMARK_MIN_JS,
64
960
            filename,
65
960
            lineno,
66
960
            rval.handle_mut(),
67
        );
68
960
        assert!(res.is_ok());
69
70
960
        let filename: &'static str = "{inline}";
71
960
        let lineno: u32 = 1;
72
960
        let script = r#"
73
960
            function render_to_xml(markdown) {
74
960
                var reader = new commonmark.Parser();
75
960
                var xmlwriter = new commonmark.XmlRenderer({ sourcepos: false });
76
960
                return xmlwriter.render(reader.parse(markdown));
77
960
            }
78
960
        "#;
79
960
        rooted!(in(rt.cx()) let mut render_to_xml = UndefinedValue());
80
960
        let res = rt.evaluate_script(
81
960
            global.handle(),
82
960
            script,
83
960
            filename,
84
960
            lineno,
85
960
            render_to_xml.handle_mut(),
86
        );
87
960
        assert!(res.is_ok());
88
89
        // The script above defines `render_to_xml` on the global object; it is called by name below.
90
960
        let xml = unsafe {
91
960
            rooted!(in(rt.cx()) let mut xml = UndefinedValue());
92
960
            rooted!(in(rt.cx()) let mut text_val = UndefinedValue());
93
960
            text.to_jsval(rt.cx(), text_val.handle_mut());
94
960
            JS_CallFunctionName(
95
960
                rt.cx(),
96
960
                global.handle(),
97
960
                b"render_to_xml\0".as_ptr() as *const i8,
98
960
                &HandleValueArray::from_rooted_slice(&[text_val.handle().get()]),
99
960
                xml.handle_mut(),
100
            );
101
960
            let xml_string = xml.handle().to_string();
102
960
            let utf8 = mozjs::conversions::jsstr_to_string(rt.cx(), xml_string);
103
960
            utf8
104
        };
105
106
960
        unsafe {
107
960
            LeaveRealm(rt.cx(), realm);
108
960
        }
109
110
960
        Ok(xml)
111
960
    })
112
960
}
113
114
/// Parse commonmark.js XML and return Markdown events.
115
960
pub fn xml_to_events(xml: &str) -> anyhow::Result<Vec<Event<'_>>> {
116
960
    let mut block_container_stack = Vec::new();
117
960
    let mut heading_stack = Vec::new();
118
119
960
    let mut reader = Reader::from_str(xml);
120
960
    let mut events: Vec<Event> = Vec::new();
121
    loop {
122
85.7M
        match reader.read_event()? {
123
960
            XmlEvent::Eof => break,
124
1.92k
            XmlEvent::Decl(..) | XmlEvent::DocType(..) => continue,
125
32.3M
            XmlEvent::Start(tag) => match tag.name().as_ref() {
126
32.3M
                b"document" => continue,
127
32.3M
                b"paragraph"
128
668k
                    if block_container_stack
129
668k
                        .last()
130
668k
                        .map(|(_start, tight)| *tight)
131
668k
                        .unwrap_or(false) =>
132
                {
133
467k
                    continue;
134
                }
135
200k
                b"paragraph" => events.push(Event::Start(Tag::Paragraph)),
136
31.7M
                b"heading" => match tag.try_get_attribute("level")? {
137
213k
                    Some(level) => {
138
213k
                        let level = level
139
213k
                            .unescape_value()?
140
213k
                            .parse::<usize>()?
141
213k
                            .try_into()
142
213k
                            .map_err(|err| anyhow!("Invalid level: {err:?}"))?;
143
213k
                        heading_stack.push(level);
144
213k
                        events.push(Event::Start(Tag::Heading {
145
213k
                            level,
146
213k
                            id: None,
147
213k
                            classes: Vec::new(),
148
213k
                            attrs: Vec::new(),
149
213k
                        }));
150
                    }
151
0
                    None => anyhow::bail!("Missing level in heading"),
152
                },
153
31.5M
                b"text" => {
154
16.7M
                    events.push(Event::Text(
155
16.7M
                        unescape(&reader.read_text(tag.to_end().name())?)?
156
16.7M
                            .into_owned()
157
16.7M
                            .into(),
158
                    ));
159
                }
160
6.42M
                b"code_block" => {
161
225k
                    match tag.try_get_attribute("info")? {
162
3.69k
                        Some(info) => events.push(Event::Start(Tag::CodeBlock(
163
3.69k
                            CodeBlockKind::Fenced(info.unescape_value()?.into_owned().into()),
164
                        ))),
165
221k
                        None => events.push(Event::Start(Tag::CodeBlock(CodeBlockKind::Indented))),
166
                    }
167
225k
                    events.push(Event::Text(
168
225k
                        unescape(&reader.read_text(tag.to_end().name())?)?
169
225k
                            .into_owned()
170
225k
                            .into(),
171
                    ));
172
225k
                    events.push(Event::End(TagEnd::CodeBlock));
173
                }
174
                b"list" => {
175
1.69M
                    let start = tag.try_get_attribute("start")?;
176
1.69M
                    match &start {
177
97
                        Some(start) => events.push(Event::Start(Tag::List(Some(
178
97
                            start.unescape_value()?.parse()?,
179
                        )))),
180
1.69M
                        None => events.push(Event::Start(Tag::List(None))),
181
                    };
182
1.69M
                    let tight = match tag.try_get_attribute("tight") {
183
1.69M
                        Ok(Some(value)) if value.unescape_value()? == "true" => true,
184
111
                        _ => false,
185
                    };
186
1.69M
                    block_container_stack.push((start.is_some(), tight));
187
                }
188
3.38M
                b"item" => events.push(Event::Start(Tag::Item)),
189
6.19M
                b"strong" => events.push(Event::Start(Tag::Strong)),
190
1.88M
                b"emph" => events.push(Event::Start(Tag::Emphasis)),
191
1.15M
                b"code" => events.push(Event::Code(
192
1.15M
                    unescape(&reader.read_text(tag.to_end().name())?)?
193
1.15M
                        .into_owned()
194
1.15M
                        .into(),
195
                )),
196
5.36M
                name @ (b"link" | b"image") => {
197
184k
                    let dest_url = tag
198
184k
                        .try_get_attribute("destination")?
199
184k
                        .ok_or(anyhow!("Missing destination"))?
200
184k
                        .unescape_value()?
201
184k
                        .into_owned()
202
184k
                        .into();
203
184k
                    let title = match tag.try_get_attribute("title")? {
204
184k
                        Some(title) => title.unescape_value()?.into_owned().into(),
205
0
                        None => "".into(),
206
                    };
207
184k
                    let link_type = LinkType::Inline; // commonmark.js does not distinguish.
208
184k
                    let id = "".into(); // commonmark.js does not record this.
209
184k
                    events.push(Event::Start(if name == b"link" {
210
184k
                        Tag::Link {
211
184k
                            link_type,
212
184k
                            dest_url,
213
184k
                            title,
214
184k
                            id,
215
184k
                        }
216
                    } else {
217
66
                        Tag::Image {
218
66
                            link_type,
219
66
                            dest_url,
220
66
                            title,
221
66
                            id,
222
66
                        }
223
                    }));
224
                }
225
5.36M
                b"block_quote" => {
226
39.5k
                    block_container_stack.push((true, false));
227
39.5k
                    events.push(Event::Start(Tag::BlockQuote(None)))
228
                }
229
                b"html_block" => {
230
2.80k
                    events.push(Event::Start(Tag::HtmlBlock));
231
2.80k
                    events.push(Event::Html(
232
2.80k
                        unescape(&reader.read_text(tag.to_end().name())?)?
233
2.80k
                            .into_owned()
234
2.80k
                            .into(),
235
                    ));
236
2.80k
                    events.push(Event::End(TagEnd::HtmlBlock));
237
                }
238
5.32M
                b"html_inline" => events.push(Event::InlineHtml(
239
5.32M
                    unescape(&reader.read_text(tag.to_end().name())?)?
240
5.32M
                        .into_owned()
241
5.32M
                        .into(),
242
                )),
243
0
                name => anyhow::bail!("start tag: {}", String::from_utf8_lossy(name)),
244
            },
245
8.90M
            XmlEvent::End(tag) => match tag.name().as_ref() {
246
8.90M
                b"document" => continue,
247
8.90M
                b"paragraph"
248
668k
                    if block_container_stack
249
668k
                        .last()
250
668k
                        .map(|(_numbered, tight)| *tight)
251
668k
                        .unwrap_or(false) =>
252
                {
253
467k
                    continue;
254
                }
255
200k
                b"paragraph" => events.push(Event::End(TagEnd::Paragraph)),
256
8.23M
                b"heading" => events.push(Event::End(TagEnd::Heading(
257
213k
                    heading_stack.pop().ok_or(anyhow!("Heading stack empty"))?,
258
                ))),
259
8.02M
                b"list" => events.push(Event::End(TagEnd::List(
260
1.69M
                    block_container_stack
261
1.69M
                        .pop()
262
1.69M
                        .ok_or(anyhow!("List stack empty"))?
263
                        .0,
264
                ))),
265
3.38M
                b"item" => events.push(Event::End(TagEnd::Item)),
266
1.88M
                b"emph" => events.push(Event::End(TagEnd::Emphasis)),
267
873k
                b"strong" => events.push(Event::End(TagEnd::Strong)),
268
184k
                b"link" => events.push(Event::End(TagEnd::Link)),
269
39.6k
                b"image" => events.push(Event::End(TagEnd::Image)),
270
39.5k
                b"block_quote" => {
271
39.5k
                    block_container_stack
272
39.5k
                        .pop()
273
39.5k
                        .ok_or(anyhow!("List stack empty"))?;
274
39.5k
                    events.push(Event::End(TagEnd::BlockQuote(None)))
275
                }
276
0
                name => anyhow::bail!("end tag: {}", String::from_utf8_lossy(name)),
277
            },
278
42.8M
            XmlEvent::Text(_) => continue,
279
1.56M
            XmlEvent::Empty(tag) => match tag.name().as_ref() {
280
1.56M
                b"thematic_break" => events.push(Event::Rule),
281
1.55M
                b"softbreak" => events.push(Event::SoftBreak),
282
264k
                b"linebreak" => events.push(Event::HardBreak),
283
0
                name => anyhow::bail!("empty tag: {}", String::from_utf8_lossy(name)),
284
            },
285
0
            event => anyhow::bail!("event {event:?}"),
286
        }
287
    }
288
289
960
    Ok(events)
290
960
}
291
292
/// Normalize Markdown events
293
///
294
/// - Joins adjacent `Event::Text` and `Event::Html` events.
295
///
296
/// - Ensures every `Tag::Item` has a `Tag::Paragraph` as its first
297
///   child (commonmark.js tracks item looseness via an attribute).
298
///
299
/// - Adds a final newline to non-empty `Tag::CodeBlock` tags.
300
///
301
/// - Resets the link type to `LinkType::Inline`.
302
///
303
/// - Resets all code blocks to `CodeBlockKind::Fenced`.
304
1.92k
pub fn normalize(events: Vec<Event<'_>>) -> Vec<Event<'_>> {
305
1.92k
    let mut normalized = Vec::with_capacity(events.len());
306
83.2M
    for event in events.into_iter() {
307
83.2M
        match (normalized.last_mut(), &event) {
308
            // Join adjacent text and HTML events.
309
15.0M
            (Some(Event::Text(prev)), Event::Text(next)) => *prev = format!("{prev}{next}").into(),
310
273k
            (Some(Event::Html(prev)), Event::Html(next)) => *prev = format!("{prev}{next}").into(),
311
312
            // commonmark.js wraps non-empty list items in a paragraph.
313
4.01M
            (Some(Event::Start(Tag::Item)), next)
314
6.76M
                if next != &Event::Start(Tag::Paragraph) && next != &Event::End(TagEnd::Item) =>
315
4.01M
            {
316
4.01M
                normalized.push(Event::Start(Tag::Paragraph));
317
4.01M
                normalized.push(event);
318
4.01M
            }
319
4.01M
            (Some(prev), Event::End(TagEnd::Item))
320
6.76M
                if prev != &Event::End(TagEnd::Paragraph) && prev != &Event::Start(Tag::Item) =>
321
4.01M
            {
322
4.01M
                normalized.push(Event::End(TagEnd::Paragraph));
323
4.01M
                normalized.push(event);
324
4.01M
            }
325
326
            // commonmark.js always adds a final newline to code blocks.
327
314k
            (Some(Event::Text(prev)), Event::End(TagEnd::CodeBlock))
328
314k
                if !prev.is_empty() && !prev.ends_with('\n') =>
329
80
            {
330
80
                *prev = format!("{prev}\n").into();
331
80
                normalized.push(event);
332
80
            }
333
334
            // Other events are passed through.
335
59.8M
            (_, _) => normalized.push(event),
336
        }
337
    }
338
339
1.92k
    normalized
340
1.92k
        .into_iter()
341
451k
        .filter_map(|event| match event {
342
            // commonmark.js does not record the link type.
343
            Event::Start(Tag::Link {
344
                link_type: LinkType::Email,
345
435
                dest_url,
346
435
                title,
347
                ..
348
435
            }) => Some(Event::Start(Tag::Link {
349
435
                link_type: LinkType::Inline,
350
435
                dest_url: urldecode(&format!("mailto:{dest_url}")).into(),
351
435
                title: title.clone(),
352
435
                id: "".into(), // commonmark.js does not record this
353
435
            })),
354
            Event::Start(Tag::Link {
355
369k
                dest_url, title, ..
356
369k
            }) => Some(Event::Start(Tag::Link {
357
369k
                link_type: LinkType::Inline,
358
369k
                dest_url: urldecode(&dest_url).into(),
359
369k
                title: title.clone(),
360
369k
                id: "".into(), // commonmark.js does not record this
361
369k
            })),
362
            // commonmark.js does not record the link type.
363
            Event::Start(Tag::Image {
364
132
                dest_url,
365
132
                title,
366
132
                id,
367
                ..
368
132
            }) => Some(Event::Start(Tag::Image {
369
132
                link_type: LinkType::Inline,
370
132
                dest_url: urldecode(&dest_url).into(),
371
132
                title: title.clone(),
372
132
                id: id.clone(),
373
132
            })),
374
            // commonmark.js does not distinguish between fenced code
375
            // blocks with a "" info string and indented code blocks.
376
255k
            Event::Start(Tag::CodeBlock(CodeBlockKind::Indented)) => Some(Event::Start(
377
255k
                Tag::CodeBlock(CodeBlockKind::Fenced("".into())),
378
255k
            )),
379
380
            // pulldown-cmark can generate empty text and HTML events.
381
17.1M
            Event::Text(text) if text.is_empty() => None,
382
5.60k
            Event::Html(html) if html.is_empty() => None,
383
384
            // pulldown-cmark includes trailing newlines in HTML.
385
5.60k
            Event::Html(html) => Some(Event::Html(html.trim_end_matches('\n').to_string().into())),
386
387
75.1M
            event => Some(event),
388
75.9M
        })
389
1.92k
        .collect()
390
1.92k
}
391
392
/// Print Markdown events with indentation.
393
///
394
/// The `text` label indicates the source of the events.
395
0
pub fn print_events(text: &str, events: &[Event]) {
396
0
    eprintln!("{text:?} -> [");
397
0
    let mut width = 0;
398
0
    for event in events {
399
0
        if let Event::End(_) = event {
400
0
            width -= 2;
401
0
        }
402
0
        eprintln!("  {:width$}{event:?}", "");
403
0
        if let Event::Start(_) = event {
404
0
            width += 2;
405
0
        }
406
    }
407
0
    eprintln!("]");
408
0
}
409
410
#[cfg(test)]
411
mod tests {
412
    use super::*;
413
    use pretty_assertions::assert_eq;
414
    use pulldown_cmark::{CodeBlockKind, Event, Tag, TagEnd};
415
416
    #[test]
417
    fn test_normalize_text() {
418
        assert_eq!(
419
            normalize(vec![
420
                Event::Text("foo".into()),
421
                Event::Text("bar".into()),
422
                Event::Text("baz".into())
423
            ]),
424
            vec![Event::Text("foobarbaz".into())]
425
        );
426
    }
427
428
    #[test]
429
    fn test_normalize_empty_text() {
430
        assert_eq!(normalize(vec![Event::Text("".into())]), vec![]);
431
    }
432
433
    #[test]
434
    fn test_normalize_html() {
435
        assert_eq!(
436
            normalize(vec![
437
                Event::Html("<foo>".into()),
438
                Event::Html("<bar>".into()),
439
                Event::Html("<baz>".into())
440
            ]),
441
            vec![Event::Html("<foo><bar><baz>".into())]
442
        );
443
    }
444
445
    #[test]
446
    fn test_normalize_empty_html() {
447
        assert_eq!(normalize(vec![Event::Html("".into())]), vec![]);
448
    }
449
450
    #[test]
451
    fn test_normalize_non_empty_list() {
452
        assert_eq!(
453
            normalize(vec![
454
                Event::Start(Tag::List(None)),
455
                Event::Start(Tag::Item),
456
                Event::Text("foo".into()),
457
                Event::End(TagEnd::Item),
458
                Event::End(TagEnd::List(false)),
459
            ]),
460
            vec![
461
                Event::Start(Tag::List(None)),
462
                Event::Start(Tag::Item),
463
                Event::Start(Tag::Paragraph),
464
                Event::Text("foo".into()),
465
                Event::End(TagEnd::Paragraph),
466
                Event::End(TagEnd::Item),
467
                Event::End(TagEnd::List(false)),
468
            ]
469
        );
470
    }
471
472
    #[test]
473
    fn test_normalize_empty_list() {
474
        assert_eq!(
475
            normalize(vec![
476
                Event::Start(Tag::List(None)),
477
                Event::Start(Tag::Item),
478
                Event::End(TagEnd::Item),
479
                Event::End(TagEnd::List(false)),
480
            ]),
481
            vec![
482
                Event::Start(Tag::List(None)),
483
                Event::Start(Tag::Item),
484
                Event::End(TagEnd::Item),
485
                Event::End(TagEnd::List(false)),
486
            ]
487
        );
488
    }
489
490
    #[test]
491
    fn test_normalize_empty_code_block() {
492
        assert_eq!(
493
            normalize(vec![
494
                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced("rust".into()))),
495
                Event::Text("".into()),
496
                Event::End(TagEnd::CodeBlock)
497
            ]),
498
            vec![
499
                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced("rust".into()))),
500
                Event::End(TagEnd::CodeBlock)
501
            ]
502
        );
503
    }
504
505
    #[test]
506
    fn test_normalize_non_empty_code_block() {
507
        assert_eq!(
508
            normalize(vec![
509
                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced("rust".into()))),
510
                Event::Text("fn main() {}".into()),
511
                Event::End(TagEnd::CodeBlock)
512
            ]),
513
            vec![
514
                Event::Start(Tag::CodeBlock(CodeBlockKind::Fenced("rust".into()))),
515
                Event::Text("fn main() {}\n".into()),
516
                Event::End(TagEnd::CodeBlock)
517
            ]
518
        );
519
    }
520
}