Coverage Report

Created: 2025-12-14 06:20

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cmark/src/xml.c
Line
Count
Source
1
#include <assert.h>
2
#include <stdbool.h>
3
#include <stdio.h>
4
#include <stdlib.h>
5
#include <string.h>
6
7
#include "cmark.h"
8
#include "node.h"
9
#include "buffer.h"
10
11
13.9M
#define BUFFER_SIZE 100
12
171M
#define MAX_INDENT 40
13
14
// Functions to convert cmark_nodes to XML strings.
15
16
// C0 control characters, U+FFFE and U+FFFF aren't allowed in XML.
17
// Per-byte escape action, indexed by the byte value:
//   0     copy the byte through unchanged
//   1..5  emit XML_ESCAPES[code] in place of the byte
//   9     the byte is 0xBE or 0xBF; escape_xml() looks at the two preceding
//         bytes to detect the UTF-8 encodings of U+FFFE and U+FFFF
static const char XML_ESCAPE_TABLE[256] = {
18
    /* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 1,
19
    /* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
20
    /* 0x20 */ 0, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0,
21
    /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 0, 5, 0,
22
    /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
23
    /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
24
    /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
25
    /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
26
    /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
27
    /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
28
    /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
29
    /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9,
30
    /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
31
    /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32
    /* 0xE0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
33
    /* 0xF0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
34
};
35
36
// U+FFFD Replacement Character encoded in UTF-8
37
#define UTF8_REPL "\xEF\xBF\xBD"
38
39
// Replacement text for escape codes 1..5 from XML_ESCAPE_TABLE. Slot 0 is a
// placeholder: code 0 means "no escape", so escape_xml() never looks it up.
static const char *XML_ESCAPES[] = {
40
  "", UTF8_REPL, "&quot;", "&amp;", "&lt;", "&gt;"
41
};
42
43
static void escape_xml(cmark_strbuf *ob, const unsigned char *src,
44
1.06M
                       bufsize_t size) {
45
1.06M
  bufsize_t i = 0, org, esc = 0;
46
47
85.5M
  while (i < size) {
48
85.3M
    org = i;
49
284M
    while (i < size && (esc = XML_ESCAPE_TABLE[src[i]]) == 0)
50
199M
      i++;
51
52
85.3M
    if (i > org)
53
81.6M
      cmark_strbuf_put(ob, src + org, i - org);
54
55
85.3M
    if (i >= size)
56
824k
      break;
57
58
84.5M
    if (esc == 9) {
59
      // To replace U+FFFE and U+FFFF with U+FFFD, only the last byte has to
60
      // be changed.
61
      // We know that src[i] is 0xBE or 0xBF.
62
78.6M
      if (i >= 2 && src[i-2] == 0xEF && src[i-1] == 0xBF) {
63
7.63k
        cmark_strbuf_putc(ob, 0xBD);
64
78.6M
      } else {
65
78.6M
        cmark_strbuf_putc(ob, src[i]);
66
78.6M
      }
67
78.6M
    } else {
68
5.88M
      cmark_strbuf_puts(ob, XML_ESCAPES[esc]);
69
5.88M
    }
70
71
84.5M
    i++;
72
84.5M
  }
73
1.06M
}
74
75
90.7k
// Escapes a NUL-terminated string into `dest` via escape_xml(). A NULL
// `source` is accepted and appends nothing.
static void escape_xml_str(cmark_strbuf *dest, const unsigned char *source) {
  if (!source)
    return;

  escape_xml(dest, source, (bufsize_t)strlen((char *)source));
}
79
80
// Mutable state shared by all S_render_node() calls of one rendering pass.
struct render_state {
81
  cmark_strbuf *xml; // buffer the XML output is appended to
82
  int indent;        // current indentation in spaces; indent() caps what it writes at MAX_INDENT
83
};
84
85
19.4M
static inline void indent(struct render_state *state) {
86
19.4M
  int i;
87
188M
  for (i = 0; i < state->indent && i < MAX_INDENT; i++) {
88
168M
    cmark_strbuf_putc(state->xml, ' ');
89
168M
  }
90
19.4M
}
91
92
static int S_render_node(cmark_node *node, cmark_event_type ev_type,
93
22.3M
                         struct render_state *state, int options) {
94
22.3M
  cmark_strbuf *xml = state->xml;
95
22.3M
  bool literal = false;
96
22.3M
  cmark_delim_type delim;
97
22.3M
  bool entering = (ev_type == CMARK_EVENT_ENTER);
98
22.3M
  char buffer[BUFFER_SIZE];
99
100
22.3M
  if (entering) {
101
11.8M
    indent(state);
102
11.8M
    cmark_strbuf_putc(xml, '<');
103
11.8M
    cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
104
105
11.8M
    if (options & CMARK_OPT_SOURCEPOS && node->start_line != 0) {
106
10.5M
      snprintf(buffer, BUFFER_SIZE, " sourcepos=\"%d:%d-%d:%d\"",
107
10.5M
               node->start_line, node->start_column, node->end_line,
108
10.5M
               node->end_column);
109
10.5M
      cmark_strbuf_puts(xml, buffer);
110
10.5M
    }
111
112
11.8M
    literal = false;
113
114
11.8M
    switch (node->type) {
115
31.5k
    case CMARK_NODE_DOCUMENT:
116
31.5k
      cmark_strbuf_puts(xml, " xmlns=\"http://commonmark.org/xml/1.0\"");
117
31.5k
      break;
118
891k
    case CMARK_NODE_TEXT:
119
918k
    case CMARK_NODE_CODE:
120
934k
    case CMARK_NODE_HTML_BLOCK:
121
955k
    case CMARK_NODE_HTML_INLINE:
122
955k
      cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
123
955k
      escape_xml(xml, node->data, node->len);
124
955k
      cmark_strbuf_puts(xml, "</");
125
955k
      cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
126
955k
      literal = true;
127
955k
      break;
128
3.28M
    case CMARK_NODE_LIST:
129
3.28M
      switch (cmark_node_get_list_type(node)) {
130
2.80k
      case CMARK_ORDERED_LIST:
131
2.80k
        cmark_strbuf_puts(xml, " type=\"ordered\"");
132
2.80k
        snprintf(buffer, BUFFER_SIZE, " start=\"%d\"",
133
2.80k
                 cmark_node_get_list_start(node));
134
2.80k
        cmark_strbuf_puts(xml, buffer);
135
2.80k
        delim = cmark_node_get_list_delim(node);
136
2.80k
        if (delim == CMARK_PAREN_DELIM) {
137
1.30k
          cmark_strbuf_puts(xml, " delim=\"paren\"");
138
1.50k
        } else if (delim == CMARK_PERIOD_DELIM) {
139
1.50k
          cmark_strbuf_puts(xml, " delim=\"period\"");
140
1.50k
        }
141
2.80k
        break;
142
3.28M
      case CMARK_BULLET_LIST:
143
3.28M
        cmark_strbuf_puts(xml, " type=\"bullet\"");
144
3.28M
        break;
145
0
      default:
146
0
        break;
147
3.28M
      }
148
3.28M
      snprintf(buffer, BUFFER_SIZE, " tight=\"%s\"",
149
3.28M
               (cmark_node_get_list_tight(node) ? "true" : "false"));
150
3.28M
      cmark_strbuf_puts(xml, buffer);
151
3.28M
      break;
152
104k
    case CMARK_NODE_HEADING:
153
104k
      snprintf(buffer, BUFFER_SIZE, " level=\"%d\"", node->as.heading.level);
154
104k
      cmark_strbuf_puts(xml, buffer);
155
104k
      break;
156
14.0k
    case CMARK_NODE_CODE_BLOCK:
157
14.0k
      if (node->as.code.info) {
158
1.78k
        cmark_strbuf_puts(xml, " info=\"");
159
1.78k
        escape_xml_str(xml, node->as.code.info);
160
1.78k
        cmark_strbuf_putc(xml, '"');
161
1.78k
      }
162
14.0k
      cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
163
14.0k
      escape_xml(xml, node->data, node->len);
164
14.0k
      cmark_strbuf_puts(xml, "</");
165
14.0k
      cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
166
14.0k
      literal = true;
167
14.0k
      break;
168
0
    case CMARK_NODE_CUSTOM_BLOCK:
169
0
    case CMARK_NODE_CUSTOM_INLINE:
170
0
      cmark_strbuf_puts(xml, " on_enter=\"");
171
0
      escape_xml_str(xml, node->as.custom.on_enter);
172
0
      cmark_strbuf_putc(xml, '"');
173
0
      cmark_strbuf_puts(xml, " on_exit=\"");
174
0
      escape_xml_str(xml, node->as.custom.on_exit);
175
0
      cmark_strbuf_putc(xml, '"');
176
0
      break;
177
83.3k
    case CMARK_NODE_LINK:
178
85.2k
    case CMARK_NODE_IMAGE:
179
85.2k
      cmark_strbuf_puts(xml, " destination=\"");
180
85.2k
      escape_xml_str(xml, node->as.link.url);
181
85.2k
      cmark_strbuf_putc(xml, '"');
182
85.2k
      if (node->as.link.title) {
183
3.65k
        cmark_strbuf_puts(xml, " title=\"");
184
3.65k
        escape_xml_str(xml, node->as.link.title);
185
3.65k
        cmark_strbuf_putc(xml, '"');
186
3.65k
      }
187
85.2k
      break;
188
7.41M
    default:
189
7.41M
      break;
190
11.8M
    }
191
11.8M
    if (node->first_child) {
192
7.58M
      state->indent += 2;
193
7.58M
    } else if (!literal) {
194
3.33M
      cmark_strbuf_puts(xml, " /");
195
3.33M
    }
196
11.8M
    cmark_strbuf_puts(xml, ">\n");
197
198
11.8M
  } else if (node->first_child) {
199
7.58M
    state->indent -= 2;
200
7.58M
    indent(state);
201
7.58M
    cmark_strbuf_puts(xml, "</");
202
7.58M
    cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
203
7.58M
    cmark_strbuf_puts(xml, ">\n");
204
7.58M
  }
205
206
22.3M
  return 1;
207
22.3M
}
208
209
31.5k
// Renders the tree rooted at `root` as an XML document: an XML declaration
// and DOCTYPE line followed by one element per node, produced by walking the
// tree with an iterator and handing every event to S_render_node().
//
// `options` is forwarded to S_render_node(). Returns the string detached
// from the internal buffer, which was set up with root->mem.
// NOTE(review): the caller presumably releases the result with that same
// allocator; confirm against the public header.
char *cmark_render_xml(cmark_node *root, int options) {
  cmark_strbuf xml = CMARK_BUF_INIT(root->mem);
  struct render_state state = {&xml, 0};
  cmark_iter *iter = cmark_iter_new(root);
  cmark_event_type ev_type;
  char *result;

  cmark_strbuf_puts(&xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
  cmark_strbuf_puts(&xml, "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n");

  for (ev_type = cmark_iter_next(iter); ev_type != CMARK_EVENT_DONE;
       ev_type = cmark_iter_next(iter)) {
    S_render_node(cmark_iter_get_node(iter), ev_type, &state, options);
  }

  result = (char *)cmark_strbuf_detach(&xml);
  cmark_iter_free(iter);

  return result;
}