Coverage Report

Created: 2023-03-17 06:19

/src/cmark/src/xml.c
Line
Count
Source (jump to first uncovered line)
1
#include <stdlib.h>
2
#include <stdio.h>
3
#include <string.h>
4
#include <assert.h>
5
6
#include "config.h"
7
#include "cmark.h"
8
#include "node.h"
9
#include "buffer.h"
10
11
8.52M
// Size in bytes of the stack scratch buffer S_render_node() passes to
// snprintf() when formatting attribute strings.
#define BUFFER_SIZE 100
12
284M
// Upper bound on the number of spaces indent() writes in front of a tag,
// regardless of how large the tracked indentation level has become.
#define MAX_INDENT 40
13
14
// Functions to convert cmark_nodes to XML strings.
15
16
// C0 control characters, U+FFFE and U+FFFF aren't allowed in XML.
//
// Per-byte classification consulted by escape_xml(). Entries that are not
// listed are implicitly 0:
//   0     copy the byte through unchanged
//   1..5  emit XML_ESCAPES[value] in place of the byte
//   9     byte is 0xBE or 0xBF, which may be the last byte of the UTF-8
//         encoding of U+FFFE or U+FFFF; escape_xml() inspects the two
//         preceding bytes to decide
static const char XML_ESCAPE_TABLE[256] = {
    // C0 controls below TAB
    [0x00] = 1, [0x01] = 1, [0x02] = 1, [0x03] = 1, [0x04] = 1,
    [0x05] = 1, [0x06] = 1, [0x07] = 1, [0x08] = 1,
    // 0x09 (TAB) and 0x0A (LF) pass through
    [0x0B] = 1, [0x0C] = 1,
    // 0x0D (CR) passes through
    [0x0E] = 1, [0x0F] = 1,
    [0x10] = 1, [0x11] = 1, [0x12] = 1, [0x13] = 1,
    [0x14] = 1, [0x15] = 1, [0x16] = 1, [0x17] = 1,
    [0x18] = 1, [0x19] = 1, [0x1A] = 1, [0x1B] = 1,
    [0x1C] = 1, [0x1D] = 1, [0x1E] = 1, [0x1F] = 1,
    // Markup-significant ASCII characters
    [0x22] = 2, // "
    [0x26] = 3, // &
    [0x3C] = 4, // <
    [0x3E] = 5, // >
    // Candidate final bytes of U+FFFE (EF BF BE) and U+FFFF (EF BF BF)
    [0xBE] = 9, [0xBF] = 9,
};
35
36
// UTF-8 byte sequence for U+FFFD, the Unicode replacement character.
#define UTF8_REPL "\xEF\xBF\xBD"

// Replacement text for each escape class stored in XML_ESCAPE_TABLE; the
// array index is the class number. Class 0 means "no escape" and is never
// looked up by escape_xml(), so its slot is just an empty string.
static const char *XML_ESCAPES[] = {
  [0] = "",
  [1] = UTF8_REPL, // disallowed control character
  [2] = "&quot;",
  [3] = "&amp;",
  [4] = "&lt;",
  [5] = "&gt;",
};
42
43
static void escape_xml(cmark_strbuf *ob, const unsigned char *src,
44
5.08M
                       bufsize_t size) {
45
5.08M
  bufsize_t i = 0, org, esc = 0;
46
47
127M
  while (i < size) {
48
126M
    org = i;
49
501M
    while (i < size && (esc = XML_ESCAPE_TABLE[src[i]]) == 0)
50
374M
      i++;
51
52
126M
    if (i > org)
53
121M
      cmark_strbuf_put(ob, src + org, i - org);
54
55
126M
    if (i >= size)
56
4.28M
      break;
57
58
122M
    if (esc == 9) {
59
      // To replace U+FFFE and U+FFFF with U+FFFD, only the last byte has to
60
      // be changed.
61
      // We know that src[i] is 0xBE or 0xBF.
62
108M
      if (i >= 2 && src[i-2] == 0xEF && src[i-1] == 0xBF) {
63
4.43k
        cmark_strbuf_putc(ob, 0xBD);
64
108M
      } else {
65
108M
        cmark_strbuf_putc(ob, src[i]);
66
108M
      }
67
108M
    } else {
68
14.3M
      cmark_strbuf_puts(ob, XML_ESCAPES[esc]);
69
14.3M
    }
70
71
122M
    i++;
72
122M
  }
73
5.08M
}
74
75
317k
// Convenience wrapper around escape_xml() for NUL-terminated strings: the
// length is taken with strlen(). A NULL `source` appends nothing.
static void escape_xml_str(cmark_strbuf *dest, const unsigned char *source) {
  if (!source)
    return;

  escape_xml(dest, source, strlen((char *)source));
}
79
80
struct render_state {
81
  cmark_strbuf *xml;
82
  int indent;
83
};
84
85
17.9M
static CMARK_INLINE void indent(struct render_state *state) {
86
17.9M
  int i;
87
297M
  for (i = 0; i < state->indent && i < MAX_INDENT; i++) {
88
279M
    cmark_strbuf_putc(state->xml, ' ');
89
279M
  }
90
17.9M
}
91
92
static int S_render_node(cmark_node *node, cmark_event_type ev_type,
93
18.3M
                         struct render_state *state, int options) {
94
18.3M
  cmark_strbuf *xml = state->xml;
95
18.3M
  bool literal = false;
96
18.3M
  cmark_delim_type delim;
97
18.3M
  bool entering = (ev_type == CMARK_EVENT_ENTER);
98
18.3M
  char buffer[BUFFER_SIZE];
99
100
18.3M
  if (entering) {
101
12.6M
    indent(state);
102
12.6M
    cmark_strbuf_putc(xml, '<');
103
12.6M
    cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
104
105
12.6M
    if (options & CMARK_OPT_SOURCEPOS && node->start_line != 0) {
106
7.44M
      snprintf(buffer, BUFFER_SIZE, " sourcepos=\"%d:%d-%d:%d\"",
107
7.44M
               node->start_line, node->start_column, node->end_line,
108
7.44M
               node->end_column);
109
7.44M
      cmark_strbuf_puts(xml, buffer);
110
7.44M
    }
111
112
12.6M
    literal = false;
113
114
12.6M
    switch (node->type) {
115
42.5k
    case CMARK_NODE_DOCUMENT:
116
42.5k
      cmark_strbuf_puts(xml, " xmlns=\"http://commonmark.org/xml/1.0\"");
117
42.5k
      break;
118
4.16M
    case CMARK_NODE_TEXT:
119
4.31M
    case CMARK_NODE_CODE:
120
4.40M
    case CMARK_NODE_HTML_BLOCK:
121
4.70M
    case CMARK_NODE_HTML_INLINE:
122
4.70M
      cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
123
4.70M
      escape_xml(xml, node->data, node->len);
124
4.70M
      cmark_strbuf_puts(xml, "</");
125
4.70M
      cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
126
4.70M
      literal = true;
127
4.70M
      break;
128
976k
    case CMARK_NODE_LIST:
129
976k
      switch (cmark_node_get_list_type(node)) {
130
12.2k
      case CMARK_ORDERED_LIST:
131
12.2k
        cmark_strbuf_puts(xml, " type=\"ordered\"");
132
12.2k
        snprintf(buffer, BUFFER_SIZE, " start=\"%d\"",
133
12.2k
                 cmark_node_get_list_start(node));
134
12.2k
        cmark_strbuf_puts(xml, buffer);
135
12.2k
        delim = cmark_node_get_list_delim(node);
136
12.2k
        if (delim == CMARK_PAREN_DELIM) {
137
1.89k
          cmark_strbuf_puts(xml, " delim=\"paren\"");
138
10.3k
        } else if (delim == CMARK_PERIOD_DELIM) {
139
10.3k
          cmark_strbuf_puts(xml, " delim=\"period\"");
140
10.3k
        }
141
12.2k
        break;
142
964k
      case CMARK_BULLET_LIST:
143
964k
        cmark_strbuf_puts(xml, " type=\"bullet\"");
144
964k
        break;
145
0
      default:
146
0
        break;
147
976k
      }
148
976k
      snprintf(buffer, BUFFER_SIZE, " tight=\"%s\"",
149
976k
               (cmark_node_get_list_tight(node) ? "true" : "false"));
150
976k
      cmark_strbuf_puts(xml, buffer);
151
976k
      break;
152
83.8k
    case CMARK_NODE_HEADING:
153
83.8k
      snprintf(buffer, BUFFER_SIZE, " level=\"%d\"", node->as.heading.level);
154
83.8k
      cmark_strbuf_puts(xml, buffer);
155
83.8k
      break;
156
66.4k
    case CMARK_NODE_CODE_BLOCK:
157
66.4k
      if (node->as.code.info) {
158
4.94k
        cmark_strbuf_puts(xml, " info=\"");
159
4.94k
        escape_xml_str(xml, node->as.code.info);
160
4.94k
        cmark_strbuf_putc(xml, '"');
161
4.94k
      }
162
66.4k
      cmark_strbuf_puts(xml, " xml:space=\"preserve\">");
163
66.4k
      escape_xml(xml, node->data, node->len);
164
66.4k
      cmark_strbuf_puts(xml, "</");
165
66.4k
      cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
166
66.4k
      literal = true;
167
66.4k
      break;
168
0
    case CMARK_NODE_CUSTOM_BLOCK:
169
0
    case CMARK_NODE_CUSTOM_INLINE:
170
0
      cmark_strbuf_puts(xml, " on_enter=\"");
171
0
      escape_xml_str(xml, node->as.custom.on_enter);
172
0
      cmark_strbuf_putc(xml, '"');
173
0
      cmark_strbuf_puts(xml, " on_exit=\"");
174
0
      escape_xml_str(xml, node->as.custom.on_exit);
175
0
      cmark_strbuf_putc(xml, '"');
176
0
      break;
177
295k
    case CMARK_NODE_LINK:
178
304k
    case CMARK_NODE_IMAGE:
179
304k
      cmark_strbuf_puts(xml, " destination=\"");
180
304k
      escape_xml_str(xml, node->as.link.url);
181
304k
      cmark_strbuf_putc(xml, '"');
182
304k
      if (node->as.link.title) {
183
8.45k
        cmark_strbuf_puts(xml, " title=\"");
184
8.45k
        escape_xml_str(xml, node->as.link.title);
185
8.45k
        cmark_strbuf_putc(xml, '"');
186
8.45k
      }
187
304k
      break;
188
6.50M
    default:
189
6.50M
      break;
190
12.6M
    }
191
12.6M
    if (node->first_child) {
192
5.25M
      state->indent += 2;
193
7.43M
    } else if (!literal) {
194
2.66M
      cmark_strbuf_puts(xml, " /");
195
2.66M
    }
196
12.6M
    cmark_strbuf_puts(xml, ">\n");
197
198
12.6M
  } else if (node->first_child) {
199
5.25M
    state->indent -= 2;
200
5.25M
    indent(state);
201
5.25M
    cmark_strbuf_puts(xml, "</");
202
5.25M
    cmark_strbuf_puts(xml, cmark_node_get_type_string(node));
203
5.25M
    cmark_strbuf_puts(xml, ">\n");
204
5.25M
  }
205
206
18.3M
  return 1;
207
18.3M
}
208
209
42.5k
// Renders the tree rooted at `root` as an XML document.
//
// The output starts with an XML declaration and a DOCTYPE line referencing
// CommonMark.dtd, followed by one element per node, produced by feeding
// every iterator event to S_render_node(). `options` is passed through to
// S_render_node() unchanged.
//
// `root` must not be NULL: its allocator (root->mem) backs the output buffer.
// Returns the character data detached from that buffer.
// NOTE(review): the caller presumably owns the returned string and must
// release it with the same allocator -- confirm against the strbuf API.
char *cmark_render_xml(cmark_node *root, int options) {
  cmark_strbuf xml = CMARK_BUF_INIT(root->mem);
  struct render_state state = {.xml = &xml, .indent = 0};
  cmark_iter *iter = cmark_iter_new(root);
  cmark_event_type ev_type;
  char *result;

  cmark_strbuf_puts(state.xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
  cmark_strbuf_puts(state.xml,
                    "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n");

  for (ev_type = cmark_iter_next(iter); ev_type != CMARK_EVENT_DONE;
       ev_type = cmark_iter_next(iter)) {
    S_render_node(cmark_iter_get_node(iter), ev_type, &state, options);
  }

  // Take the text out of the buffer before tearing down the iterator.
  result = (char *)cmark_strbuf_detach(&xml);
  cmark_iter_free(iter);

  return result;
}