/src/cmark/src/commonmark.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include <stdlib.h> |
2 | | #include <stdio.h> |
3 | | #include <string.h> |
4 | | #include <stdint.h> |
5 | | #include <assert.h> |
6 | | |
7 | | #include "config.h" |
8 | | #include "cmark.h" |
9 | | #include "node.h" |
10 | | #include "buffer.h" |
11 | | #include "utf8.h" |
12 | | #include "scanners.h" |
13 | | #include "render.h" |
14 | | |
// Shorthands for the renderer callbacks. Each one expects a variable named
// `renderer` (a cmark_renderer pointer) to be in scope where it is expanded.
// Arguments and expansions are parenthesized so that compound expressions
// can be passed safely.
#define OUT(s, wrap, escaping)                                                 \
  (renderer->out(renderer, (s), (wrap), (escaping)))
#define LIT(s) (renderer->out(renderer, (s), false, LITERAL))
#define CR() (renderer->cr(renderer))
#define BLANKLINE() (renderer->blankline(renderer))
// Capacity of the scratch buffer that holds one escaped character
// (percent-encoded or numeric entity form).
#define ENCODED_SIZE 20
// Capacity of the scratch buffer that holds an ordered-list marker.
#define LISTMARKER_SIZE 20
21 | | |
22 | | // Functions to convert cmark_nodes to commonmark strings. |
23 | | |
24 | | static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape, |
25 | 109M | int32_t c, unsigned char nextc) { |
26 | 109M | bool needs_escaping = false; |
27 | 109M | bool follows_digit = |
28 | 109M | renderer->buffer->size > 0 && |
29 | 109M | cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]); |
30 | 109M | char encoded[ENCODED_SIZE]; |
31 | 109M | int options = renderer->options; |
32 | | |
33 | 109M | needs_escaping = |
34 | 109M | c < 0x80 && escape != LITERAL && |
35 | 109M | ((escape == NORMAL && |
36 | 60.7M | (c < 0x20 || |
37 | 56.2M | c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' || |
38 | 56.2M | c == '>' || c == '\\' || c == '`' || |
39 | 56.2M | (c == '!' && (!nextc || nextc == '[')) || |
40 | 56.2M | (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') || |
41 | 56.2M | ((CMARK_OPT_SMART & options) && |
42 | 44.8M | ((c == '-' && nextc == '-') || |
43 | 20.7M | (c == '.' && nextc == '.') || |
44 | 20.7M | c == '"' || c == '\'')) || |
45 | 56.2M | (renderer->begin_content && (c == '-' || c == '+' || c == '=') && |
46 | | // begin_content doesn't get set to false til we've passed digits |
47 | | // at the beginning of line, so... |
48 | 44.6M | !follows_digit) || |
49 | 56.2M | (renderer->begin_content && (c == '.' || c == ')') && follows_digit && |
50 | 44.5M | (nextc == 0 || cmark_isspace(nextc))))) || |
51 | 60.7M | (escape == URL && |
52 | 49.1M | (c == '`' || c == '<' || c == '>' || cmark_isspace(c) || c == '\\' || |
53 | 3.73M | c == ')' || c == '(')) || |
54 | 60.7M | (escape == TITLE && |
55 | 49.0M | (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\'))); |
56 | | |
57 | 109M | if (needs_escaping) { |
58 | 11.7M | if (escape == URL && cmark_isspace(c)) { |
59 | | // use percent encoding for spaces |
60 | 596 | snprintf(encoded, ENCODED_SIZE, "%%%2X", c); |
61 | 596 | cmark_strbuf_puts(renderer->buffer, encoded); |
62 | 596 | renderer->column += 3; |
63 | 11.7M | } else if (cmark_ispunct(c)) { |
64 | 8.95M | cmark_render_ascii(renderer, "\\"); |
65 | 8.95M | cmark_render_code_point(renderer, c); |
66 | 8.95M | } else { // render as entity |
67 | 2.81M | snprintf(encoded, ENCODED_SIZE, "&#%d;", c); |
68 | 2.81M | cmark_strbuf_puts(renderer->buffer, encoded); |
69 | 2.81M | renderer->column += strlen(encoded); |
70 | 2.81M | } |
71 | 97.6M | } else { |
72 | 97.6M | cmark_render_code_point(renderer, c); |
73 | 97.6M | } |
74 | 109M | } |
75 | | |
// Return the length of the longest run of consecutive backticks in the
// NUL-terminated string `code` (0 if it contains none).
static int longest_backtick_sequence(const char *code) {
  const char *p = code;
  int best = 0;
  int run = 0;

  for (;;) {
    if (*p == '`') {
      run++;
    } else {
      // Any other byte, including the terminator, ends the current run.
      if (run > best) {
        best = run;
      }
      run = 0;
      if (*p == '\0') {
        break;
      }
    }
    p++;
  }
  return best;
}
94 | | |
// Return the smallest n >= 1 such that `code` contains no run of exactly n
// consecutive backticks.  Run lengths are tracked in a 32-bit set, so runs
// of 32 or more are not recorded and the result is capped at 32.
static int shortest_unused_backtick_sequence(const char *code) {
  uint32_t seen = 1; // bit 0 is pre-set so the answer is never 0
  const char *p = code;
  int run = 0;
  int length = 0;

  for (;;) {
    if (*p == '`') {
      run++;
    } else {
      // Any other byte, including the terminator, ends the current run.
      if (run > 0 && run < 32) {
        seen |= (1U << run);
      }
      run = 0;
      if (*p == '\0') {
        break;
      }
    }
    p++;
  }

  // The answer is the index of the lowest clear bit.
  while (length < 32 && (seen & 1)) {
    seen >>= 1;
    length++;
  }
  return length;
}
121 | | |
122 | 500k | static bool is_autolink(cmark_node *node) { |
123 | 500k | const unsigned char *title; |
124 | 500k | const unsigned char *url; |
125 | 500k | cmark_node *link_text; |
126 | | |
127 | 500k | if (node->type != CMARK_NODE_LINK) { |
128 | 0 | return false; |
129 | 0 | } |
130 | | |
131 | 500k | url = node->as.link.url; |
132 | 500k | if (url == NULL || _scan_scheme(url) == 0) { |
133 | 351k | return false; |
134 | 351k | } |
135 | | |
136 | 149k | title = node->as.link.title; |
137 | | // if it has a title, we can't treat it as an autolink: |
138 | 149k | if (title && title[0]) { |
139 | 912 | return false; |
140 | 912 | } |
141 | | |
142 | 148k | link_text = node->first_child; |
143 | 148k | if (link_text == NULL) { |
144 | 3.06k | return false; |
145 | 3.06k | } |
146 | 145k | cmark_consolidate_text_nodes(link_text); |
147 | 145k | if (strncmp((const char *)url, "mailto:", 7) == 0) { |
148 | 74.2k | url += 7; |
149 | 74.2k | } |
150 | 145k | return link_text->data != NULL && |
151 | 145k | strcmp((const char *)url, (char *)link_text->data) == 0; |
152 | 148k | } |
153 | | |
154 | | static int S_render_node(cmark_renderer *renderer, cmark_node *node, |
155 | 18.1M | cmark_event_type ev_type, int options) { |
156 | 18.1M | cmark_node *tmp; |
157 | 18.1M | int list_number; |
158 | 18.1M | cmark_delim_type list_delim; |
159 | 18.1M | size_t numticks; |
160 | 18.1M | bool extra_spaces; |
161 | 18.1M | size_t i; |
162 | 18.1M | bool entering = (ev_type == CMARK_EVENT_ENTER); |
163 | 18.1M | const char *info, *code, *title; |
164 | 18.1M | char fencechar[2] = {'\0', '\0'}; |
165 | 18.1M | size_t code_len; |
166 | 18.1M | char listmarker[LISTMARKER_SIZE]; |
167 | 18.1M | const char *emph_delim; |
168 | 18.1M | bool first_in_list_item; |
169 | 18.1M | bufsize_t marker_width; |
170 | 18.1M | bool has_nonspace; |
171 | 18.1M | bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) && |
172 | 18.1M | !(CMARK_OPT_HARDBREAKS & options); |
173 | | |
174 | | // Don't adjust tight list status til we've started the list. |
175 | | // Otherwise we lose the blank line between a paragraph and |
176 | | // a following list. |
177 | 18.1M | if (entering) { |
178 | 12.5M | if (node->parent && node->parent->type == CMARK_NODE_ITEM) { |
179 | 781k | renderer->in_tight_list_item = node->parent->parent->as.list.tight; |
180 | 781k | } |
181 | 12.5M | } else { |
182 | 5.58M | if (node->type == CMARK_NODE_LIST) { |
183 | 976k | renderer->in_tight_list_item = |
184 | 976k | node->parent && |
185 | 976k | node->parent->type == CMARK_NODE_ITEM && |
186 | 976k | node->parent->parent->as.list.tight; |
187 | 976k | } |
188 | 5.58M | } |
189 | | |
190 | 18.1M | switch (node->type) { |
191 | 85.1k | case CMARK_NODE_DOCUMENT: |
192 | 85.1k | break; |
193 | | |
194 | 3.99M | case CMARK_NODE_BLOCK_QUOTE: |
195 | 3.99M | if (entering) { |
196 | 1.99M | LIT("> "); |
197 | 1.99M | renderer->begin_content = true; |
198 | 1.99M | cmark_strbuf_puts(renderer->prefix, "> "); |
199 | 1.99M | } else { |
200 | 1.99M | cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2); |
201 | 1.99M | BLANKLINE(); |
202 | 1.99M | } |
203 | 3.99M | break; |
204 | | |
205 | 1.95M | case CMARK_NODE_LIST: |
206 | 1.95M | if (!entering && node->next && (node->next->type == CMARK_NODE_LIST)) { |
207 | | // this ensures that a following indented code block or list will be |
208 | | // inteprereted correctly. |
209 | 169k | CR(); |
210 | 169k | LIT("<!-- end list -->"); |
211 | 169k | BLANKLINE(); |
212 | 169k | } |
213 | 1.95M | break; |
214 | | |
215 | 2.15M | case CMARK_NODE_ITEM: |
216 | 2.15M | if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { |
217 | 2.12M | marker_width = 4; |
218 | 2.12M | } else { |
219 | 31.8k | list_number = cmark_node_get_list_start(node->parent); |
220 | 31.8k | list_delim = cmark_node_get_list_delim(node->parent); |
221 | 31.8k | tmp = node; |
222 | 67.2k | while (tmp->prev) { |
223 | 35.3k | tmp = tmp->prev; |
224 | 35.3k | list_number += 1; |
225 | 35.3k | } |
226 | | // we ensure a width of at least 4 so |
227 | | // we get nice transition from single digits |
228 | | // to double |
229 | 31.8k | snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number, |
230 | 31.8k | list_delim == CMARK_PAREN_DELIM ? ")" : ".", |
231 | 31.8k | list_number < 10 ? " " : " "); |
232 | 31.8k | marker_width = strlen(listmarker); |
233 | 31.8k | } |
234 | 2.15M | if (entering) { |
235 | 1.07M | if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { |
236 | 1.06M | LIT(" - "); |
237 | 1.06M | renderer->begin_content = true; |
238 | 1.06M | } else { |
239 | 15.9k | LIT(listmarker); |
240 | 15.9k | renderer->begin_content = true; |
241 | 15.9k | } |
242 | 5.39M | for (i = marker_width; i--;) { |
243 | 4.31M | cmark_strbuf_putc(renderer->prefix, ' '); |
244 | 4.31M | } |
245 | 1.07M | } else { |
246 | 1.07M | cmark_strbuf_truncate(renderer->prefix, |
247 | 1.07M | renderer->prefix->size - marker_width); |
248 | 1.07M | CR(); |
249 | 1.07M | } |
250 | 2.15M | break; |
251 | | |
252 | 167k | case CMARK_NODE_HEADING: |
253 | 167k | if (entering) { |
254 | 232k | for (i = cmark_node_get_heading_level(node); i > 0; i--) { |
255 | 148k | LIT("#"); |
256 | 148k | } |
257 | 83.8k | LIT(" "); |
258 | 83.8k | renderer->begin_content = true; |
259 | 83.8k | renderer->no_linebreaks = true; |
260 | 83.8k | } else { |
261 | 83.8k | renderer->no_linebreaks = false; |
262 | 83.8k | BLANKLINE(); |
263 | 83.8k | } |
264 | 167k | break; |
265 | | |
266 | 66.4k | case CMARK_NODE_CODE_BLOCK: |
267 | | |
268 | 66.4k | first_in_list_item = node->prev == NULL && node->parent && |
269 | 66.4k | node->parent->type == CMARK_NODE_ITEM; |
270 | | |
271 | 66.4k | if (!first_in_list_item) { |
272 | 59.9k | BLANKLINE(); |
273 | 59.9k | } |
274 | 66.4k | info = cmark_node_get_fence_info(node); |
275 | 66.4k | fencechar[0] = strchr(info, '`') == NULL ? '`' : '~'; |
276 | 66.4k | code = cmark_node_get_literal(node); |
277 | | |
278 | 66.4k | numticks = longest_backtick_sequence(code) + 1; |
279 | 66.4k | if (numticks < 3) { |
280 | 64.5k | numticks = 3; |
281 | 64.5k | } |
282 | 698k | for (i = 0; i < numticks; i++) { |
283 | 631k | LIT(fencechar); |
284 | 631k | } |
285 | 66.4k | LIT(" "); |
286 | 66.4k | OUT(info, false, LITERAL); |
287 | 66.4k | CR(); |
288 | 66.4k | OUT(cmark_node_get_literal(node), false, LITERAL); |
289 | 66.4k | CR(); |
290 | 698k | for (i = 0; i < numticks; i++) { |
291 | 631k | LIT(fencechar); |
292 | 631k | } |
293 | | |
294 | 66.4k | BLANKLINE(); |
295 | 66.4k | break; |
296 | | |
297 | 90.3k | case CMARK_NODE_HTML_BLOCK: |
298 | 90.3k | BLANKLINE(); |
299 | 90.3k | OUT(cmark_node_get_literal(node), false, LITERAL); |
300 | 90.3k | BLANKLINE(); |
301 | 90.3k | break; |
302 | | |
303 | 0 | case CMARK_NODE_CUSTOM_BLOCK: |
304 | 0 | BLANKLINE(); |
305 | 0 | OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), |
306 | 0 | false, LITERAL); |
307 | 0 | BLANKLINE(); |
308 | 0 | break; |
309 | | |
310 | 47.1k | case CMARK_NODE_THEMATIC_BREAK: |
311 | 47.1k | BLANKLINE(); |
312 | 47.1k | LIT("-----"); |
313 | 47.1k | BLANKLINE(); |
314 | 47.1k | break; |
315 | | |
316 | 1.53M | case CMARK_NODE_PARAGRAPH: |
317 | 1.53M | if (!entering) { |
318 | 766k | BLANKLINE(); |
319 | 766k | } |
320 | 1.53M | break; |
321 | | |
322 | 4.07M | case CMARK_NODE_TEXT: |
323 | 4.07M | OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); |
324 | 4.07M | break; |
325 | | |
326 | 23.8k | case CMARK_NODE_LINEBREAK: |
327 | 23.8k | if (!(CMARK_OPT_HARDBREAKS & options)) { |
328 | 20.3k | LIT(" "); |
329 | 20.3k | } |
330 | 23.8k | CR(); |
331 | 23.8k | break; |
332 | | |
333 | 2.17M | case CMARK_NODE_SOFTBREAK: |
334 | 2.17M | if (CMARK_OPT_HARDBREAKS & options) { |
335 | 915k | LIT(" "); |
336 | 915k | CR(); |
337 | 1.25M | } else if (!renderer->no_linebreaks && renderer->width == 0 && |
338 | 1.25M | !(CMARK_OPT_HARDBREAKS & options) && |
339 | 1.25M | !(CMARK_OPT_NOBREAKS & options)) { |
340 | 11.3k | CR(); |
341 | 1.24M | } else { |
342 | 1.24M | OUT(" ", allow_wrap, LITERAL); |
343 | 1.24M | } |
344 | 2.17M | break; |
345 | | |
346 | 147k | case CMARK_NODE_CODE: |
347 | 147k | code = cmark_node_get_literal(node); |
348 | 147k | code_len = strlen(code); |
349 | 147k | numticks = shortest_unused_backtick_sequence(code); |
350 | 147k | has_nonspace = false; |
351 | 165k | for (i=0; i < code_len; i++) { |
352 | 158k | if (code[i] != ' ') { |
353 | 141k | has_nonspace = true; |
354 | 141k | break; |
355 | 141k | } |
356 | 158k | } |
357 | 147k | extra_spaces = code_len == 0 || |
358 | 147k | code[0] == '`' || code[code_len - 1] == '`' || |
359 | 147k | (has_nonspace && code[0] == ' ' && code[code_len - 1] == ' '); |
360 | 439k | for (i = 0; i < numticks; i++) { |
361 | 291k | LIT("`"); |
362 | 291k | } |
363 | 147k | if (extra_spaces) { |
364 | 2.99k | LIT(" "); |
365 | 2.99k | } |
366 | 147k | OUT(cmark_node_get_literal(node), allow_wrap, LITERAL); |
367 | 147k | if (extra_spaces) { |
368 | 2.99k | LIT(" "); |
369 | 2.99k | } |
370 | 439k | for (i = 0; i < numticks; i++) { |
371 | 291k | LIT("`"); |
372 | 291k | } |
373 | 147k | break; |
374 | | |
375 | 296k | case CMARK_NODE_HTML_INLINE: |
376 | 296k | OUT(cmark_node_get_literal(node), false, LITERAL); |
377 | 296k | break; |
378 | | |
379 | 0 | case CMARK_NODE_CUSTOM_INLINE: |
380 | 0 | OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), |
381 | 0 | false, LITERAL); |
382 | 0 | break; |
383 | | |
384 | 273k | case CMARK_NODE_STRONG: |
385 | 273k | if (entering) { |
386 | 136k | LIT("**"); |
387 | 136k | } else { |
388 | 136k | LIT("**"); |
389 | 136k | } |
390 | 273k | break; |
391 | | |
392 | 570k | case CMARK_NODE_EMPH: |
393 | | // If we have EMPH(EMPH(x)), we need to use *_x_* |
394 | | // because **x** is STRONG(x): |
395 | 570k | if (node->parent && node->parent->type == CMARK_NODE_EMPH && |
396 | 570k | node->next == NULL && node->prev == NULL) { |
397 | 1.51k | emph_delim = "_"; |
398 | 568k | } else { |
399 | 568k | emph_delim = "*"; |
400 | 568k | } |
401 | 570k | if (entering) { |
402 | 285k | LIT(emph_delim); |
403 | 285k | } else { |
404 | 285k | LIT(emph_delim); |
405 | 285k | } |
406 | 570k | break; |
407 | | |
408 | 500k | case CMARK_NODE_LINK: |
409 | 500k | if (is_autolink(node)) { |
410 | 90.1k | if (entering) { |
411 | 90.1k | LIT("<"); |
412 | 90.1k | if (strncmp(cmark_node_get_url(node), "mailto:", 7) == 0) { |
413 | 72.6k | LIT((const char *)cmark_node_get_url(node) + 7); |
414 | 72.6k | } else { |
415 | 17.5k | LIT((const char *)cmark_node_get_url(node)); |
416 | 17.5k | } |
417 | 90.1k | LIT(">"); |
418 | | // return signal to skip contents of node... |
419 | 90.1k | return 0; |
420 | 90.1k | } |
421 | 410k | } else { |
422 | 410k | if (entering) { |
423 | 205k | LIT("["); |
424 | 205k | } else { |
425 | 205k | LIT("]("); |
426 | 205k | OUT(cmark_node_get_url(node), false, URL); |
427 | 205k | title = cmark_node_get_title(node); |
428 | 205k | if (strlen(title) > 0) { |
429 | 2.47k | LIT(" \""); |
430 | 2.47k | OUT(title, false, TITLE); |
431 | 2.47k | LIT("\""); |
432 | 2.47k | } |
433 | 205k | LIT(")"); |
434 | 205k | } |
435 | 410k | } |
436 | 410k | break; |
437 | | |
438 | 410k | case CMARK_NODE_IMAGE: |
439 | 17.3k | if (entering) { |
440 | 8.69k | LIT("; |
443 | 8.69k | OUT(cmark_node_get_url(node), false, URL); |
444 | 8.69k | title = cmark_node_get_title(node); |
445 | 8.69k | if (strlen(title) > 0) { |
446 | 1.04k | OUT(" \"", allow_wrap, LITERAL); |
447 | 1.04k | OUT(title, false, TITLE); |
448 | 1.04k | LIT("\""); |
449 | 1.04k | } |
450 | 8.69k | LIT(")"); |
451 | 8.69k | } |
452 | 17.3k | break; |
453 | | |
454 | 0 | default: |
455 | 0 | assert(false); |
456 | 0 | break; |
457 | 18.1M | } |
458 | | |
459 | 18.0M | return 1; |
460 | 18.1M | } |
461 | | |
462 | 42.5k | char *cmark_render_commonmark(cmark_node *root, int options, int width) { |
463 | 42.5k | if (options & CMARK_OPT_HARDBREAKS) { |
464 | | // disable breaking on width, since it has |
465 | | // a different meaning with OPT_HARDBREAKS |
466 | 19.2k | width = 0; |
467 | 19.2k | } |
468 | 42.5k | return cmark_render(root, options, width, outc, S_render_node); |
469 | 42.5k | } |