/src/cmark/src/commonmark.c
Line | Count | Source |
1 | | #include <assert.h> |
2 | | #include <stdbool.h> |
3 | | #include <stdint.h> |
4 | | #include <stdio.h> |
5 | | #include <stdlib.h> |
6 | | #include <string.h> |
7 | | |
8 | | #include "cmark.h" |
9 | | #include "node.h" |
10 | | #include "buffer.h" |
11 | | #include "utf8.h" |
12 | | #include "scanners.h" |
13 | | #include "render.h" |
14 | | |
15 | 606k | #define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping) |
16 | 6.33M | #define LIT(s) renderer->out(renderer, s, false, LITERAL) |
17 | 1.74M | #define CR() renderer->cr(renderer) |
18 | 3.44M | #define BLANKLINE() renderer->blankline(renderer) |
19 | 708k | #define ENCODED_SIZE 20 |
20 | 70 | #define LISTMARKER_SIZE 20 |
21 | | |
22 | | // Functions to convert cmark_nodes to commonmark strings. |
23 | | |
24 | | static inline void outc(cmark_renderer *renderer, cmark_escaping escape, |
25 | 25.9M | int32_t c, unsigned char nextc) { |
26 | 25.9M | bool needs_escaping = false; |
27 | 25.9M | bool follows_digit = |
28 | 25.9M | renderer->buffer->size > 0 && |
29 | 25.9M | cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]); |
30 | 25.9M | char encoded[ENCODED_SIZE]; |
31 | 25.9M | int options = renderer->options; |
32 | | |
33 | 25.9M | needs_escaping = |
34 | 25.9M | c < 0x80 && escape != LITERAL && |
35 | 6.48M | ((escape == NORMAL && |
36 | 6.47M | (c < 0x20 || |
37 | 5.76M | c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' || |
38 | 4.85M | c == '>' || c == '\\' || c == '`' || |
39 | 3.60M | (c == '!' && (!nextc || nextc == '[')) || |
40 | 3.60M | (c == '&' && cmark_isalpha(nextc)) || (c == '!' && nextc == '[') || |
41 | 3.60M | ((CMARK_OPT_SMART & options) && |
42 | 1.99M | ((c == '-' && nextc == '-') || |
43 | 1.99M | (c == '.' && nextc == '.') || |
44 | 1.99M | c == '"' || c == '\'')) || |
45 | 3.60M | (renderer->begin_content && (c == '-' || c == '+' || c == '=') && |
46 | | // begin_content doesn't get set to false til we've passed digits |
47 | | // at the beginning of line, so... |
48 | 19.7k | !follows_digit) || |
49 | 3.58M | (renderer->begin_content && (c == '.' || c == ')') && follows_digit && |
50 | 113 | (nextc == 0 || cmark_isspace(nextc))))) || |
51 | 3.59M | (escape == URL && |
52 | 15.6k | (c == '`' || c == '<' || c == '>' || cmark_isspace(c) || c == '\\' || |
53 | 15.1k | c == ')' || c == '(')) || |
54 | 3.59M | (escape == TITLE && |
55 | 93 | (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\'))); |
56 | | |
57 | 25.9M | if (needs_escaping) { |
58 | 2.89M | if (escape == URL && cmark_isspace(c)) { |
59 | | // use percent encoding for spaces |
60 | 0 | snprintf(encoded, ENCODED_SIZE, "%%%2X", c); |
61 | 0 | cmark_strbuf_puts(renderer->buffer, encoded); |
62 | 0 | renderer->column += 3; |
63 | 2.89M | } else if (cmark_ispunct(c)) { |
64 | 2.18M | cmark_render_ascii(renderer, "\\"); |
65 | 2.18M | cmark_render_code_point(renderer, c); |
66 | 2.18M | } else { // render as entity |
67 | 708k | snprintf(encoded, ENCODED_SIZE, "&#%d;", c); |
68 | 708k | cmark_strbuf_puts(renderer->buffer, encoded); |
69 | 708k | renderer->column += (int)strlen(encoded); |
70 | 708k | } |
71 | 23.0M | } else { |
72 | 23.0M | cmark_render_code_point(renderer, c); |
73 | 23.0M | } |
74 | 25.9M | } |
75 | | |
/*
 * Return the length of the longest run of consecutive '`' characters in the
 * NUL-terminated string `code` (0 if it contains no backtick).
 */
static int longest_backtick_sequence(const char *code) {
  int best = 0;
  int run = 0;
  const char *p;

  // Walk up to and including the terminating NUL so that a run ending at
  // the very end of the string is still accounted for.
  for (p = code;; p++) {
    if (*p == '`') {
      run++;
      continue;
    }
    if (run > best) {
      best = run;
    }
    run = 0;
    if (*p == '\0') {
      break;
    }
  }
  return best;
}
94 | | |
/*
 * Return the smallest n >= 1 such that `code` contains no run of exactly n
 * consecutive backticks.
 *
 * Run lengths are tracked in a 32-bit mask, so only lengths 1..31 are
 * recorded; if every one of those is in use the function returns 32 rather
 * than overflowing the bit array.
 */
static int shortest_unused_backtick_sequence(const char *code) {
  // Bit k set <=> a run of exactly k backticks was seen. Bit 0 is preset so
  // that the answer is never 0.
  uint32_t seen = 1;
  int run = 0;
  int answer = 0;
  const char *p;

  // Include the terminating NUL in the scan so a trailing run is recorded.
  for (p = code;; p++) {
    if (*p == '`') {
      run++;
      continue;
    }
    if (run > 0 && run < 32) {
      seen |= (1U << run);
    }
    run = 0;
    if (*p == '\0') {
      break;
    }
  }

  // Index of the lowest clear bit (32 when all bits are set).
  while (answer < 32 && (seen & 1)) {
    seen >>= 1;
    answer++;
  }
  return answer;
}
121 | | |
122 | 8.81k | static bool is_autolink(cmark_node *node) { |
123 | 8.81k | const unsigned char *title; |
124 | 8.81k | const unsigned char *url; |
125 | 8.81k | cmark_node *link_text; |
126 | | |
127 | 8.81k | if (node->type != CMARK_NODE_LINK) { |
128 | 0 | return false; |
129 | 0 | } |
130 | | |
131 | 8.81k | url = node->as.link.url; |
132 | 8.81k | if (url == NULL || _scan_scheme(url) == 0) { |
133 | 8.13k | return false; |
134 | 8.13k | } |
135 | | |
136 | 689 | title = node->as.link.title; |
137 | | // if it has a title, we can't treat it as an autolink: |
138 | 689 | if (title && title[0]) { |
139 | 0 | return false; |
140 | 0 | } |
141 | | |
142 | 689 | link_text = node->first_child; |
143 | 689 | if (link_text == NULL) { |
144 | 0 | return false; |
145 | 0 | } |
146 | 689 | cmark_consolidate_text_nodes(link_text); |
147 | 689 | if (strncmp((const char *)url, "mailto:", 7) == 0) { |
148 | 472 | url += 7; |
149 | 472 | } |
150 | 689 | return link_text->data != NULL && |
151 | 689 | strcmp((const char *)url, (char *)link_text->data) == 0; |
152 | 689 | } |
153 | | |
154 | | static int S_render_node(cmark_renderer *renderer, cmark_node *node, |
155 | 11.4M | cmark_event_type ev_type, int options) { |
156 | 11.4M | cmark_node *tmp; |
157 | 11.4M | int list_number; |
158 | 11.4M | cmark_delim_type list_delim; |
159 | 11.4M | size_t numticks; |
160 | 11.4M | bool extra_spaces; |
161 | 11.4M | size_t i; |
162 | 11.4M | bool entering = (ev_type == CMARK_EVENT_ENTER); |
163 | 11.4M | const char *info, *code, *title; |
164 | 11.4M | char fencechar[2] = {'\0', '\0'}; |
165 | 11.4M | size_t code_len; |
166 | 11.4M | char listmarker[LISTMARKER_SIZE]; |
167 | 11.4M | const char *emph_delim; |
168 | 11.4M | bool first_in_list_item; |
169 | 11.4M | bufsize_t marker_width; |
170 | 11.4M | bool has_nonspace; |
171 | 11.4M | bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) && |
172 | 284k | !(CMARK_OPT_HARDBREAKS & options); |
173 | | |
174 | | // Don't adjust tight list status til we've started the list. |
175 | | // Otherwise we lose the blank line between a paragraph and |
176 | | // a following list. |
177 | 11.4M | if (entering) { |
178 | 6.00M | if (node->parent && node->parent->type == CMARK_NODE_ITEM) { |
179 | 43.5k | renderer->in_tight_list_item = node->parent->parent->as.list.tight; |
180 | 43.5k | } |
181 | 6.00M | } else { |
182 | 5.41M | if (node->type == CMARK_NODE_LIST) { |
183 | 1.64M | renderer->in_tight_list_item = |
184 | 1.64M | node->parent && |
185 | 1.64M | node->parent->type == CMARK_NODE_ITEM && |
186 | 39.9k | node->parent->parent->as.list.tight; |
187 | 1.64M | } |
188 | 5.41M | } |
189 | | |
190 | 11.4M | switch (node->type) { |
191 | 384 | case CMARK_NODE_DOCUMENT: |
192 | 384 | break; |
193 | | |
194 | 3.27M | case CMARK_NODE_BLOCK_QUOTE: |
195 | 3.27M | if (entering) { |
196 | 1.63M | LIT("> "); |
197 | 1.63M | renderer->begin_content = true; |
198 | 1.63M | cmark_strbuf_puts(renderer->prefix, "> "); |
199 | 1.63M | } else { |
200 | 1.63M | cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2); |
201 | 1.63M | BLANKLINE(); |
202 | 1.63M | } |
203 | 3.27M | break; |
204 | | |
205 | 3.29M | case CMARK_NODE_LIST: |
206 | 3.29M | if (!entering && node->next && (node->next->type == CMARK_NODE_LIST)) { |
207 | | // this ensures that a following indented code block or list will be |
208 | | // inteprereted correctly. |
209 | 29 | CR(); |
210 | 29 | LIT("<!-- end list -->"); |
211 | 29 | BLANKLINE(); |
212 | 29 | } |
213 | 3.29M | break; |
214 | | |
215 | 3.29M | case CMARK_NODE_ITEM: |
216 | 3.29M | if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { |
217 | 3.29M | marker_width = 4; |
218 | 3.29M | } else { |
219 | 70 | list_number = cmark_node_get_list_start(node->parent); |
220 | 70 | list_delim = cmark_node_get_list_delim(node->parent); |
221 | 70 | tmp = node; |
222 | 74 | while (tmp->prev) { |
223 | 4 | tmp = tmp->prev; |
224 | 4 | list_number += 1; |
225 | 4 | } |
226 | | // we ensure a width of at least 4 so |
227 | | // we get nice transition from single digits |
228 | | // to double |
229 | 70 | snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number, |
230 | 70 | list_delim == CMARK_PAREN_DELIM ? ")" : ".", |
231 | 70 | list_number < 10 ? " " : " "); |
232 | 70 | marker_width = (bufsize_t)strlen(listmarker); |
233 | 70 | } |
234 | 3.29M | if (entering) { |
235 | 1.64M | if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) { |
236 | 1.64M | LIT(" - "); |
237 | 1.64M | renderer->begin_content = true; |
238 | 1.64M | } else { |
239 | 35 | LIT(listmarker); |
240 | 35 | renderer->begin_content = true; |
241 | 35 | } |
242 | 1.64M | if (node->first_child == NULL) { |
243 | 1.60M | BLANKLINE(); |
244 | 1.60M | } else { |
245 | 217k | for (i = marker_width; i--;) { |
246 | 174k | cmark_strbuf_putc(renderer->prefix, ' '); |
247 | 174k | } |
248 | 43.5k | } |
249 | 1.64M | } else { |
250 | 1.64M | cmark_strbuf_truncate(renderer->prefix, |
251 | 1.64M | renderer->prefix->size - marker_width); |
252 | 1.64M | CR(); |
253 | 1.64M | } |
254 | 3.29M | break; |
255 | | |
256 | 102k | case CMARK_NODE_HEADING: |
257 | 102k | if (entering) { |
258 | 153k | for (i = cmark_node_get_heading_level(node); i > 0; i--) { |
259 | 102k | LIT("#"); |
260 | 102k | } |
261 | 51.1k | LIT(" "); |
262 | 51.1k | renderer->begin_content = true; |
263 | 51.1k | renderer->no_linebreaks = true; |
264 | 51.1k | } else { |
265 | 51.1k | renderer->no_linebreaks = false; |
266 | 51.1k | BLANKLINE(); |
267 | 51.1k | } |
268 | 102k | break; |
269 | | |
270 | 35.1k | case CMARK_NODE_CODE_BLOCK: |
271 | | |
272 | 35.1k | first_in_list_item = node->prev == NULL && node->parent && |
273 | 57 | node->parent->type == CMARK_NODE_ITEM; |
274 | | |
275 | 35.1k | if (!first_in_list_item) { |
276 | 35.1k | BLANKLINE(); |
277 | 35.1k | } |
278 | 35.1k | info = cmark_node_get_fence_info(node); |
279 | 35.1k | fencechar[0] = strchr(info, '`') == NULL ? '`' : '~'; |
280 | 35.1k | code = cmark_node_get_literal(node); |
281 | | |
282 | 35.1k | numticks = longest_backtick_sequence(code) + 1; |
283 | 35.1k | if (numticks < 3) { |
284 | 35.1k | numticks = 3; |
285 | 35.1k | } |
286 | 1.05M | for (i = 0; i < numticks; i++) { |
287 | 1.01M | LIT(fencechar); |
288 | 1.01M | } |
289 | 35.1k | LIT(" "); |
290 | 35.1k | OUT(info, false, LITERAL); |
291 | 35.1k | CR(); |
292 | 35.1k | OUT(cmark_node_get_literal(node), false, LITERAL); |
293 | 35.1k | CR(); |
294 | 1.05M | for (i = 0; i < numticks; i++) { |
295 | 1.01M | LIT(fencechar); |
296 | 1.01M | } |
297 | | |
298 | 35.1k | BLANKLINE(); |
299 | 35.1k | break; |
300 | | |
301 | 178 | case CMARK_NODE_HTML_BLOCK: |
302 | 178 | BLANKLINE(); |
303 | 178 | OUT(cmark_node_get_literal(node), false, LITERAL); |
304 | 178 | BLANKLINE(); |
305 | 178 | break; |
306 | | |
307 | 0 | case CMARK_NODE_CUSTOM_BLOCK: |
308 | 0 | BLANKLINE(); |
309 | 0 | OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), |
310 | 0 | false, LITERAL); |
311 | 0 | BLANKLINE(); |
312 | 0 | break; |
313 | | |
314 | 1 | case CMARK_NODE_THEMATIC_BREAK: |
315 | 1 | BLANKLINE(); |
316 | 1 | LIT("-----"); |
317 | 1 | BLANKLINE(); |
318 | 1 | break; |
319 | | |
320 | 158k | case CMARK_NODE_PARAGRAPH: |
321 | 158k | if (!entering) { |
322 | 79.3k | BLANKLINE(); |
323 | 79.3k | } |
324 | 158k | break; |
325 | | |
326 | 356k | case CMARK_NODE_TEXT: |
327 | 356k | OUT(cmark_node_get_literal(node), allow_wrap, NORMAL); |
328 | 356k | break; |
329 | | |
330 | 939 | case CMARK_NODE_LINEBREAK: |
331 | 939 | if (!(CMARK_OPT_HARDBREAKS & options)) { |
332 | 910 | LIT(" "); |
333 | 910 | } |
334 | 939 | CR(); |
335 | 939 | break; |
336 | | |
337 | 179k | case CMARK_NODE_SOFTBREAK: |
338 | 179k | if (CMARK_OPT_HARDBREAKS & options) { |
339 | 27.0k | LIT(" "); |
340 | 27.0k | CR(); |
341 | 152k | } else if (!renderer->no_linebreaks && renderer->width == 0 && |
342 | 98 | !(CMARK_OPT_HARDBREAKS & options) && |
343 | 98 | !(CMARK_OPT_NOBREAKS & options)) { |
344 | 66 | CR(); |
345 | 152k | } else { |
346 | 152k | OUT(" ", allow_wrap, LITERAL); |
347 | 152k | } |
348 | 179k | break; |
349 | | |
350 | 17.3k | case CMARK_NODE_CODE: |
351 | 17.3k | code = cmark_node_get_literal(node); |
352 | 17.3k | code_len = strlen(code); |
353 | 17.3k | numticks = shortest_unused_backtick_sequence(code); |
354 | 17.3k | has_nonspace = false; |
355 | 18.0k | for (i=0; i < code_len; i++) { |
356 | 17.6k | if (code[i] != ' ') { |
357 | 16.9k | has_nonspace = true; |
358 | 16.9k | break; |
359 | 16.9k | } |
360 | 17.6k | } |
361 | 17.3k | extra_spaces = code_len == 0 || |
362 | 17.3k | code[0] == '`' || code[code_len - 1] == '`' || |
363 | 17.3k | (has_nonspace && code[0] == ' ' && code[code_len - 1] == ' '); |
364 | 59.3k | for (i = 0; i < numticks; i++) { |
365 | 42.0k | LIT("`"); |
366 | 42.0k | } |
367 | 17.3k | if (extra_spaces) { |
368 | 4 | LIT(" "); |
369 | 4 | } |
370 | 17.3k | OUT(cmark_node_get_literal(node), allow_wrap, LITERAL); |
371 | 17.3k | if (extra_spaces) { |
372 | 4 | LIT(" "); |
373 | 4 | } |
374 | 59.3k | for (i = 0; i < numticks; i++) { |
375 | 42.0k | LIT("`"); |
376 | 42.0k | } |
377 | 17.3k | break; |
378 | | |
379 | 4.42k | case CMARK_NODE_HTML_INLINE: |
380 | 4.42k | OUT(cmark_node_get_literal(node), false, LITERAL); |
381 | 4.42k | break; |
382 | | |
383 | 0 | case CMARK_NODE_CUSTOM_INLINE: |
384 | 0 | OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node), |
385 | 0 | false, LITERAL); |
386 | 0 | break; |
387 | | |
388 | 668k | case CMARK_NODE_STRONG: |
389 | 668k | if (entering) { |
390 | 334k | LIT("**"); |
391 | 334k | } else { |
392 | 334k | LIT("**"); |
393 | 334k | } |
394 | 668k | break; |
395 | | |
396 | 29.4k | case CMARK_NODE_EMPH: |
397 | | // If we have EMPH(EMPH(x)), we need to use *_x_* |
398 | | // because **x** is STRONG(x): |
399 | 29.4k | if (node->parent && node->parent->type == CMARK_NODE_EMPH && |
400 | 13.4k | node->next == NULL && node->prev == NULL) { |
401 | 56 | emph_delim = "_"; |
402 | 29.4k | } else { |
403 | 29.4k | emph_delim = "*"; |
404 | 29.4k | } |
405 | 29.4k | if (entering) { |
406 | 14.7k | LIT(emph_delim); |
407 | 14.7k | } else { |
408 | 14.7k | LIT(emph_delim); |
409 | 14.7k | } |
410 | 29.4k | break; |
411 | | |
412 | 8.81k | case CMARK_NODE_LINK: |
413 | 8.81k | if (is_autolink(node)) { |
414 | 685 | if (entering) { |
415 | 685 | LIT("<"); |
416 | 685 | if (strncmp(cmark_node_get_url(node), "mailto:", 7) == 0) { |
417 | 472 | LIT((const char *)cmark_node_get_url(node) + 7); |
418 | 472 | } else { |
419 | 213 | LIT((const char *)cmark_node_get_url(node)); |
420 | 213 | } |
421 | 685 | LIT(">"); |
422 | | // return signal to skip contents of node... |
423 | 685 | return 0; |
424 | 685 | } |
425 | 8.13k | } else { |
426 | 8.13k | if (entering) { |
427 | 4.06k | LIT("["); |
428 | 4.06k | } else { |
429 | 4.06k | LIT("]("); |
430 | 4.06k | OUT(cmark_node_get_url(node), false, URL); |
431 | 4.06k | title = cmark_node_get_title(node); |
432 | 4.06k | if (strlen(title) > 0) { |
433 | 3 | LIT(" \""); |
434 | 3 | OUT(title, false, TITLE); |
435 | 3 | LIT("\""); |
436 | 3 | } |
437 | 4.06k | LIT(")"); |
438 | 4.06k | } |
439 | 8.13k | } |
440 | 8.13k | break; |
441 | | |
442 | 8.13k | case CMARK_NODE_IMAGE: |
443 | 248 | if (entering) { |
444 | 124 | LIT("; |
447 | 124 | OUT(cmark_node_get_url(node), false, URL); |
448 | 124 | title = cmark_node_get_title(node); |
449 | 124 | if (strlen(title) > 0) { |
450 | 2 | OUT(" \"", allow_wrap, LITERAL); |
451 | 2 | OUT(title, false, TITLE); |
452 | 2 | LIT("\""); |
453 | 2 | } |
454 | 124 | LIT(")"); |
455 | 124 | } |
456 | 248 | break; |
457 | | |
458 | 0 | default: |
459 | 0 | assert(false); |
460 | 0 | break; |
461 | 11.4M | } |
462 | | |
463 | 11.4M | return 1; |
464 | 11.4M | } |
465 | | |
466 | 192 | char *cmark_render_commonmark(cmark_node *root, int options, int width) { |
467 | 192 | if (options & CMARK_OPT_HARDBREAKS) { |
468 | | // disable breaking on width, since it has |
469 | | // a different meaning with OPT_HARDBREAKS |
470 | 36 | width = 0; |
471 | 36 | } |
472 | 192 | return cmark_render(root, options, width, outc, S_render_node); |
473 | 192 | } |