Coverage Report

Created: 2026-01-13 06:15

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/cmark/src/commonmark.c
Line
Count
Source
1
#include <assert.h>
2
#include <stdbool.h>
3
#include <stdint.h>
4
#include <stdio.h>
5
#include <stdlib.h>
6
#include <string.h>
7
8
#include "cmark.h"
9
#include "node.h"
10
#include "buffer.h"
11
#include "utf8.h"
12
#include "scanners.h"
13
#include "render.h"
14
15
606k
// Shorthands for the renderer callbacks. Each one expands to a call through
// a variable named `renderer`, which must be in scope at the point of use.
// OUT forwards the string, wrap flag and escaping mode to renderer->out.
#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
16
6.33M
// LIT calls renderer->out with wrapping disabled and LITERAL escaping.
#define LIT(s) renderer->out(renderer, s, false, LITERAL)
17
1.74M
// CR calls the renderer's cr callback.
#define CR() renderer->cr(renderer)
18
3.44M
// BLANKLINE calls the renderer's blankline callback.
#define BLANKLINE() renderer->blankline(renderer)
19
708k
// Size of the scratch buffer outc() uses for percent-encoded bytes and
// numeric character references.
#define ENCODED_SIZE 20
20
70
// Size of the buffer S_render_node() uses to format an ordered-list marker.
#define LISTMARKER_SIZE 20
21
22
// Functions to convert cmark_nodes to commonmark strings.
23
24
// Write the single code point `c` to the renderer's buffer, escaping it if
// the escaping mode requires it.
//
//   renderer  output state; its buffer, column, options and begin_content
//             fields are read and/or updated here
//   escape    LITERAL (never escape), NORMAL (ordinary text), URL (link
//             destination) or TITLE (link title)
//   c         the code point to write; only values below 0x80 are ever
//             escaped
//   nextc     the byte following `c` in the input (0 if none); used for
//             context-sensitive decisions such as "!" before "["
//
// Escaped output takes one of three forms:
//   * whitespace inside a URL is percent-encoded (always "%" plus two
//     upper-case hex digits),
//   * ASCII punctuation is backslash-escaped,
//   * anything else is written as a decimal numeric character reference.
static inline void outc(cmark_renderer *renderer, cmark_escaping escape,
                        int32_t c, unsigned char nextc) {
  bool needs_escaping = false;
  // True when the last byte already in the output buffer is a digit; used
  // to tell "1." / "1)" (ordered list markers) from a bare "." or ")".
  bool follows_digit =
      renderer->buffer->size > 0 &&
      cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]);
  char encoded[ENCODED_SIZE];
  int options = renderer->options;

  needs_escaping =
      c < 0x80 && escape != LITERAL &&
      ((escape == NORMAL &&
        (c < 0x20 ||
         c == '*' || c == '_' || c == '[' || c == ']' || c == '#' || c == '<' ||
         c == '>' || c == '\\' || c == '`' ||
         // "!" only matters at end of input or directly before "["
         (c == '!' && (!nextc || nextc == '[')) ||
         (c == '&' && cmark_isalpha(nextc)) ||
         ((CMARK_OPT_SMART & options) &&
            ((c == '-' && nextc == '-') ||
             (c == '.' && nextc == '.') ||
             c == '"' || c == '\'')) ||
         (renderer->begin_content && (c == '-' || c == '+' || c == '=') &&
          // begin_content doesn't get set to false til we've passed digits
          // at the beginning of line, so...
          !follows_digit) ||
         (renderer->begin_content && (c == '.' || c == ')') && follows_digit &&
          (nextc == 0 || cmark_isspace(nextc))))) ||
       (escape == URL &&
        (c == '`' || c == '<' || c == '>' || cmark_isspace(c) || c == '\\' ||
         c == ')' || c == '(')) ||
       (escape == TITLE &&
        (c == '`' || c == '<' || c == '>' || c == '"' || c == '\\')));

  if (needs_escaping) {
    if (escape == URL && cmark_isspace(c)) {
      // Percent-encode whitespace. The field must be zero-padded: a plain
      // width of 2 would turn a tab (0x09) into "% 9", which is not a valid
      // percent escape.
      snprintf(encoded, ENCODED_SIZE, "%%%02X", (unsigned int)c);
      cmark_strbuf_puts(renderer->buffer, encoded);
      renderer->column += 3;
    } else if (cmark_ispunct(c)) {
      cmark_render_ascii(renderer, "\\");
      cmark_render_code_point(renderer, c);
    } else { // render as entity
      snprintf(encoded, ENCODED_SIZE, "&#%d;", (int)c);
      cmark_strbuf_puts(renderer->buffer, encoded);
      renderer->column += (int)strlen(encoded);
    }
  } else {
    cmark_render_code_point(renderer, c);
  }
}
75
76
35.1k
// Return the length of the longest run of consecutive '`' characters in the
// NUL-terminated string `code` (0 if it contains no backticks).
static int longest_backtick_sequence(const char *code) {
  int best = 0;
  int run = 0;
  const char *p = code;

  for (;;) {
    if (*p == '`') {
      run++;
    } else {
      // Any other byte, including the terminating NUL, ends the current run.
      if (run > best) {
        best = run;
      }
      run = 0;
      if (*p == '\0') {
        break;
      }
    }
    p++;
  }
  return best;
}
94
95
17.3k
// Return the smallest n >= 1 such that `code` contains no run of exactly n
// consecutive backticks. Run lengths are tracked in a 32-bit mask, so runs of
// 32 or more are not recorded and the result never exceeds 32.
static int shortest_unused_backtick_sequence(const char *code) {
  // Bit k set means "a run of exactly k backticks occurs". Bit 0 starts out
  // set so that a length of zero is never returned.
  uint32_t seen = 1;
  int run = 0;
  int length = 0;
  const char *p = code;

  do {
    if (*p == '`') {
      run++;
    } else {
      // Any other byte, including the terminating NUL, closes the run.
      if (run > 0 && run < 32) {
        seen |= (1U << run);
      }
      run = 0;
    }
  } while (*p++ != '\0');

  // Position of the lowest clear bit.
  while (length < 32 && (seen & 1U)) {
    seen >>= 1;
    length++;
  }
  return length;
}
121
122
8.81k
// Decide whether `node` can be written in autolink form (<url>).
//
// That is the case when the node is a link whose URL is non-NULL and is
// accepted by _scan_scheme, which has no (non-empty) title, and whose first
// child's data equals the URL once a leading "mailto:" has been dropped.
//
// Side effect: when the checks get as far as the first child,
// cmark_consolidate_text_nodes() is called on it.
static bool is_autolink(cmark_node *node) {
  const unsigned char *dest;
  const unsigned char *link_title;
  cmark_node *label;

  if (node->type != CMARK_NODE_LINK) {
    return false;
  }

  dest = node->as.link.url;
  if (dest == NULL || _scan_scheme(dest) == 0) {
    return false;
  }

  // A link that carries a title cannot be expressed as an autolink.
  link_title = node->as.link.title;
  if (link_title != NULL && link_title[0] != '\0') {
    return false;
  }

  label = node->first_child;
  if (label == NULL) {
    return false;
  }
  cmark_consolidate_text_nodes(label);

  // The visible text of an e-mail autolink omits the "mailto:" prefix.
  if (strncmp((const char *)dest, "mailto:", 7) == 0) {
    dest += 7;
  }

  if (label->data == NULL) {
    return false;
  }
  return strcmp((const char *)dest, (char *)label->data) == 0;
}
153
154
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
155
11.4M
                         cmark_event_type ev_type, int options) {
156
11.4M
  cmark_node *tmp;
157
11.4M
  int list_number;
158
11.4M
  cmark_delim_type list_delim;
159
11.4M
  size_t numticks;
160
11.4M
  bool extra_spaces;
161
11.4M
  size_t i;
162
11.4M
  bool entering = (ev_type == CMARK_EVENT_ENTER);
163
11.4M
  const char *info, *code, *title;
164
11.4M
  char fencechar[2] = {'\0', '\0'};
165
11.4M
  size_t code_len;
166
11.4M
  char listmarker[LISTMARKER_SIZE];
167
11.4M
  const char *emph_delim;
168
11.4M
  bool first_in_list_item;
169
11.4M
  bufsize_t marker_width;
170
11.4M
  bool has_nonspace;
171
11.4M
  bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) &&
172
284k
                    !(CMARK_OPT_HARDBREAKS & options);
173
174
  // Don't adjust tight list status til we've started the list.
175
  // Otherwise we lose the blank line between a paragraph and
176
  // a following list.
177
11.4M
  if (entering) {
178
6.00M
    if (node->parent && node->parent->type == CMARK_NODE_ITEM) {
179
43.5k
      renderer->in_tight_list_item = node->parent->parent->as.list.tight;
180
43.5k
    }
181
6.00M
  } else {
182
5.41M
    if (node->type == CMARK_NODE_LIST) {
183
1.64M
      renderer->in_tight_list_item =
184
1.64M
        node->parent &&
185
1.64M
        node->parent->type == CMARK_NODE_ITEM &&
186
39.9k
        node->parent->parent->as.list.tight;
187
1.64M
    }
188
5.41M
  }
189
190
11.4M
  switch (node->type) {
191
384
  case CMARK_NODE_DOCUMENT:
192
384
    break;
193
194
3.27M
  case CMARK_NODE_BLOCK_QUOTE:
195
3.27M
    if (entering) {
196
1.63M
      LIT("> ");
197
1.63M
      renderer->begin_content = true;
198
1.63M
      cmark_strbuf_puts(renderer->prefix, "> ");
199
1.63M
    } else {
200
1.63M
      cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
201
1.63M
      BLANKLINE();
202
1.63M
    }
203
3.27M
    break;
204
205
3.29M
  case CMARK_NODE_LIST:
206
3.29M
    if (!entering && node->next && (node->next->type == CMARK_NODE_LIST)) {
207
      // this ensures that a following indented code block or list will be
208
      // inteprereted correctly.
209
29
      CR();
210
29
      LIT("<!-- end list -->");
211
29
      BLANKLINE();
212
29
    }
213
3.29M
    break;
214
215
3.29M
  case CMARK_NODE_ITEM:
216
3.29M
    if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
217
3.29M
      marker_width = 4;
218
3.29M
    } else {
219
70
      list_number = cmark_node_get_list_start(node->parent);
220
70
      list_delim = cmark_node_get_list_delim(node->parent);
221
70
      tmp = node;
222
74
      while (tmp->prev) {
223
4
        tmp = tmp->prev;
224
4
        list_number += 1;
225
4
      }
226
      // we ensure a width of at least 4 so
227
      // we get nice transition from single digits
228
      // to double
229
70
      snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number,
230
70
               list_delim == CMARK_PAREN_DELIM ? ")" : ".",
231
70
               list_number < 10 ? "  " : " ");
232
70
      marker_width = (bufsize_t)strlen(listmarker);
233
70
    }
234
3.29M
    if (entering) {
235
1.64M
      if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
236
1.64M
        LIT("  - ");
237
1.64M
        renderer->begin_content = true;
238
1.64M
      } else {
239
35
        LIT(listmarker);
240
35
        renderer->begin_content = true;
241
35
      }
242
1.64M
      if (node->first_child == NULL) {
243
1.60M
        BLANKLINE();
244
1.60M
      } else {
245
217k
        for (i = marker_width; i--;) {
246
174k
          cmark_strbuf_putc(renderer->prefix, ' ');
247
174k
        }
248
43.5k
      }
249
1.64M
    } else {
250
1.64M
      cmark_strbuf_truncate(renderer->prefix,
251
1.64M
                            renderer->prefix->size - marker_width);
252
1.64M
      CR();
253
1.64M
    }
254
3.29M
    break;
255
256
102k
  case CMARK_NODE_HEADING:
257
102k
    if (entering) {
258
153k
      for (i = cmark_node_get_heading_level(node); i > 0; i--) {
259
102k
        LIT("#");
260
102k
      }
261
51.1k
      LIT(" ");
262
51.1k
      renderer->begin_content = true;
263
51.1k
      renderer->no_linebreaks = true;
264
51.1k
    } else {
265
51.1k
      renderer->no_linebreaks = false;
266
51.1k
      BLANKLINE();
267
51.1k
    }
268
102k
    break;
269
270
35.1k
  case CMARK_NODE_CODE_BLOCK:
271
272
35.1k
    first_in_list_item = node->prev == NULL && node->parent &&
273
57
                         node->parent->type == CMARK_NODE_ITEM;
274
275
35.1k
    if (!first_in_list_item) {
276
35.1k
      BLANKLINE();
277
35.1k
    }
278
35.1k
    info = cmark_node_get_fence_info(node);
279
35.1k
    fencechar[0] = strchr(info, '`') == NULL ? '`' : '~';
280
35.1k
    code = cmark_node_get_literal(node);
281
282
35.1k
    numticks = longest_backtick_sequence(code) + 1;
283
35.1k
    if (numticks < 3) {
284
35.1k
      numticks = 3;
285
35.1k
    }
286
1.05M
    for (i = 0; i < numticks; i++) {
287
1.01M
      LIT(fencechar);
288
1.01M
    }
289
35.1k
    LIT(" ");
290
35.1k
    OUT(info, false, LITERAL);
291
35.1k
    CR();
292
35.1k
    OUT(cmark_node_get_literal(node), false, LITERAL);
293
35.1k
    CR();
294
1.05M
    for (i = 0; i < numticks; i++) {
295
1.01M
      LIT(fencechar);
296
1.01M
    }
297
298
35.1k
    BLANKLINE();
299
35.1k
    break;
300
301
178
  case CMARK_NODE_HTML_BLOCK:
302
178
    BLANKLINE();
303
178
    OUT(cmark_node_get_literal(node), false, LITERAL);
304
178
    BLANKLINE();
305
178
    break;
306
307
0
  case CMARK_NODE_CUSTOM_BLOCK:
308
0
    BLANKLINE();
309
0
    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
310
0
        false, LITERAL);
311
0
    BLANKLINE();
312
0
    break;
313
314
1
  case CMARK_NODE_THEMATIC_BREAK:
315
1
    BLANKLINE();
316
1
    LIT("-----");
317
1
    BLANKLINE();
318
1
    break;
319
320
158k
  case CMARK_NODE_PARAGRAPH:
321
158k
    if (!entering) {
322
79.3k
      BLANKLINE();
323
79.3k
    }
324
158k
    break;
325
326
356k
  case CMARK_NODE_TEXT:
327
356k
    OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
328
356k
    break;
329
330
939
  case CMARK_NODE_LINEBREAK:
331
939
    if (!(CMARK_OPT_HARDBREAKS & options)) {
332
910
      LIT("  ");
333
910
    }
334
939
    CR();
335
939
    break;
336
337
179k
  case CMARK_NODE_SOFTBREAK:
338
179k
    if (CMARK_OPT_HARDBREAKS & options) {
339
27.0k
      LIT("  ");
340
27.0k
      CR();
341
152k
    } else if (!renderer->no_linebreaks && renderer->width == 0 &&
342
98
               !(CMARK_OPT_HARDBREAKS & options) &&
343
98
               !(CMARK_OPT_NOBREAKS & options)) {
344
66
      CR();
345
152k
    } else {
346
152k
      OUT(" ", allow_wrap, LITERAL);
347
152k
    }
348
179k
    break;
349
350
17.3k
  case CMARK_NODE_CODE:
351
17.3k
    code = cmark_node_get_literal(node);
352
17.3k
    code_len = strlen(code);
353
17.3k
    numticks = shortest_unused_backtick_sequence(code);
354
17.3k
    has_nonspace = false;
355
18.0k
    for (i=0; i < code_len; i++) {
356
17.6k
      if (code[i] != ' ') {
357
16.9k
        has_nonspace = true;
358
16.9k
        break;
359
16.9k
      }
360
17.6k
    }
361
17.3k
    extra_spaces = code_len == 0 ||
362
17.3k
            code[0] == '`' || code[code_len - 1] == '`' ||
363
17.3k
            (has_nonspace && code[0] == ' ' && code[code_len - 1] == ' ');
364
59.3k
    for (i = 0; i < numticks; i++) {
365
42.0k
      LIT("`");
366
42.0k
    }
367
17.3k
    if (extra_spaces) {
368
4
      LIT(" ");
369
4
    }
370
17.3k
    OUT(cmark_node_get_literal(node), allow_wrap, LITERAL);
371
17.3k
    if (extra_spaces) {
372
4
      LIT(" ");
373
4
    }
374
59.3k
    for (i = 0; i < numticks; i++) {
375
42.0k
      LIT("`");
376
42.0k
    }
377
17.3k
    break;
378
379
4.42k
  case CMARK_NODE_HTML_INLINE:
380
4.42k
    OUT(cmark_node_get_literal(node), false, LITERAL);
381
4.42k
    break;
382
383
0
  case CMARK_NODE_CUSTOM_INLINE:
384
0
    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
385
0
        false, LITERAL);
386
0
    break;
387
388
668k
  case CMARK_NODE_STRONG:
389
668k
    if (entering) {
390
334k
      LIT("**");
391
334k
    } else {
392
334k
      LIT("**");
393
334k
    }
394
668k
    break;
395
396
29.4k
  case CMARK_NODE_EMPH:
397
    // If we have EMPH(EMPH(x)), we need to use *_x_*
398
    // because **x** is STRONG(x):
399
29.4k
    if (node->parent && node->parent->type == CMARK_NODE_EMPH &&
400
13.4k
        node->next == NULL && node->prev == NULL) {
401
56
      emph_delim = "_";
402
29.4k
    } else {
403
29.4k
      emph_delim = "*";
404
29.4k
    }
405
29.4k
    if (entering) {
406
14.7k
      LIT(emph_delim);
407
14.7k
    } else {
408
14.7k
      LIT(emph_delim);
409
14.7k
    }
410
29.4k
    break;
411
412
8.81k
  case CMARK_NODE_LINK:
413
8.81k
    if (is_autolink(node)) {
414
685
      if (entering) {
415
685
        LIT("<");
416
685
        if (strncmp(cmark_node_get_url(node), "mailto:", 7) == 0) {
417
472
          LIT((const char *)cmark_node_get_url(node) + 7);
418
472
        } else {
419
213
          LIT((const char *)cmark_node_get_url(node));
420
213
        }
421
685
        LIT(">");
422
        // return signal to skip contents of node...
423
685
        return 0;
424
685
      }
425
8.13k
    } else {
426
8.13k
      if (entering) {
427
4.06k
        LIT("[");
428
4.06k
      } else {
429
4.06k
        LIT("](");
430
4.06k
        OUT(cmark_node_get_url(node), false, URL);
431
4.06k
        title = cmark_node_get_title(node);
432
4.06k
        if (strlen(title) > 0) {
433
3
          LIT(" \"");
434
3
          OUT(title, false, TITLE);
435
3
          LIT("\"");
436
3
        }
437
4.06k
        LIT(")");
438
4.06k
      }
439
8.13k
    }
440
8.13k
    break;
441
442
8.13k
  case CMARK_NODE_IMAGE:
443
248
    if (entering) {
444
124
      LIT("![");
445
124
    } else {
446
124
      LIT("](");
447
124
      OUT(cmark_node_get_url(node), false, URL);
448
124
      title = cmark_node_get_title(node);
449
124
      if (strlen(title) > 0) {
450
2
        OUT(" \"", allow_wrap, LITERAL);
451
2
        OUT(title, false, TITLE);
452
2
        LIT("\"");
453
2
      }
454
124
      LIT(")");
455
124
    }
456
248
    break;
457
458
0
  default:
459
0
    assert(false);
460
0
    break;
461
11.4M
  }
462
463
11.4M
  return 1;
464
11.4M
}
465
466
192
// Render the tree rooted at `root` as CommonMark by handing outc and
// S_render_node to cmark_render, and return whatever cmark_render returns.
//
//   root     root node of the tree to render
//   options  CMARK_OPT_* bit flags
//   width    wrap column passed on to cmark_render; forced to 0 when
//            CMARK_OPT_HARDBREAKS is set
char *cmark_render_commonmark(cmark_node *root, int options, int width) {
  // disable breaking on width with OPT_HARDBREAKS, since it has
  // a different meaning there
  int effective_width = (options & CMARK_OPT_HARDBREAKS) ? 0 : width;

  return cmark_render(root, options, effective_width, outc, S_render_node);
}