Coverage Report

Created: 2023-03-17 06:19

/src/cmark/src/commonmark.c
Line
Count
Source (jump to first uncovered line)
1
#include <stdlib.h>
2
#include <stdio.h>
3
#include <string.h>
4
#include <stdint.h>
5
#include <assert.h>
6
7
#include "config.h"
8
#include "cmark.h"
9
#include "node.h"
10
#include "buffer.h"
11
#include "utf8.h"
12
#include "scanners.h"
13
#include "render.h"
14
15
6.20M
// Shorthands used by the node renderer below. OUT, LIT, CR and BLANKLINE all
// expand to calls through a variable named `renderer`, which must be in scope
// at the point of use.
//   OUT(s, wrap, escaping) - emit s via renderer->out with the given wrap flag
//                            and escaping mode.
//   LIT(s)                 - emit s via renderer->out with wrap = false and
//                            LITERAL escaping.
//   CR()                   - call renderer->cr.
//   BLANKLINE()            - call renderer->blankline.
// ENCODED_SIZE is the size of the scratch buffer outc() formats escapes into;
// LISTMARKER_SIZE is the size of the ordered-list marker buffer in
// S_render_node().
#define OUT(s, wrap, escaping) renderer->out(renderer, s, wrap, escaping)
16
8.14M
#define LIT(s) renderer->out(renderer, s, false, LITERAL)
17
2.33M
#define CR() renderer->cr(renderer)
18
3.41M
#define BLANKLINE() renderer->blankline(renderer)
19
2.81M
#define ENCODED_SIZE 20
20
31.8k
#define LISTMARKER_SIZE 20
21
22
// Functions to convert cmark_nodes to commonmark strings.
23
24
// Emit one code point into the renderer's buffer, escaping it when the
// escaping mode calls for it.
//
//   renderer - supplies the output buffer, the column counter, the option
//              flags and the begin_content flag consulted below.
//   escape   - LITERAL (never escape), NORMAL, URL or TITLE.
//   c        - the code point to emit; only values below 0x80 are escaped.
//   nextc    - the byte after c, used as one byte of lookahead; the checks
//              below treat 0 as "nothing follows".
//
// Escaped characters are written in one of three forms: a two-digit percent
// escape (whitespace in URLs), a backslash escape (punctuation), or a decimal
// character reference (everything else).
static CMARK_INLINE void outc(cmark_renderer *renderer, cmark_escaping escape,
                              int32_t c, unsigned char nextc) {
  bool needs_escaping = false;
  bool follows_digit =
      renderer->buffer->size > 0 &&
      cmark_isdigit(renderer->buffer->ptr[renderer->buffer->size - 1]);
  char encoded[ENCODED_SIZE];
  int options = renderer->options;

  if (c < 0x80 && escape != LITERAL) {
    if (escape == NORMAL) {
      needs_escaping =
          c < 0x20 ||
          c == '*' || c == '_' || c == '[' || c == ']' || c == '#' ||
          c == '<' || c == '>' || c == '\\' || c == '`' ||
          // '!' only matters when it could start an image or ends the text
          (c == '!' && (!nextc || nextc == '[')) ||
          (c == '&' && cmark_isalpha(nextc)) ||
          ((CMARK_OPT_SMART & options) &&
           ((c == '-' && nextc == '-') ||
            (c == '.' && nextc == '.') ||
            c == '"' || c == '\'')) ||
          (renderer->begin_content && (c == '-' || c == '+' || c == '=') &&
           // begin_content doesn't get set to false til we've passed digits
           // at the beginning of line, so...
           !follows_digit) ||
          (renderer->begin_content && (c == '.' || c == ')') &&
           follows_digit && (nextc == 0 || cmark_isspace(nextc)));
    } else if (escape == URL) {
      needs_escaping = c == '`' || c == '<' || c == '>' || cmark_isspace(c) ||
                       c == '\\' || c == ')' || c == '(';
    } else if (escape == TITLE) {
      needs_escaping =
          c == '`' || c == '<' || c == '>' || c == '"' || c == '\\';
    }
  }

  if (!needs_escaping) {
    cmark_render_code_point(renderer, c);
    return;
  }

  if (escape == URL && cmark_isspace(c)) {
    // Percent-encode whitespace. The value must be zero-padded to two hex
    // digits: a plain width of 2 pads with a space, which would turn a tab
    // into "% 9" rather than "%09". The result is always three characters.
    snprintf(encoded, ENCODED_SIZE, "%%%02X", (unsigned int)c);
    cmark_strbuf_puts(renderer->buffer, encoded);
    renderer->column += 3;
  } else if (cmark_ispunct(c)) {
    cmark_render_ascii(renderer, "\\");
    cmark_render_code_point(renderer, c);
  } else { // render as entity
    snprintf(encoded, ENCODED_SIZE, "&#%d;", c);
    cmark_strbuf_puts(renderer->buffer, encoded);
    renderer->column += strlen(encoded);
  }
}
75
76
66.4k
// Return the length of the longest run of consecutive backticks in `code`,
// or 0 when it contains none.
static int longest_backtick_sequence(const char *code) {
  const char *cursor = code;
  const char *terminator = code + strlen(code);
  int best = 0;
  int run = 0;

  // The terminating NUL is visited too, so a run that reaches the end of the
  // string is closed and counted like any other.
  for (; cursor <= terminator; cursor++) {
    if (*cursor == '`') {
      run++;
      continue;
    }
    if (run > best) {
      best = run;
    }
    run = 0;
  }
  return best;
}
94
95
147k
// Return the smallest n >= 1 such that `code` contains no run of exactly n
// consecutive backticks. Only run lengths 1..31 are tracked, so the result
// is capped at 32.
static int shortest_unused_backtick_sequence(const char *code) {
  // Bit k is set when a run of exactly k backticks was seen. Bit 0 starts
  // set so that the search below can never answer 0.
  uint32_t seen = 1;
  const char *cursor = code;
  const char *terminator = code + strlen(code);
  int run = 0;
  int candidate;

  // Include the terminating NUL so a trailing run is recorded.
  for (; cursor <= terminator; cursor++) {
    if (*cursor == '`') {
      run++;
      continue;
    }
    if (run > 0 && run < 32) {
      seen |= (1U << run);
    }
    run = 0;
  }

  // The lowest clear bit is the answer.
  for (candidate = 0; candidate < 32 && ((seen >> candidate) & 1U);
       candidate++) {
  }
  return candidate;
}
121
122
500k
// Decide whether a link node can be written in autolink form (<url>).
//
// Returns true only when all of the following hold:
//   - the node is a CMARK_NODE_LINK whose URL is non-NULL and accepted by
//     _scan_scheme;
//   - the link has no (non-empty) title;
//   - the link's only child is a text node whose content equals the URL,
//     ignoring a leading "mailto:" on the URL.
//
// Side effect: calls cmark_consolidate_text_nodes on the first child
// (presumably to merge adjacent text nodes before comparing - confirm
// against its definition).
static bool is_autolink(cmark_node *node) {
  const unsigned char *title;
  const unsigned char *url;
  cmark_node *link_text;

  if (node->type != CMARK_NODE_LINK) {
    return false;
  }

  url = node->as.link.url;
  if (url == NULL || _scan_scheme(url) == 0) {
    return false;
  }

  title = node->as.link.title;
  // if it has a title, we can't treat it as an autolink:
  if (title && title[0]) {
    return false;
  }

  link_text = node->first_child;
  if (link_text == NULL) {
    return false;
  }
  cmark_consolidate_text_nodes(link_text);

  // The autolink form prints nothing but the URL, and the renderer skips the
  // link's children when it is used. That is only faithful when the link
  // consists of one plain-text child; anything else falls back to the
  // regular [text](url) form, which is always valid.
  if (link_text->type != CMARK_NODE_TEXT || link_text->next != NULL ||
      link_text->data == NULL) {
    return false;
  }

  if (strncmp((const char *)url, "mailto:", 7) == 0) {
    url += 7;
  }
  return strcmp((const char *)url, (char *)link_text->data) == 0;
}
153
154
static int S_render_node(cmark_renderer *renderer, cmark_node *node,
155
18.1M
                         cmark_event_type ev_type, int options) {
156
18.1M
  cmark_node *tmp;
157
18.1M
  int list_number;
158
18.1M
  cmark_delim_type list_delim;
159
18.1M
  size_t numticks;
160
18.1M
  bool extra_spaces;
161
18.1M
  size_t i;
162
18.1M
  bool entering = (ev_type == CMARK_EVENT_ENTER);
163
18.1M
  const char *info, *code, *title;
164
18.1M
  char fencechar[2] = {'\0', '\0'};
165
18.1M
  size_t code_len;
166
18.1M
  char listmarker[LISTMARKER_SIZE];
167
18.1M
  const char *emph_delim;
168
18.1M
  bool first_in_list_item;
169
18.1M
  bufsize_t marker_width;
170
18.1M
  bool has_nonspace;
171
18.1M
  bool allow_wrap = renderer->width > 0 && !(CMARK_OPT_NOBREAKS & options) &&
172
18.1M
                    !(CMARK_OPT_HARDBREAKS & options);
173
174
  // Don't adjust tight list status til we've started the list.
175
  // Otherwise we lose the blank line between a paragraph and
176
  // a following list.
177
18.1M
  if (entering) {
178
12.5M
    if (node->parent && node->parent->type == CMARK_NODE_ITEM) {
179
781k
      renderer->in_tight_list_item = node->parent->parent->as.list.tight;
180
781k
    }
181
12.5M
  } else {
182
5.58M
    if (node->type == CMARK_NODE_LIST) {
183
976k
      renderer->in_tight_list_item =
184
976k
        node->parent &&
185
976k
        node->parent->type == CMARK_NODE_ITEM &&
186
976k
        node->parent->parent->as.list.tight;
187
976k
    }
188
5.58M
  }
189
190
18.1M
  switch (node->type) {
191
85.1k
  case CMARK_NODE_DOCUMENT:
192
85.1k
    break;
193
194
3.99M
  case CMARK_NODE_BLOCK_QUOTE:
195
3.99M
    if (entering) {
196
1.99M
      LIT("> ");
197
1.99M
      renderer->begin_content = true;
198
1.99M
      cmark_strbuf_puts(renderer->prefix, "> ");
199
1.99M
    } else {
200
1.99M
      cmark_strbuf_truncate(renderer->prefix, renderer->prefix->size - 2);
201
1.99M
      BLANKLINE();
202
1.99M
    }
203
3.99M
    break;
204
205
1.95M
  case CMARK_NODE_LIST:
206
1.95M
    if (!entering && node->next && (node->next->type == CMARK_NODE_LIST)) {
207
      // this ensures that a following indented code block or list will be
208
      // inteprereted correctly.
209
169k
      CR();
210
169k
      LIT("<!-- end list -->");
211
169k
      BLANKLINE();
212
169k
    }
213
1.95M
    break;
214
215
2.15M
  case CMARK_NODE_ITEM:
216
2.15M
    if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
217
2.12M
      marker_width = 4;
218
2.12M
    } else {
219
31.8k
      list_number = cmark_node_get_list_start(node->parent);
220
31.8k
      list_delim = cmark_node_get_list_delim(node->parent);
221
31.8k
      tmp = node;
222
67.2k
      while (tmp->prev) {
223
35.3k
        tmp = tmp->prev;
224
35.3k
        list_number += 1;
225
35.3k
      }
226
      // we ensure a width of at least 4 so
227
      // we get nice transition from single digits
228
      // to double
229
31.8k
      snprintf(listmarker, LISTMARKER_SIZE, "%d%s%s", list_number,
230
31.8k
               list_delim == CMARK_PAREN_DELIM ? ")" : ".",
231
31.8k
               list_number < 10 ? "  " : " ");
232
31.8k
      marker_width = strlen(listmarker);
233
31.8k
    }
234
2.15M
    if (entering) {
235
1.07M
      if (cmark_node_get_list_type(node->parent) == CMARK_BULLET_LIST) {
236
1.06M
        LIT("  - ");
237
1.06M
        renderer->begin_content = true;
238
1.06M
      } else {
239
15.9k
        LIT(listmarker);
240
15.9k
        renderer->begin_content = true;
241
15.9k
      }
242
5.39M
      for (i = marker_width; i--;) {
243
4.31M
        cmark_strbuf_putc(renderer->prefix, ' ');
244
4.31M
      }
245
1.07M
    } else {
246
1.07M
      cmark_strbuf_truncate(renderer->prefix,
247
1.07M
                            renderer->prefix->size - marker_width);
248
1.07M
      CR();
249
1.07M
    }
250
2.15M
    break;
251
252
167k
  case CMARK_NODE_HEADING:
253
167k
    if (entering) {
254
232k
      for (i = cmark_node_get_heading_level(node); i > 0; i--) {
255
148k
        LIT("#");
256
148k
      }
257
83.8k
      LIT(" ");
258
83.8k
      renderer->begin_content = true;
259
83.8k
      renderer->no_linebreaks = true;
260
83.8k
    } else {
261
83.8k
      renderer->no_linebreaks = false;
262
83.8k
      BLANKLINE();
263
83.8k
    }
264
167k
    break;
265
266
66.4k
  case CMARK_NODE_CODE_BLOCK:
267
268
66.4k
    first_in_list_item = node->prev == NULL && node->parent &&
269
66.4k
                         node->parent->type == CMARK_NODE_ITEM;
270
271
66.4k
    if (!first_in_list_item) {
272
59.9k
      BLANKLINE();
273
59.9k
    }
274
66.4k
    info = cmark_node_get_fence_info(node);
275
66.4k
    fencechar[0] = strchr(info, '`') == NULL ? '`' : '~';
276
66.4k
    code = cmark_node_get_literal(node);
277
278
66.4k
    numticks = longest_backtick_sequence(code) + 1;
279
66.4k
    if (numticks < 3) {
280
64.5k
      numticks = 3;
281
64.5k
    }
282
698k
    for (i = 0; i < numticks; i++) {
283
631k
      LIT(fencechar);
284
631k
    }
285
66.4k
    LIT(" ");
286
66.4k
    OUT(info, false, LITERAL);
287
66.4k
    CR();
288
66.4k
    OUT(cmark_node_get_literal(node), false, LITERAL);
289
66.4k
    CR();
290
698k
    for (i = 0; i < numticks; i++) {
291
631k
      LIT(fencechar);
292
631k
    }
293
294
66.4k
    BLANKLINE();
295
66.4k
    break;
296
297
90.3k
  case CMARK_NODE_HTML_BLOCK:
298
90.3k
    BLANKLINE();
299
90.3k
    OUT(cmark_node_get_literal(node), false, LITERAL);
300
90.3k
    BLANKLINE();
301
90.3k
    break;
302
303
0
  case CMARK_NODE_CUSTOM_BLOCK:
304
0
    BLANKLINE();
305
0
    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
306
0
        false, LITERAL);
307
0
    BLANKLINE();
308
0
    break;
309
310
47.1k
  case CMARK_NODE_THEMATIC_BREAK:
311
47.1k
    BLANKLINE();
312
47.1k
    LIT("-----");
313
47.1k
    BLANKLINE();
314
47.1k
    break;
315
316
1.53M
  case CMARK_NODE_PARAGRAPH:
317
1.53M
    if (!entering) {
318
766k
      BLANKLINE();
319
766k
    }
320
1.53M
    break;
321
322
4.07M
  case CMARK_NODE_TEXT:
323
4.07M
    OUT(cmark_node_get_literal(node), allow_wrap, NORMAL);
324
4.07M
    break;
325
326
23.8k
  case CMARK_NODE_LINEBREAK:
327
23.8k
    if (!(CMARK_OPT_HARDBREAKS & options)) {
328
20.3k
      LIT("  ");
329
20.3k
    }
330
23.8k
    CR();
331
23.8k
    break;
332
333
2.17M
  case CMARK_NODE_SOFTBREAK:
334
2.17M
    if (CMARK_OPT_HARDBREAKS & options) {
335
915k
      LIT("  ");
336
915k
      CR();
337
1.25M
    } else if (!renderer->no_linebreaks && renderer->width == 0 &&
338
1.25M
               !(CMARK_OPT_HARDBREAKS & options) &&
339
1.25M
               !(CMARK_OPT_NOBREAKS & options)) {
340
11.3k
      CR();
341
1.24M
    } else {
342
1.24M
      OUT(" ", allow_wrap, LITERAL);
343
1.24M
    }
344
2.17M
    break;
345
346
147k
  case CMARK_NODE_CODE:
347
147k
    code = cmark_node_get_literal(node);
348
147k
    code_len = strlen(code);
349
147k
    numticks = shortest_unused_backtick_sequence(code);
350
147k
    has_nonspace = false;
351
165k
    for (i=0; i < code_len; i++) {
352
158k
      if (code[i] != ' ') {
353
141k
        has_nonspace = true;
354
141k
        break;
355
141k
      }
356
158k
    }
357
147k
    extra_spaces = code_len == 0 ||
358
147k
            code[0] == '`' || code[code_len - 1] == '`' ||
359
147k
            (has_nonspace && code[0] == ' ' && code[code_len - 1] == ' ');
360
439k
    for (i = 0; i < numticks; i++) {
361
291k
      LIT("`");
362
291k
    }
363
147k
    if (extra_spaces) {
364
2.99k
      LIT(" ");
365
2.99k
    }
366
147k
    OUT(cmark_node_get_literal(node), allow_wrap, LITERAL);
367
147k
    if (extra_spaces) {
368
2.99k
      LIT(" ");
369
2.99k
    }
370
439k
    for (i = 0; i < numticks; i++) {
371
291k
      LIT("`");
372
291k
    }
373
147k
    break;
374
375
296k
  case CMARK_NODE_HTML_INLINE:
376
296k
    OUT(cmark_node_get_literal(node), false, LITERAL);
377
296k
    break;
378
379
0
  case CMARK_NODE_CUSTOM_INLINE:
380
0
    OUT(entering ? cmark_node_get_on_enter(node) : cmark_node_get_on_exit(node),
381
0
        false, LITERAL);
382
0
    break;
383
384
273k
  case CMARK_NODE_STRONG:
385
273k
    if (entering) {
386
136k
      LIT("**");
387
136k
    } else {
388
136k
      LIT("**");
389
136k
    }
390
273k
    break;
391
392
570k
  case CMARK_NODE_EMPH:
393
    // If we have EMPH(EMPH(x)), we need to use *_x_*
394
    // because **x** is STRONG(x):
395
570k
    if (node->parent && node->parent->type == CMARK_NODE_EMPH &&
396
570k
        node->next == NULL && node->prev == NULL) {
397
1.51k
      emph_delim = "_";
398
568k
    } else {
399
568k
      emph_delim = "*";
400
568k
    }
401
570k
    if (entering) {
402
285k
      LIT(emph_delim);
403
285k
    } else {
404
285k
      LIT(emph_delim);
405
285k
    }
406
570k
    break;
407
408
500k
  case CMARK_NODE_LINK:
409
500k
    if (is_autolink(node)) {
410
90.1k
      if (entering) {
411
90.1k
        LIT("<");
412
90.1k
        if (strncmp(cmark_node_get_url(node), "mailto:", 7) == 0) {
413
72.6k
          LIT((const char *)cmark_node_get_url(node) + 7);
414
72.6k
        } else {
415
17.5k
          LIT((const char *)cmark_node_get_url(node));
416
17.5k
        }
417
90.1k
        LIT(">");
418
        // return signal to skip contents of node...
419
90.1k
        return 0;
420
90.1k
      }
421
410k
    } else {
422
410k
      if (entering) {
423
205k
        LIT("[");
424
205k
      } else {
425
205k
        LIT("](");
426
205k
        OUT(cmark_node_get_url(node), false, URL);
427
205k
        title = cmark_node_get_title(node);
428
205k
        if (strlen(title) > 0) {
429
2.47k
          LIT(" \"");
430
2.47k
          OUT(title, false, TITLE);
431
2.47k
          LIT("\"");
432
2.47k
        }
433
205k
        LIT(")");
434
205k
      }
435
410k
    }
436
410k
    break;
437
438
410k
  case CMARK_NODE_IMAGE:
439
17.3k
    if (entering) {
440
8.69k
      LIT("![");
441
8.69k
    } else {
442
8.69k
      LIT("](");
443
8.69k
      OUT(cmark_node_get_url(node), false, URL);
444
8.69k
      title = cmark_node_get_title(node);
445
8.69k
      if (strlen(title) > 0) {
446
1.04k
        OUT(" \"", allow_wrap, LITERAL);
447
1.04k
        OUT(title, false, TITLE);
448
1.04k
        LIT("\"");
449
1.04k
      }
450
8.69k
      LIT(")");
451
8.69k
    }
452
17.3k
    break;
453
454
0
  default:
455
0
    assert(false);
456
0
    break;
457
18.1M
  }
458
459
18.0M
  return 1;
460
18.1M
}
461
462
42.5k
// Render the tree rooted at `root` as CommonMark by handing outc and
// S_render_node to cmark_render, and return whatever cmark_render returns.
char *cmark_render_commonmark(cmark_node *root, int options, int width) {
  // disable breaking on width when OPT_HARDBREAKS is set, since it has
  // a different meaning there
  const int effective_width = (options & CMARK_OPT_HARDBREAKS) ? 0 : width;

  return cmark_render(root, options, effective_width, outc, S_render_node);
}