Coverage Report

Created: 2025-08-29 06:15

/src/hoextdown/src/document.c
Line
Count
Source (jump to first uncovered line)
1
#include "document.h"
2
3
#include <assert.h>
4
#include <string.h>
5
#include <ctype.h>
6
#include <stdio.h>
7
8
#include "stack.h"
9
10
#ifndef _MSC_VER
11
#include <strings.h>
12
#else
13
#define strncasecmp _strnicmp
14
#endif
15
16
922k
#define REF_TABLE_SIZE 8
17
18
28.3M
#define BUFFER_BLOCK 0
19
20.4M
#define BUFFER_SPAN 1
20
19.6M
#define BUFFER_ATTRIBUTE 2
21
22
const char *hoedown_find_block_tag(const char *str, unsigned int len);
23
const char *hoedown_find_html5_block_tag(const char *str, unsigned int len);
24
25
/***************
26
 * LOCAL TYPES *
27
 ***************/
28
29
/* link_ref: reference to a link */
30
struct link_ref {
31
  unsigned int id;
32
33
  hoedown_buffer *link;
34
  hoedown_buffer *title;
35
  hoedown_buffer *attr;
36
37
  struct link_ref *next;
38
};
39
40
/* footnote_ref: reference to a footnote */
41
struct footnote_ref {
42
  unsigned int id;
43
44
  int is_used;
45
  unsigned int num;
46
47
  hoedown_buffer *contents;
48
49
  /* the original string id of the footnote, before conversion to an int */
50
  hoedown_buffer *name;
51
};
52
53
/* footnote_item: one node of a footnote_list */
struct footnote_item {
  struct footnote_ref *ref;   /* the footnote this node refers to */
  struct footnote_item *next; /* following node, NULL at the tail */
};
58
59
/* footnote_list: singly linked list of footnote_item, appended at the tail */
struct footnote_list {
  unsigned int count;         /* number of items appended so far */
  struct footnote_item *head; /* first item, NULL while the list is empty */
  struct footnote_item *tail; /* last item; new items are linked after it */
};
65
66
/* char_trigger: function pointer to render active chars */
67
/*   returns the number of chars taken care of */
68
/*   data is the pointer of the beginning of the span */
69
/*   offset is the number of valid chars before data */
70
typedef size_t
71
(*char_trigger)(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
72
73
static size_t char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
74
static size_t char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
75
static size_t char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
76
static size_t char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
77
static size_t char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
78
static size_t char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
79
static size_t char_langle_tag(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
80
static size_t char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
81
static size_t char_autolink_email(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
82
static size_t char_autolink_www(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
83
static size_t char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
84
static size_t char_image(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
85
static size_t char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
86
static size_t char_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
87
88
/* markdown_char_t: handler ids. The numeric value of each constant is the
 * slot of its handler in markdown_char_ptrs, so both lists must keep the
 * same order. */
enum markdown_char_t {
  MD_CHAR_NONE = 0,        /* no handler (NULL slot) */
  MD_CHAR_EMPHASIS,        /* char_emphasis */
  MD_CHAR_CODESPAN,        /* char_codespan */
  MD_CHAR_LINEBREAK,       /* char_linebreak */
  MD_CHAR_LINK,            /* char_link */
  MD_CHAR_IMAGE,           /* char_image */
  MD_CHAR_LANGLE,          /* char_langle_tag */
  MD_CHAR_ESCAPE,          /* char_escape */
  MD_CHAR_ENTITY,          /* char_entity */
  MD_CHAR_AUTOLINK_URL,    /* char_autolink_url */
  MD_CHAR_AUTOLINK_EMAIL,  /* char_autolink_email */
  MD_CHAR_AUTOLINK_WWW,    /* char_autolink_www */
  MD_CHAR_SUPERSCRIPT,     /* char_superscript */
  MD_CHAR_QUOTE,           /* char_quote */
  MD_CHAR_MATH             /* char_math */
};
105
106
/* markdown_char_ptrs: handler table indexed by enum markdown_char_t.
 * parse_inline looks up doc->active_char[byte] here; slot 0 (MD_CHAR_NONE)
 * is never called because a zero entry marks the byte as inactive. */
static char_trigger markdown_char_ptrs[] = {
  NULL,                 /* MD_CHAR_NONE */
  char_emphasis,        /* MD_CHAR_EMPHASIS */
  char_codespan,        /* MD_CHAR_CODESPAN */
  char_linebreak,       /* MD_CHAR_LINEBREAK */
  char_link,            /* MD_CHAR_LINK */
  char_image,           /* MD_CHAR_IMAGE */
  char_langle_tag,      /* MD_CHAR_LANGLE */
  char_escape,          /* MD_CHAR_ESCAPE */
  char_entity,          /* MD_CHAR_ENTITY */
  char_autolink_url,    /* MD_CHAR_AUTOLINK_URL */
  char_autolink_email,  /* MD_CHAR_AUTOLINK_EMAIL */
  char_autolink_www,    /* MD_CHAR_AUTOLINK_WWW */
  char_superscript,     /* MD_CHAR_SUPERSCRIPT */
  char_quote,           /* MD_CHAR_QUOTE */
  char_math             /* MD_CHAR_MATH */
};
123
124
struct hoedown_document {
125
  hoedown_renderer md;
126
  hoedown_renderer_data data;
127
128
  uint8_t attr_activation;
129
130
  struct link_ref *refs[REF_TABLE_SIZE];
131
  struct footnote_list footnotes_found;
132
  struct footnote_list footnotes_used;
133
  uint8_t active_char[256];
134
  hoedown_stack work_bufs[3];
135
  hoedown_extensions ext_flags;
136
  size_t max_nesting;
137
  int in_link_body;
138
139
  /* extra information provided to callbacks */
140
  const hoedown_buffer *link_id;
141
  const hoedown_buffer *link_inline_attr;
142
  const hoedown_buffer *link_ref_attr;
143
  int is_escape_char;
144
  hoedown_header_type header_type;
145
  hoedown_link_type link_type;
146
  const hoedown_buffer *footnote_id;
147
  int list_depth;
148
  int blockquote_depth;
149
  uint8_t ul_item_char;
150
  uint8_t hrule_char;
151
  uint8_t fencedcode_char;
152
  const hoedown_buffer *ol_numeral;
153
154
  hoedown_user_block user_block;
155
  hoedown_buffer *meta;
156
};
157
158
/***************************
159
 * HELPER FUNCTIONS *
160
 ***************************/
161
162
static hoedown_buffer *
163
newbuf(hoedown_document *doc, int type)
164
32.5M
{
165
32.5M
  static const size_t buf_size[3] = {256, 64, 64};
166
32.5M
  hoedown_buffer *work = NULL;
167
32.5M
  hoedown_stack *pool = &doc->work_bufs[type];
168
169
32.5M
  if (pool->size < pool->asize &&
170
32.5M
    pool->item[pool->size] != NULL) {
171
32.5M
    work = pool->item[pool->size++];
172
32.5M
    work->size = 0;
173
32.5M
  } else {
174
75.6k
    work = hoedown_buffer_new(buf_size[type]);
175
75.6k
    hoedown_stack_push(pool, work);
176
75.6k
  }
177
178
32.5M
  return work;
179
32.5M
}
180
181
static void
182
popbuf(hoedown_document *doc, int type)
183
31.3M
{
184
31.3M
  doc->work_bufs[type].size--;
185
31.3M
}
186
187
static void
188
unscape_text(hoedown_buffer *ob, hoedown_buffer *src)
189
305k
{
190
305k
  size_t i = 0, org;
191
311k
  while (i < src->size) {
192
310k
    org = i;
193
18.6M
    while (i < src->size && src->data[i] != '\\')
194
18.3M
      i++;
195
196
310k
    if (i > org)
197
226k
      hoedown_buffer_put(ob, src->data + org, i - org);
198
199
310k
    if (i + 1 >= src->size)
200
305k
      break;
201
202
5.94k
    hoedown_buffer_putc(ob, src->data[i + 1]);
203
5.94k
    i += 2;
204
5.94k
  }
205
305k
}
206
207
/* hash_link_ref: case-insensitive hash of the first `length` bytes of
 * link_ref. Every byte goes through tolower() before being mixed in, so
 * names differing only in ASCII case hash alike. Empty input hashes to 0. */
static unsigned int
hash_link_ref(const uint8_t *link_ref, size_t length)
{
  const uint8_t *cursor = link_ref;
  size_t remaining = length;
  unsigned int h = 0;

  while (remaining--) {
    h = tolower(*cursor) + (h << 6) + (h << 16) - h;
    cursor++;
  }

  return h;
}
218
219
static struct link_ref *
220
add_link_ref(
221
  struct link_ref **references,
222
  const uint8_t *name, size_t name_size)
223
132k
{
224
132k
  struct link_ref *ref = hoedown_calloc(1, sizeof(struct link_ref));
225
226
132k
  ref->id = hash_link_ref(name, name_size);
227
132k
  ref->next = references[ref->id % REF_TABLE_SIZE];
228
229
132k
  references[ref->id % REF_TABLE_SIZE] = ref;
230
132k
  return ref;
231
132k
}
232
233
static struct link_ref *
234
find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
235
555k
{
236
555k
  unsigned int hash = hash_link_ref(name, length);
237
555k
  struct link_ref *ref = NULL;
238
239
555k
  ref = references[hash % REF_TABLE_SIZE];
240
241
30.8M
  while (ref != NULL) {
242
30.6M
    if (ref->id == hash)
243
294k
      return ref;
244
245
30.3M
    ref = ref->next;
246
30.3M
  }
247
248
260k
  return NULL;
249
555k
}
250
251
static void
252
free_link_refs(struct link_ref **references)
253
10.1k
{
254
10.1k
  size_t i;
255
256
91.7k
  for (i = 0; i < REF_TABLE_SIZE; ++i) {
257
81.5k
    struct link_ref *r = references[i];
258
81.5k
    struct link_ref *next;
259
260
214k
    while (r) {
261
132k
      next = r->next;
262
132k
      hoedown_buffer_free(r->link);
263
132k
      hoedown_buffer_free(r->title);
264
132k
      hoedown_buffer_free(r->attr);
265
132k
      free(r);
266
132k
      r = next;
267
132k
    }
268
81.5k
  }
269
10.1k
}
270
271
static struct footnote_ref *
272
create_footnote_ref(struct footnote_list *list, const uint8_t *name, size_t name_size)
273
497k
{
274
497k
  struct footnote_ref *ref = hoedown_calloc(1, sizeof(struct footnote_ref));
275
276
497k
  ref->id = hash_link_ref(name, name_size);
277
278
497k
  return ref;
279
497k
}
280
281
static int
282
add_footnote_ref(struct footnote_list *list, struct footnote_ref *ref)
283
497k
{
284
497k
  struct footnote_item *item = hoedown_calloc(1, sizeof(struct footnote_item));
285
497k
  if (!item)
286
0
    return 0;
287
497k
  item->ref = ref;
288
289
497k
  if (list->head == NULL) {
290
1.61k
    list->head = list->tail = item;
291
496k
  } else {
292
496k
    list->tail->next = item;
293
496k
    list->tail = item;
294
496k
  }
295
497k
  list->count++;
296
297
497k
  return 1;
298
497k
}
299
300
static struct footnote_ref *
301
find_footnote_ref(struct footnote_list *list, uint8_t *name, size_t length)
302
14.6k
{
303
14.6k
  unsigned int hash = hash_link_ref(name, length);
304
14.6k
  struct footnote_item *item = NULL;
305
306
14.6k
  item = list->head;
307
308
26.5M
  while (item != NULL) {
309
26.5M
    if (item->ref->id == hash)
310
6.45k
      return item->ref;
311
26.5M
    item = item->next;
312
26.5M
  }
313
314
8.16k
  return NULL;
315
14.6k
}
316
317
static void
318
free_footnote_ref(struct footnote_ref *ref)
319
497k
{
320
497k
  hoedown_buffer_free(ref->contents);
321
497k
  hoedown_buffer_free(ref->name);
322
497k
  free(ref);
323
497k
}
324
325
static void
326
free_footnote_list(struct footnote_list *list, int free_refs)
327
20.3k
{
328
20.3k
  struct footnote_item *item = list->head;
329
20.3k
  struct footnote_item *next;
330
331
518k
  while (item) {
332
497k
    next = item->next;
333
497k
    if (free_refs)
334
497k
      free_footnote_ref(item->ref);
335
497k
    free(item);
336
497k
    item = next;
337
497k
  }
338
20.3k
}
339
340
341
/*
 * Check whether a char is a Markdown spacing char.
 *
 * Only the plain space and the newline count as spacing here: tabs and
 * carriage returns are filtered out during the preprocessing phase.
 *
 * A UTF-8 compliant check would instead decode a Unicode codepoint from
 * this character and test it for space properties.
 */
static int
_isspace(int c)
{
  switch (c) {
  case ' ':
  case '\n':
    return 1;
  default:
    return 0;
  }
}
357
358
/* is_empty_all: returns 1 when every one of the `size` bytes at data is a
 * spacing char (see _isspace), 0 otherwise. Empty input counts as empty. */
static int
is_empty_all(const uint8_t *data, size_t size)
{
  size_t pos;

  for (pos = 0; pos < size; pos++) {
    if (!_isspace(data[pos]))
      return 0;
  }

  return 1;
}
366
367
/*
368
 * Replace all spacing characters in data with spaces. As a special
369
 * case, this collapses a newline with the previous space, if possible.
370
 */
371
static void
372
replace_spacing(hoedown_buffer *ob, const uint8_t *data, size_t size)
373
525k
{
374
525k
  size_t i = 0, mark;
375
525k
  hoedown_buffer_grow(ob, size);
376
1.21M
  while (1) {
377
1.21M
    mark = i;
378
85.3M
    while (i < size && data[i] != '\n') i++;
379
1.21M
    hoedown_buffer_put(ob, data + mark, i - mark);
380
381
1.21M
    if (i >= size) break;
382
383
690k
    if (!(i > 0 && data[i-1] == ' '))
384
217k
      hoedown_buffer_putc(ob, ' ');
385
690k
    i++;
386
690k
  }
387
525k
}
388
389
/****************************
390
 * INLINE PARSING FUNCTIONS *
391
 ****************************/
392
393
/* is_mail_autolink • looks for the address part of a mail autolink and '>' */
/* this is less strict than the original markdown e-mail address matching */
/* returns the length up to and including '>', or 0 when data is not made */
/* of [-@._a-zA-Z0-9]+ with exactly one '@' followed by '>' */
static size_t
is_mail_autolink(uint8_t *data, size_t size)
{
  size_t pos, at_count = 0;

  for (pos = 0; pos < size; pos++) {
    uint8_t ch = data[pos];

    if (isalnum(ch))
      continue;

    if (ch == '@') {
      at_count++;
      continue;
    }

    if (ch == '-' || ch == '.' || ch == '_')
      continue;

    /* the closing bracket is only valid after exactly one '@' */
    if (ch == '>')
      return (at_count == 1) ? pos + 1 : 0;

    /* any other byte cannot be part of an address */
    return 0;
  }

  /* ran out of input before seeing '>' */
  return 0;
}
424
425
/* script_tag_length • length of a "<? ... ?>" tag at the start of data.
 * Returns 0 when data does not start with "<?" or is shorter than 3 bytes.
 * A "?>" inside a '...' or "..." string does not close the tag; a quote
 * preceded by a backslash does not close the string. When no closing "?>"
 * is found, the whole of `size` is returned. */
static size_t
script_tag_length(uint8_t *data, size_t size)
{
  size_t pos;
  uint8_t quote = 0;  /* the open quote char, or 0 when outside a string */

  if (size < 3 || data[0] != '<' || data[1] != '?')
    return 0;

  for (pos = 2; pos < size; pos++) {
    uint8_t ch = data[pos];

    if (quote == 0 && ch == '>' && data[pos - 1] == '?')
      return pos + 1;

    if (ch != '\'' && ch != '"')
      continue;

    if (quote == 0)
      quote = ch;
    else if (ch == quote && data[pos - 1] != '\\')
      quote = 0;
  }

  /* unterminated: pos == size here */
  return pos;
}
459
460
/* tag_length • returns the length of the given tag, or 0 is it's not valid */
461
static size_t
462
tag_length(uint8_t *data, size_t size, hoedown_autolink_type *autolink, int script_tag)
463
234k
{
464
234k
  size_t i, j;
465
466
  /* a valid tag can't be shorter than 3 chars */
467
234k
  if (size < 3) return 0;
468
469
218k
  if (data[0] != '<') return 0;
470
471
  /* HTML comment, laxist form */
472
218k
  if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
473
39.4k
    i = 5;
474
475
9.25M
    while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
476
9.21M
      i++;
477
478
39.4k
    i++;
479
480
39.4k
    if (i <= size)
481
17.7k
      return i;
482
39.4k
  }
483
484
  /* begins with a '<' optionally followed by '/', followed by letter or number */
485
201k
  i = (data[1] == '/') ? 2 : 1;
486
487
201k
  if (!isalnum(data[i])) {
488
96.9k
    if (script_tag) {
489
96.9k
      return script_tag_length(data, size);
490
96.9k
    }
491
0
    return 0;
492
96.9k
  }
493
494
  /* scheme test */
495
104k
  *autolink = HOEDOWN_AUTOLINK_NONE;
496
497
  /* try to find the beginning of an URI */
498
691k
  while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
499
587k
    i++;
500
501
104k
  if (i > 1 && i < size && data[i] == '@') {
502
8.63k
    if ((j = is_mail_autolink(data + i, size - i)) != 0) {
503
1.08k
      *autolink = HOEDOWN_AUTOLINK_EMAIL;
504
1.08k
      return i + j;
505
1.08k
    }
506
8.63k
  }
507
508
103k
  if (i > 2 && i < size && data[i] == ':') {
509
27.2k
    *autolink = HOEDOWN_AUTOLINK_NORMAL;
510
27.2k
    i++;
511
27.2k
  }
512
513
  /* completing autolink test: no spacing or ' or " */
514
103k
  if (i >= size)
515
5.57k
    *autolink = HOEDOWN_AUTOLINK_NONE;
516
517
97.4k
  else if (*autolink) {
518
27.2k
    j = i;
519
520
45.8M
    while (i < size) {
521
45.8M
      if (data[i] == '\\') i += 2;
522
45.8M
      else if (data[i] == '>' || data[i] == '\'' ||
523
45.8M
          data[i] == '"' || data[i] == ' ' || data[i] == '\n')
524
18.5k
          break;
525
45.8M
      else i++;
526
45.8M
    }
527
528
27.2k
    if (i >= size) return 0;
529
18.5k
    if (i > j && data[i] == '>') return i + 1;
530
    /* one of the forbidden chars has been found */
531
11.2k
    *autolink = HOEDOWN_AUTOLINK_NONE;
532
11.2k
  }
533
534
  /* looking for something looking like a tag end */
535
159M
  while (i < size && data[i] != '>') i++;
536
87.0k
  if (i >= size) return 0;
537
25.7k
  return i + 1;
538
87.0k
}
539
540
/* parse_inline • parses inline markdown elements */
541
static void
542
parse_inline(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
543
1.05M
{
544
1.05M
  size_t i = 0, end = 0, consumed = 0;
545
1.05M
  hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
546
1.05M
  uint8_t *active_char = doc->active_char;
547
548
1.05M
  if (doc->work_bufs[BUFFER_SPAN].size +
549
1.05M
    doc->work_bufs[BUFFER_BLOCK].size > doc->max_nesting)
550
3.35k
    return;
551
552
7.44M
  while (i < size) {
553
7.22M
    size_t user_block = 0;
554
134M
    while (end < size) {
555
133M
      if (doc->user_block) {
556
0
        user_block = doc->user_block(data+end, size - end, &doc->data);
557
0
        if (user_block) {
558
0
          break;
559
0
        }
560
0
      }
561
      /* copying inactive chars into the output */
562
133M
      if (active_char[data[end]] != 0) {
563
6.39M
        break;
564
6.39M
      }
565
127M
      end++;
566
127M
    }
567
568
7.22M
    if (doc->md.normal_text) {
569
7.22M
      work.data = data + i;
570
7.22M
      work.size = end - i;
571
7.22M
      doc->md.normal_text(ob, &work, &doc->data);
572
7.22M
    }
573
0
    else
574
0
      hoedown_buffer_put(ob, data + i, end - i);
575
576
7.22M
    if (end >= size) {
577
830k
      break;
578
830k
    }
579
6.39M
    i = end;
580
581
6.39M
    if (user_block) {
582
0
      work.data = data + i;
583
0
      work.size = user_block;
584
0
      end = user_block;
585
0
      if (doc->md.user_block) {
586
0
        doc->md.user_block(ob, &work, &doc->data);
587
0
      } else {
588
0
        hoedown_buffer_put(ob, data + i, size - i);
589
0
      }
590
0
      if (!end) {
591
0
        end = i + 1;
592
0
      } else {
593
0
        i += end;
594
0
        end = i;
595
0
        consumed = i;
596
0
      }
597
6.39M
    } else {
598
6.39M
      end = markdown_char_ptrs[ (int)active_char[data[end]] ](ob, doc, data + i, i - consumed, size - i);
599
6.39M
      if (!end) /* no action from the callback */
600
5.42M
        end = i + 1;
601
966k
      else {
602
966k
        i += end;
603
966k
        end = i;
604
966k
        consumed = i;
605
966k
      }
606
6.39M
    }
607
6.39M
  }
608
1.05M
}
609
610
/* parse_inline_attributes • parses inline attributes that sit at the very
 * start of data (parse_attributes handles the ones at the end of data).
 * data must open with '{', directly followed by attr_activation when that
 * is non-zero. Returns the position just past the first unescaped '}', or 0
 * when there is no such attribute block. When attr is not NULL, the text
 * between the opener (and activation char) and the '}' is appended to it. */
static size_t parse_inline_attributes(uint8_t *data, size_t size, struct hoedown_buffer *attr, uint8_t attr_activation)
{
  size_t attr_start, i = 0;

  if (size < 1 || data[0] != '{')
    return 0;

  if (!attr_activation) {
    attr_start = 1;
  } else {
    if (size < 2 || data[1] != attr_activation)
      return 0;
    /* skip an extra character to skip over the activation character */
    attr_start = 2;
  }

  while (i < size) {
    switch (data[i]) {
    case '\\':
      /* ignore escaped characters */
      i += 2;
      break;

    case '}':
      if (attr != NULL)
        hoedown_buffer_put(attr, data + attr_start, i - attr_start);
      return i + 1;

    default:
      i++;
      break;
    }
  }

  return 0;
}
643
644
645
/* parse_attributes • parses special attributes at the end of the data */
646
static size_t parse_attributes(uint8_t *data, size_t size, struct hoedown_buffer *attr, struct hoedown_buffer *block_attr, const char *block_id, int is_header, uint8_t attr_activation)
647
891k
{
648
891k
  size_t i, len, begin = 0, end = 0;
649
650
891k
  if (size < 1)
651
36.2k
    return 0;
652
653
854k
  i = size;
654
1.09M
  while (i && data[i-1] == '\n') {
655
242k
    i--;
656
242k
  }
657
854k
  len = i;
658
659
854k
  if (i && data[i-1] == '}') {
660
31.1M
    do {
661
31.1M
      i--;
662
31.1M
    } while (i && data[i] != '{');
663
664
88.3k
    begin = i + 1;
665
88.3k
    end = len - 1;
666
423k
    while (i && data[i-1] == ' ') {
667
334k
      i--;
668
334k
    }
669
88.3k
  }
670
671
854k
  if (is_header && i && data[i-1] == '#') {
672
16.1k
    while (i && data[i-1] == '#') {
673
8.41k
      i--;
674
8.41k
    }
675
15.8k
    while (i && data[i-1] == ' ') {
676
8.11k
      i--;
677
8.11k
    }
678
7.76k
  }
679
680
854k
  if (begin && end && data[begin-1] == '{' && data[end] == '}') {
681
77.2k
    if (begin >=2 && data[begin-2] == '\\' && data[end-1] == '\\') {
682
230
      return len;
683
230
    }
684
685
77.0k
    if (block_attr && data[begin] == '@') {
686
      /* skip the @ by incrementing past it */
687
12.8k
      begin++;
688
12.8k
      if (*block_id) {
689
        /* if a block_id was fed in, check to make sure the string until the
690
         * space is identical */
691
51.1k
        while (begin < end && *block_id) {
692
41.4k
          if (data[begin] != (uint8_t)(*block_id)) {
693
3.15k
            return len;
694
3.15k
          }
695
38.3k
          begin++;
696
38.3k
          block_id++;
697
38.3k
        }
698
        /* it might have matched only the first portion of block_id; make sure
699
         * there's no more to it here */
700
9.68k
        if (*block_id) {
701
919
          return len;
702
919
        }
703
9.68k
      }
704
8.76k
      if (begin < end && data[begin] != ' ') {
705
924
        return len;
706
924
      }
707
7.84k
      if (block_attr) {
708
7.84k
        if (block_attr->size) {
709
5.69k
          hoedown_buffer_reset(block_attr);
710
5.69k
        }
711
7.84k
        hoedown_buffer_put(block_attr, data + begin, end - begin);
712
7.84k
      }
713
7.84k
      len = i;
714
7.84k
      if (attr) {
715
7.84k
        len = parse_attributes(data, len, attr, NULL, "", is_header, attr_activation);
716
7.84k
      }
717
64.1k
    } else if (attr && (!attr_activation || attr_activation == data[begin])) {
718
50.6k
      if (attr->size) {
719
0
        hoedown_buffer_reset(attr);
720
0
      }
721
50.6k
      if (attr_activation) {
722
0
        begin++;
723
0
      }
724
50.6k
      hoedown_buffer_put(attr, data + begin, end - begin);
725
50.6k
      len = i;
726
50.6k
    }
727
77.0k
  }
728
729
849k
  return len;
730
854k
}
731
732
/* is_escaped • returns whether special char at data[loc] is escaped by '\\'.
 * Counts the unbroken run of backslashes right before loc: an odd count
 * escapes data[loc] (returns 1), an even count does not (returns 0). */
static int
is_escaped(uint8_t *data, size_t loc)
{
  size_t backslashes = 0;

  while (backslashes < loc && data[loc - 1 - backslashes] == '\\')
    backslashes++;

  return (int)(backslashes % 2);
}
743
744
/* is_backslashed • returns whether special char at data[loc] is directly
 * preceded by '\\'. Stricter than is_escaped: only the single previous byte
 * is looked at, so a backslash that is itself escaped still counts. */
static int
is_backslashed(uint8_t *data, size_t loc)
{
  if (loc == 0)
    return 0;

  return data[loc - 1] == '\\';
}
750
751
/* find_emph_char • looks for the next emph char c, skipping other constructs.
 * Returns the index of the first unescaped c that is outside code spans and
 * links, or 0 when there is none (a c at index 0 therefore also yields 0).
 * When a code span or link turns out to be malformed, the first c seen
 * inside it is returned instead. */
static size_t
find_emph_char(uint8_t *data, size_t size, uint8_t c)
{
  size_t i = 0;

  while (i < size) {
    /* jump to the next byte that needs a decision */
    while (i < size && data[i] != c && data[i] != '[' && data[i] != '`')
      i++;

    if (i == size)
      return 0;

    /* not counting escaped chars */
    if (is_escaped(data, i)) {
      i++;
      continue;
    }

    if (data[i] == c)
      return i;

    if (data[i] == '`') {
      /* skipping a codespan */
      size_t opening = 0, run = 0;
      size_t fallback = 0;

      /* counting the number of opening backticks */
      for (; i < size && data[i] == '`'; i++)
        opening++;

      if (i >= size)
        return 0;

      /* finding the matching closing sequence */
      for (; i < size && run < opening; i++) {
        if (!fallback && data[i] == c)
          fallback = i;
        run = (data[i] == '`') ? run + 1 : 0;
      }

      /* not a well-formed codespan; use found matching emph char */
      if (run < opening && i >= size)
        return fallback;
    } else if (data[i] == '[') {
      /* skipping a link */
      size_t fallback = 0;
      uint8_t closer;

      for (i++; i < size && data[i] != ']'; i++) {
        if (!fallback && data[i] == c)
          fallback = i;
      }

      i++;
      while (i < size && _isspace(data[i]))
        i++;

      if (i >= size)
        return fallback;

      /* what follows the bracketed text decides how the link ends */
      if (data[i] == '[')
        closer = ']';
      else if (data[i] == '(')
        closer = ')';
      else if (fallback)
        return fallback;
      else
        continue;

      for (i++; i < size && data[i] != closer; i++) {
        if (!fallback && data[i] == c)
          fallback = i;
      }

      if (i >= size)
        return fallback;

      i++;
    }
  }

  return 0;
}
843
844
/* find_separator_char • looks for the next unbackslashed separator character c.
 * Returns its index, or 0 when there is none (a separator at index 0 also
 * yields 0). */
static size_t
find_separator_char(uint8_t *data, size_t size, uint8_t c)
{
  size_t pos;

  for (pos = 0; pos < size; pos++) {
    if (data[pos] != c)
      continue;

    /* not counting backslashed separators */
    if (!is_backslashed(data, pos))
      return pos;
  }

  return 0;
}
868
869
/* parse_emph1 • parsing single emphase */
870
/* closed by a symbol not preceded by spacing and not followed by symbol */
871
static size_t
872
parse_emph1(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c)
873
90.0k
{
874
90.0k
  size_t i = 0, len;
875
90.0k
  hoedown_buffer *work = 0;
876
90.0k
  int r;
877
878
  /* skipping one symbol if coming from emph3 */
879
90.0k
  if (size > 1 && data[0] == c && data[1] == c) i = 1;
880
881
108k
  while (i < size) {
882
108k
    len = find_emph_char(data + i, size - i, c);
883
108k
    if (!len) return 0;
884
51.7k
    i += len;
885
51.7k
    if (i >= size) return 0;
886
887
51.7k
    if (data[i] == c && !_isspace(data[i - 1])) {
888
889
34.1k
      if (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS ||
890
34.1k
        (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_UNDERLINE_EMPHASIS && c == '_')) {
891
34.1k
        if (i + 1 < size && isalnum(data[i + 1]))
892
368
          continue;
893
34.1k
      }
894
895
33.8k
      work = newbuf(doc, BUFFER_SPAN);
896
33.8k
      parse_inline(work, doc, data, i);
897
898
33.8k
      if (doc->ext_flags & HOEDOWN_EXT_UNDERLINE && c == '_')
899
25.6k
        r = doc->md.underline(ob, work, &doc->data);
900
8.10k
      else
901
8.10k
        r = doc->md.emphasis(ob, work, &doc->data);
902
903
33.8k
      popbuf(doc, BUFFER_SPAN);
904
33.8k
      return r ? i + 1 : 0;
905
34.1k
    }
906
51.7k
  }
907
908
0
  return 0;
909
90.0k
}
910
911
/* parse_emph2 • parsing single emphase */
912
static size_t
913
parse_emph2(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c)
914
40.6k
{
915
40.6k
  size_t i = 0, len;
916
40.6k
  hoedown_buffer *work = 0;
917
40.6k
  int r;
918
919
55.9k
  while (i < size) {
920
55.6k
    len = find_emph_char(data + i, size - i, c);
921
55.6k
    if (!len) return 0;
922
36.1k
    i += len;
923
924
36.1k
    if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
925
20.8k
      work = newbuf(doc, BUFFER_SPAN);
926
20.8k
      parse_inline(work, doc, data, i);
927
928
20.8k
      if (c == '~')
929
3.09k
        r = doc->md.strikethrough(ob, work, &doc->data);
930
17.7k
      else if (c == '=')
931
553
        r = doc->md.highlight(ob, work, &doc->data);
932
17.2k
      else
933
17.2k
        r = doc->md.double_emphasis(ob, work, &doc->data);
934
935
20.8k
      popbuf(doc, BUFFER_SPAN);
936
20.8k
      return r ? i + 2 : 0;
937
20.8k
    }
938
15.3k
    i++;
939
15.3k
  }
940
342
  return 0;
941
40.6k
}
942
943
/* parse_emph3 • parsing single emphase */
944
/* finds the first closing tag, and delegates to the other emph */
945
static size_t
946
parse_emph3(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c)
947
43.2k
{
948
43.2k
  size_t i = 0, len;
949
43.2k
  int r;
950
951
68.6k
  while (i < size) {
952
68.6k
    len = find_emph_char(data + i, size - i, c);
953
68.6k
    if (!len) return 0;
954
38.1k
    i += len;
955
956
    /* skip spacing preceded symbols */
957
38.1k
    if (data[i] != c || _isspace(data[i - 1]))
958
25.3k
      continue;
959
960
12.7k
    if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && doc->md.triple_emphasis) {
961
      /* triple symbol found */
962
2.32k
      hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
963
964
2.32k
      parse_inline(work, doc, data, i);
965
2.32k
      r = doc->md.triple_emphasis(ob, work, &doc->data);
966
2.32k
      popbuf(doc, BUFFER_SPAN);
967
2.32k
      return r ? i + 3 : 0;
968
969
10.4k
    } else if (i + 1 < size && data[i + 1] == c) {
970
      /* double symbol found, handing over to emph1 */
971
5.19k
      len = parse_emph1(ob, doc, data - 2, size + 2, c);
972
5.19k
      if (!len) return 0;
973
0
      else return len - 2;
974
975
5.25k
    } else {
976
      /* single symbol found, handing over to emph2 */
977
5.25k
      len = parse_emph2(ob, doc, data - 1, size + 1, c);
978
5.25k
      if (!len) return 0;
979
0
      else return len - 1;
980
5.25k
    }
981
12.7k
  }
982
0
  return 0;
983
43.2k
}
984
985
/* parse_math • parses a math span until the given ending delimiter */
986
static size_t
987
parse_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size, const char *end, size_t delimsz, int displaymode)
988
32.6k
{
989
32.6k
  hoedown_buffer text = { NULL, 0, 0, 0, NULL, NULL, NULL };
990
32.6k
  size_t i = delimsz;
991
992
32.6k
  if (!doc->md.math)
993
0
    return 0;
994
995
  /* find ending delimiter */
996
866k
  while (1) {
997
73.6M
    while (i < size && data[i] != (uint8_t)end[0])
998
72.8M
      i++;
999
1000
866k
    if (i >= size)
1001
19.4k
      return 0;
1002
1003
846k
    if (!is_escaped(data, i) && !(i + delimsz > size)
1004
846k
      && memcmp(data + i, end, delimsz) == 0)
1005
13.2k
      break;
1006
1007
833k
    i++;
1008
833k
  }
1009
1010
  /* prepare buffers */
1011
13.2k
  text.data = data + delimsz;
1012
13.2k
  text.size = i - delimsz;
1013
1014
  /* if this is a $$ and MATH_EXPLICIT is not active,
1015
   * guess whether displaymode should be enabled from the context */
1016
13.2k
  i += delimsz;
1017
13.2k
  if (delimsz == 2 && !(doc->ext_flags & HOEDOWN_EXT_MATH_EXPLICIT))
1018
0
    displaymode = is_empty_all(data - offset, offset) && is_empty_all(data + i, size - i);
1019
1020
  /* call callback */
1021
13.2k
  if (doc->md.math(ob, &text, displaymode, &doc->data))
1022
13.2k
    return i;
1023
1024
0
  return 0;
1025
13.2k
}
1026
1027
/* char_emphasis • single and double emphasis parsing */
1028
static size_t
1029
char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1030
1.27M
{
1031
1.27M
  uint8_t c = data[0];
1032
1.27M
  size_t ret;
1033
1034
1.27M
  if (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS) {
1035
1.27M
    if (offset > 0 && !_isspace(data[-1]) && data[-1] != '>' && data[-1] != '(')
1036
987k
      return 0;
1037
1.27M
  }
1038
1039
291k
  if (size > 2 && data[1] != c) {
1040
    /* spacing cannot follow an opening emphasis;
1041
     * strikethrough and highlight only takes two characters '~~' */
1042
127k
    if (c == '~' || c == '=' || _isspace(data[1]) || (ret = parse_emph1(ob, doc, data + 1, size - 1, c)) == 0)
1043
95.5k
      return 0;
1044
1045
32.1k
    return ret + 1;
1046
127k
  }
1047
1048
164k
  if (size > 3 && data[1] == c && data[2] != c) {
1049
38.0k
    if (_isspace(data[2]) || (ret = parse_emph2(ob, doc, data + 2, size - 2, c)) == 0)
1050
25.3k
      return 0;
1051
1052
12.7k
    return ret + 2;
1053
38.0k
  }
1054
1055
126k
  if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
1056
52.2k
    if (c == '~' || c == '=' || _isspace(data[3]) || (ret = parse_emph3(ob, doc, data + 3, size - 3, c)) == 0)
1057
51.1k
      return 0;
1058
1059
1.04k
    return ret + 3;
1060
52.2k
  }
1061
1062
73.8k
  return 0;
1063
126k
}
1064
1065
1066
/* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
1067
static size_t
1068
char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1069
536k
{
1070
536k
  if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
1071
509k
    return 0;
1072
1073
  /* removing the last space from ob and rendering */
1074
1.20M
  while (ob->size && ob->data[ob->size - 1] == ' ')
1075
1.17M
    ob->size--;
1076
1077
27.0k
  return doc->md.linebreak(ob, &doc->data) ? 1 : 0;
1078
536k
}
1079
1080
1081
/* char_codespan • '`' parsing a code span (assuming codespan != 0) */
1082
static size_t
1083
char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1084
102k
{
1085
102k
  hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
1086
102k
  size_t end, nb = 0, i, f_begin, f_end;
1087
1088
  /* counting the number of backticks in the delimiter */
1089
761k
  while (nb < size && data[nb] == '`')
1090
659k
    nb++;
1091
1092
  /* finding the next delimiter */
1093
102k
  i = 0;
1094
42.5M
  for (end = nb; end < size && i < nb; end++) {
1095
42.4M
    if (data[end] == '`') {
1096
160k
      if (end + 1 == size || !is_escaped(data, end)) {
1097
159k
        i++;
1098
159k
      } else {
1099
516
        i = 0;
1100
516
      }
1101
160k
    }
1102
42.2M
    else i = 0;
1103
42.4M
  }
1104
1105
102k
  if (i < nb && end >= size)
1106
82.4k
    return 0; /* no matching delimiter */
1107
1108
  /* trimming outside whitespace */
1109
19.9k
  f_begin = nb;
1110
292k
  while (f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\n'))
1111
272k
    f_begin++;
1112
1113
19.9k
  f_end = end - nb;
1114
130k
  while (f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\n'))
1115
110k
    f_end--;
1116
1117
  /* real code span */
1118
19.9k
  if (f_begin < f_end) {
1119
    /* needed for parse_attribute functions as buffer functions do not work with
1120
     * buffers made on the stack */
1121
13.7k
    hoedown_buffer *attr = newbuf(doc, BUFFER_ATTRIBUTE);
1122
1123
13.7k
    work.data = data + f_begin;
1124
13.7k
    work.size = f_end - f_begin;
1125
1126
13.7k
    if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
1127
13.7k
      end += parse_inline_attributes(data + end, size - end, attr, doc->attr_activation);
1128
13.7k
    }
1129
1130
13.7k
    if (!doc->md.codespan(ob, &work, attr, &doc->data))
1131
0
      end = 0;
1132
13.7k
    popbuf(doc, BUFFER_ATTRIBUTE);
1133
13.7k
  } else {
1134
6.20k
    if (!doc->md.codespan(ob, 0, 0, &doc->data))
1135
0
      end = 0;
1136
6.20k
  }
1137
1138
19.9k
  return end;
1139
102k
}
1140
1141
/* char_quote • '"' parsing a quote */
1142
static size_t
1143
char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1144
263k
{
1145
263k
  size_t end, nq = 0, i, f_begin, f_end;
1146
1147
  /* counting the number of quotes in the delimiter */
1148
34.8M
  while (nq < size && data[nq] == '"')
1149
34.6M
    nq++;
1150
1151
  /* finding the next delimiter */
1152
263k
  end = nq;
1153
464k
  while (1) {
1154
464k
    i = end;
1155
464k
    end += find_emph_char(data + end, size - end, '"');
1156
464k
    if (end == i) return 0;    /* no matching delimiter */
1157
301k
    i = end;
1158
7.42M
    while (end < size && data[end] == '"' && end - i < nq) end++;
1159
301k
    if (end - i >= nq) break;
1160
301k
  }
1161
1162
  /* trimming outside spaces */
1163
101k
  f_begin = nq;
1164
3.17M
  while (f_begin < end && data[f_begin] == ' ')
1165
3.07M
    f_begin++;
1166
1167
101k
  f_end = end - nq;
1168
3.04M
  while (f_end > nq && data[f_end-1] == ' ')
1169
2.94M
    f_end--;
1170
1171
  /* real quote */
1172
101k
  if (f_begin < f_end) {
1173
100k
    hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
1174
100k
    parse_inline(work, doc, data + f_begin, f_end - f_begin);
1175
1176
100k
    if (!doc->md.quote(ob, work, &doc->data))
1177
890
      end = 0;
1178
100k
    popbuf(doc, BUFFER_SPAN);
1179
100k
  } else {
1180
1.05k
    if (!doc->md.quote(ob, 0, &doc->data))
1181
1.05k
      end = 0;
1182
1.05k
  }
1183
1184
101k
  return end;
1185
263k
}
1186
1187
1188
/* char_escape • '\\' backslash escape */
1189
static size_t
1190
char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1191
158k
{
1192
158k
  static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~=\"$";
1193
158k
  hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
1194
158k
  size_t w;
1195
1196
158k
  if (size > 1) {
1197
156k
    if (data[1] == '\\' && (doc->ext_flags & HOEDOWN_EXT_MATH) &&
1198
156k
      size > 2 && (data[2] == '(' || data[2] == '[')) {
1199
2.52k
      const char *end = (data[2] == '[') ? "\\\\]" : "\\\\)";
1200
2.52k
      w = parse_math(ob, doc, data, offset, size, end, 3, data[2] == '[');
1201
2.52k
      if (w) return w;
1202
2.52k
    }
1203
1204
155k
    if (strchr(escape_chars, data[1]) == NULL)
1205
66.2k
      return 0;
1206
1207
89.2k
    if (doc->md.normal_text) {
1208
89.2k
      work.data = data + 1;
1209
89.2k
      work.size = 1;
1210
89.2k
      doc->is_escape_char = 1;
1211
89.2k
      doc->md.normal_text(ob, &work, &doc->data);
1212
89.2k
      doc->is_escape_char = 0;
1213
89.2k
    }
1214
0
    else hoedown_buffer_putc(ob, data[1]);
1215
89.2k
  } else if (size == 1) {
1216
2.13k
    if (doc->md.normal_text) {
1217
2.13k
      work.data = data;
1218
2.13k
      work.size = 1;
1219
2.13k
      doc->md.normal_text(ob, &work, &doc->data);
1220
2.13k
    }
1221
0
    else hoedown_buffer_putc(ob, data[0]);
1222
2.13k
  }
1223
1224
91.4k
  return 2;
1225
158k
}
1226
1227
/* char_entity • '&' escaped when it doesn't belong to an entity */
1228
/* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
1229
static size_t
1230
char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1231
193k
{
1232
193k
  size_t end = 1;
1233
193k
  hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
1234
1235
193k
  if (end < size && data[end] == '#')
1236
247
    end++;
1237
1238
631k
  while (end < size && isalnum(data[end]))
1239
437k
    end++;
1240
1241
193k
  if (end < size && data[end] == ';')
1242
924
    end++; /* real entity */
1243
192k
  else
1244
192k
    return 0; /* lone '&' */
1245
1246
924
  if (doc->md.entity) {
1247
0
    work.data = data;
1248
0
    work.size = end;
1249
0
    doc->md.entity(ob, &work, &doc->data);
1250
0
  }
1251
924
  else hoedown_buffer_put(ob, data, end);
1252
1253
924
  return end;
1254
193k
}
1255
1256
/* char_langle_tag • '<' when tags or autolinks are allowed */
1257
static size_t
1258
char_langle_tag(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1259
234k
{
1260
234k
  hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
1261
234k
  hoedown_autolink_type altype = HOEDOWN_AUTOLINK_NONE;
1262
234k
  size_t end = tag_length(data, size, &altype, doc->ext_flags & HOEDOWN_EXT_SCRIPT_TAGS);
1263
234k
  int ret = 0;
1264
1265
234k
  work.data = data;
1266
234k
  work.size = end;
1267
1268
234k
  if (end > 2) {
1269
58.5k
    if (doc->md.autolink && altype != HOEDOWN_AUTOLINK_NONE) {
1270
8.39k
      hoedown_buffer *u_link = newbuf(doc, BUFFER_SPAN);
1271
8.39k
      work.data = data + 1;
1272
8.39k
      work.size = end - 2;
1273
8.39k
      unscape_text(u_link, &work);
1274
8.39k
      ret = doc->md.autolink(ob, u_link, altype, &doc->data);
1275
8.39k
      popbuf(doc, BUFFER_SPAN);
1276
8.39k
    }
1277
50.1k
    else if (doc->md.raw_html)
1278
50.1k
      ret = doc->md.raw_html(ob, &work, &doc->data);
1279
58.5k
  }
1280
1281
234k
  if (!ret) return 0;
1282
58.5k
  else return end;
1283
234k
}
1284
1285
static size_t
1286
char_autolink_www(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1287
473k
{
1288
473k
  hoedown_buffer *link, *link_url, *link_text;
1289
473k
  size_t link_len, rewind;
1290
1291
473k
  if (!doc->md.link || doc->in_link_body)
1292
1.30k
    return 0;
1293
1294
472k
  link = newbuf(doc, BUFFER_SPAN);
1295
1296
472k
  if ((link_len = hoedown_autolink__www(&rewind, link, data, offset, size, HOEDOWN_AUTOLINK_SHORT_DOMAINS)) > 0) {
1297
23.3k
    link_url = newbuf(doc, BUFFER_SPAN);
1298
23.3k
    HOEDOWN_BUFPUTSL(link_url, "http://");
1299
23.3k
    hoedown_buffer_put(link_url, link->data, link->size);
1300
1301
23.3k
    if (ob->size > rewind)
1302
17.6k
      ob->size -= rewind;
1303
5.67k
    else
1304
5.67k
      ob->size = 0;
1305
1306
23.3k
    if (doc->md.normal_text) {
1307
23.3k
      link_text = newbuf(doc, BUFFER_SPAN);
1308
23.3k
      doc->md.normal_text(link_text, link, &doc->data);
1309
23.3k
      doc->md.link(ob, link_text, link_url, NULL, NULL, &doc->data);
1310
23.3k
      popbuf(doc, BUFFER_SPAN);
1311
23.3k
    } else {
1312
0
      doc->md.link(ob, link, link_url, NULL, NULL, &doc->data);
1313
0
    }
1314
23.3k
    popbuf(doc, BUFFER_SPAN);
1315
23.3k
  }
1316
1317
472k
  popbuf(doc, BUFFER_SPAN);
1318
472k
  return link_len;
1319
473k
}
1320
1321
static size_t
1322
char_autolink_email(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1323
348k
{
1324
348k
  hoedown_buffer *link;
1325
348k
  size_t link_len, rewind;
1326
1327
348k
  if (!doc->md.autolink || doc->in_link_body)
1328
459
    return 0;
1329
1330
347k
  link = newbuf(doc, BUFFER_SPAN);
1331
1332
347k
  if ((link_len = hoedown_autolink__email(&rewind, link, data, offset, size, 0)) > 0) {
1333
688
    if (ob->size > rewind)
1334
473
      ob->size -= rewind;
1335
215
    else
1336
215
      ob->size = 0;
1337
1338
688
    doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_EMAIL, &doc->data);
1339
688
  }
1340
1341
347k
  popbuf(doc, BUFFER_SPAN);
1342
347k
  return link_len;
1343
348k
}
1344
1345
static size_t
1346
char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1347
1.57M
{
1348
1.57M
  hoedown_buffer *link;
1349
1.57M
  size_t link_len, rewind;
1350
1351
1.57M
  if (!doc->md.autolink || doc->in_link_body)
1352
16.6k
    return 0;
1353
1354
1.55M
  link = newbuf(doc, BUFFER_SPAN);
1355
1356
1.55M
  if ((link_len = hoedown_autolink__url(&rewind, link, data, offset, size, 0)) > 0) {
1357
5.28k
    if (ob->size > rewind)
1358
4.47k
      ob->size -= rewind;
1359
813
    else
1360
813
      ob->size = 0;
1361
1362
5.28k
    doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_NORMAL, &doc->data);
1363
5.28k
  }
1364
1365
1.55M
  popbuf(doc, BUFFER_SPAN);
1366
1.55M
  return link_len;
1367
1.57M
}
1368
1369
static size_t
1370
122k
char_image(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) {
1371
122k
  size_t ret;
1372
1373
122k
  if (size < 2 || data[1] != '[') return 0;
1374
1375
45.0k
  ret = char_link(ob, doc, data + 1, offset + 1, size - 1);
1376
45.0k
  if (!ret) return 0;
1377
3.47k
  return ret + 1;
1378
45.0k
}
1379
1380
/* char_link • '[': parsing a link, a footnote or an image */
1381
static size_t
1382
char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1383
828k
{
1384
828k
  int is_img = (offset && data[-1] == '!' && !is_escaped(data - offset, offset - 1));
1385
828k
  int is_footnote = (doc->ext_flags & HOEDOWN_EXT_FOOTNOTES && size > 1 && data[1] == '^');
1386
828k
  size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
1387
828k
  hoedown_buffer *content = NULL;
1388
828k
  hoedown_buffer *link = NULL;
1389
828k
  hoedown_buffer *title = NULL;
1390
828k
  hoedown_buffer *u_link = NULL;
1391
828k
  hoedown_buffer *inline_attr = NULL;
1392
828k
  hoedown_buffer *ref_attr = NULL;
1393
828k
  hoedown_buffer *attr = NULL;
1394
828k
  hoedown_buffer *id = NULL;
1395
828k
  size_t org_work_size = doc->work_bufs[BUFFER_SPAN].size;
1396
828k
  int ret = 0, in_title = 0, qtype = 0;
1397
828k
  hoedown_link_type link_type = HOEDOWN_LINK_NONE;
1398
828k
  int ref_attr_exists = 0, inline_attr_exists = 0;
1399
1400
  /* checking whether the correct renderer exists */
1401
828k
  if ((is_footnote && !doc->md.footnote_ref) || (is_img && !doc->md.image)
1402
828k
    || (!is_img && !is_footnote && !doc->md.link))
1403
0
    goto cleanup;
1404
1405
  /* looking for the matching closing bracket */
1406
828k
  i += find_emph_char(data + i, size - i, ']');
1407
828k
  txt_e = i;
1408
1409
828k
  if (i < size && data[i] == ']') i++;
1410
198k
  else goto cleanup;
1411
1412
  /* footnote link */
1413
630k
  if (is_footnote) {
1414
39.7k
    hoedown_buffer id = { NULL, 0, 0, 0, NULL, NULL, NULL };
1415
39.7k
    struct footnote_ref *fr;
1416
1417
39.7k
    if (txt_e < 3)
1418
25.1k
      goto cleanup;
1419
1420
14.6k
    id.data = data + 2;
1421
14.6k
    id.size = txt_e - 2;
1422
1423
14.6k
    fr = find_footnote_ref(&doc->footnotes_found, id.data, id.size);
1424
1425
    /* mark footnote used */
1426
14.6k
    if (fr && !fr->is_used) {
1427
755
      if(!add_footnote_ref(&doc->footnotes_used, fr))
1428
0
        goto cleanup;
1429
755
      fr->is_used = 1;
1430
755
      fr->num = doc->footnotes_used.count;
1431
1432
      /* render */
1433
755
      if (doc->md.footnote_ref) {
1434
755
        doc->link_id = &id;
1435
755
        ret = doc->md.footnote_ref(ob, fr->num, &doc->data);
1436
755
        doc->link_id = NULL;
1437
755
      }
1438
755
    }
1439
1440
14.6k
    goto cleanup;
1441
14.6k
  }
1442
1443
  /* skip any amount of spacing */
1444
  /* (this is much more laxist than original markdown syntax) */
1445
1.76M
  while (i < size && _isspace(data[i]))
1446
1.17M
    i++;
1447
1448
  /* inline style link */
1449
590k
  if (i < size && data[i] == '(') {
1450
32.0k
    size_t nb_p;
1451
1452
32.0k
    link_type = HOEDOWN_LINK_INLINE;
1453
1454
    /* skipping initial spacing */
1455
32.0k
    i++;
1456
1457
405k
    while (i < size && _isspace(data[i]))
1458
373k
      i++;
1459
1460
32.0k
    link_b = i;
1461
1462
    /* looking for link end: ' " ) */
1463
    /* Count the number of open parenthesis */
1464
32.0k
    nb_p = 0;
1465
1466
25.8M
    while (i < size) {
1467
25.8M
      if (data[i] == '\\') i += 2;
1468
25.8M
      else if (data[i] == '(' && i != 0) {
1469
142k
        nb_p++; i++;
1470
142k
      }
1471
25.6M
      else if (data[i] == ')') {
1472
10.8k
        if (nb_p == 0) break;
1473
5.57k
        nb_p--; i++;
1474
25.6M
      } else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
1475
25.6M
      else i++;
1476
25.8M
    }
1477
1478
32.0k
    if (i >= size) goto cleanup;
1479
22.8k
    link_e = i;
1480
1481
    /* looking for title end if present */
1482
22.8k
    if (data[i] == '\'' || data[i] == '"') {
1483
17.5k
      qtype = data[i];
1484
17.5k
      in_title = 1;
1485
17.5k
      i++;
1486
17.5k
      title_b = i;
1487
1488
556M
      while (i < size) {
1489
556M
        if (data[i] == '\\') i += 2;
1490
556M
        else if (data[i] == qtype) {in_title = 0; i++;}
1491
556M
        else if ((data[i] == ')') && !in_title) break;
1492
556M
        else i++;
1493
556M
      }
1494
1495
17.5k
      if (i >= size) goto cleanup;
1496
1497
      /* skipping spacing after title */
1498
11.0k
      title_e = i - 1;
1499
184k
      while (title_e > title_b && _isspace(data[title_e]))
1500
173k
        title_e--;
1501
1502
      /* checking for closing quote presence */
1503
11.0k
      if (data[title_e] != '\'' &&  data[title_e] != '"') {
1504
1.53k
        title_b = title_e = 0;
1505
1.53k
        link_e = i;
1506
1.53k
      }
1507
11.0k
    }
1508
1509
    /* remove spacing at the end of the link */
1510
178k
    while (link_e > link_b && _isspace(data[link_e - 1]))
1511
162k
      link_e--;
1512
1513
    /* remove optional angle brackets around the link */
1514
16.2k
    if (data[link_b] == '<' && data[link_e - 1] == '>') {
1515
201
      link_b++;
1516
201
      link_e--;
1517
201
    }
1518
1519
    /* building escaped link and title */
1520
16.2k
    if (link_e > link_b) {
1521
2.99k
      link = newbuf(doc, BUFFER_SPAN);
1522
2.99k
      hoedown_buffer_put(link, data + link_b, link_e - link_b);
1523
2.99k
    }
1524
1525
16.2k
    if (title_e > title_b) {
1526
2.40k
      title = newbuf(doc, BUFFER_SPAN);
1527
2.40k
      hoedown_buffer_put(title, data + title_b, title_e - title_b);
1528
2.40k
    }
1529
1530
16.2k
    i++;
1531
16.2k
  }
1532
1533
  /* reference style link */
1534
558k
  else if (i < size && data[i] == '[') {
1535
69.2k
    struct link_ref *lr;
1536
1537
69.2k
    id = newbuf(doc, BUFFER_SPAN);
1538
1539
    /* looking for the id */
1540
69.2k
    i++;
1541
69.2k
    link_b = i;
1542
6.38M
    while (i < size && data[i] != ']') i++;
1543
69.2k
    if (i >= size) goto cleanup;
1544
66.0k
    link_e = i;
1545
1546
    /* finding the link_ref */
1547
66.0k
    if (link_b == link_e) {
1548
36.5k
      link_type = HOEDOWN_LINK_EMPTY_REFERENCE;
1549
36.5k
      replace_spacing(id, data + 1, txt_e - 1);
1550
36.5k
    } else {
1551
29.5k
      link_type = HOEDOWN_LINK_REFERENCE;
1552
29.5k
      hoedown_buffer_put(id, data + link_b, link_e - link_b);
1553
29.5k
    }
1554
1555
66.0k
    lr = find_link_ref(doc->refs, id->data, id->size);
1556
66.0k
    if (!lr)
1557
60.9k
      goto cleanup;
1558
1559
    /* keeping link and title from link_ref */
1560
5.08k
    link = lr->link;
1561
5.08k
    title = lr->title;
1562
5.08k
    ref_attr = lr->attr;
1563
5.08k
    i++;
1564
5.08k
  }
1565
1566
  /* shortcut reference style link */
1567
489k
  else {
1568
489k
    struct link_ref *lr;
1569
1570
489k
    id = newbuf(doc, BUFFER_SPAN);
1571
1572
489k
    link_type = HOEDOWN_LINK_SHORTCUT;
1573
1574
    /* crafting the id */
1575
489k
    replace_spacing(id, data + 1, txt_e - 1);
1576
1577
    /* finding the link_ref */
1578
489k
    lr = find_link_ref(doc->refs, id->data, id->size);
1579
489k
    if (!lr)
1580
199k
      goto cleanup;
1581
1582
    /* keeping link and title from link_ref */
1583
289k
    link = lr->link;
1584
289k
    title = lr->title;
1585
289k
    ref_attr = lr->attr;
1586
1587
    /* rewinding the spacing */
1588
289k
    i = txt_e + 1;
1589
289k
  }
1590
1591
  /* building content: img alt is kept, only link content is parsed */
1592
310k
  if (txt_e > 1) {
1593
22.5k
    content = newbuf(doc, BUFFER_SPAN);
1594
22.5k
    if (is_img) {
1595
10.7k
      hoedown_buffer_put(content, data + 1, txt_e - 1);
1596
11.7k
    } else {
1597
      /* disable autolinking when parsing inline the
1598
       * content of a link */
1599
11.7k
      doc->in_link_body = 1;
1600
11.7k
      parse_inline(content, doc, data + 1, txt_e - 1);
1601
11.7k
      doc->in_link_body = 0;
1602
11.7k
    }
1603
22.5k
  }
1604
1605
310k
  if (link) {
1606
297k
    u_link = newbuf(doc, BUFFER_SPAN);
1607
297k
    unscape_text(u_link, link);
1608
297k
  }
1609
1610
  /* if special attributes are enabled, attempt to parse an inline one from
1611
   * the link */
1612
310k
  if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
1613
    /* attr is a span because cleanup code depends on it being span */
1614
310k
    inline_attr = newbuf(doc, BUFFER_SPAN);
1615
310k
    i += parse_inline_attributes(data + i, size - i, inline_attr, doc->attr_activation);
1616
310k
  }
1617
1618
  /* remove optional < and > around inline and ref special attributes */
1619
310k
  if (ref_attr && ref_attr->size > 0) {
1620
60.8k
    if (ref_attr->size > 1) {
1621
4.47k
      if (ref_attr->data[0] == '<') {
1622
295
        hoedown_buffer_slurp(ref_attr, 1);
1623
295
      }
1624
4.47k
      if (ref_attr->data[ref_attr->size - 1] == '>') {
1625
80
        ref_attr->size--;
1626
80
      }
1627
4.47k
    }
1628
60.8k
  }
1629
310k
  if (inline_attr && inline_attr->size > 0) {
1630
3.77k
    if (inline_attr->size > 1) {
1631
1.53k
      if (inline_attr->data[0] == '<') {
1632
428
        hoedown_buffer_slurp(inline_attr, 1);
1633
428
      }
1634
1.53k
      if (inline_attr->data[inline_attr->size - 1] == '>') {
1635
780
        inline_attr->size--;
1636
780
      }
1637
1.53k
    }
1638
3.77k
  }
1639
1640
  /* construct the final attr that is actually applied to the link */
1641
310k
  ref_attr_exists = ref_attr && ref_attr->size > 0;
1642
310k
  inline_attr_exists = inline_attr && inline_attr->size > 0;
1643
310k
  if (ref_attr_exists || inline_attr_exists) {
1644
64.0k
    attr = newbuf(doc, BUFFER_SPAN);
1645
64.0k
    if (ref_attr_exists) {
1646
60.8k
      hoedown_buffer_put(attr, ref_attr->data, ref_attr->size);
1647
60.8k
    }
1648
    /* if both inline and ref attrs exist, join them with a space to prevent
1649
     * conflicts */
1650
64.0k
    if (ref_attr_exists && inline_attr_exists) {
1651
519
      hoedown_buffer_putc(attr, ' ');
1652
519
    }
1653
64.0k
    if (inline_attr_exists) {
1654
3.67k
      hoedown_buffer_put(attr, inline_attr->data, inline_attr->size);
1655
3.67k
    }
1656
64.0k
  }
1657
1658
  /* calling the relevant rendering function */
1659
310k
  doc->link_id = id;
1660
310k
  doc->link_type = link_type;
1661
310k
  doc->link_ref_attr = ref_attr;
1662
310k
  doc->link_inline_attr = inline_attr;
1663
310k
  if (is_img) {
1664
14.7k
    ret = doc->md.image(ob, u_link, title, content, attr, &doc->data);
1665
295k
  } else {
1666
295k
    ret = doc->md.link(ob, content, u_link, title, attr, &doc->data);
1667
295k
  }
1668
310k
  doc->link_inline_attr = NULL;
1669
310k
  doc->link_ref_attr = NULL;
1670
310k
  doc->link_type = HOEDOWN_LINK_NONE;
1671
310k
  doc->link_id = NULL;
1672
1673
  /* cleanup */
1674
828k
cleanup:
1675
828k
  doc->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
1676
828k
  return ret ? i : 0;
1677
310k
}
1678
1679
static size_t
1680
char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1681
291k
{
1682
291k
  size_t sup_start, sup_len;
1683
291k
  hoedown_buffer *sup;
1684
1685
291k
  if (!doc->md.superscript)
1686
0
    return 0;
1687
1688
291k
  if (size < 2)
1689
7.38k
    return 0;
1690
1691
284k
  if (data[1] == '(') {
1692
30.2k
    sup_start = 2;
1693
30.2k
    sup_len = find_emph_char(data + 2, size - 2, ')') + 2;
1694
1695
30.2k
    if (sup_len == size)
1696
359
      return 0;
1697
254k
  } else {
1698
254k
    sup_start = sup_len = 1;
1699
1700
23.0M
    while (sup_len < size && !_isspace(data[sup_len]))
1701
22.7M
      sup_len++;
1702
254k
  }
1703
1704
284k
  if (sup_len - sup_start == 0)
1705
31.7k
    return (sup_start == 2) ? 3 : 0;
1706
1707
252k
  sup = newbuf(doc, BUFFER_SPAN);
1708
252k
  parse_inline(sup, doc, data + sup_start, sup_len - sup_start);
1709
252k
  doc->md.superscript(ob, sup, &doc->data);
1710
252k
  popbuf(doc, BUFFER_SPAN);
1711
1712
252k
  return (sup_start == 2) ? sup_len + 1 : sup_len;
1713
284k
}
1714
1715
static size_t
1716
char_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1717
30.1k
{
1718
  /* double dollar */
1719
30.1k
  if (size > 1 && data[1] == '$')
1720
6.35k
    return parse_math(ob, doc, data, offset, size, "$$", 2, 1);
1721
1722
  /* single dollar allowed only with MATH_EXPLICIT flag */
1723
23.7k
  if (doc->ext_flags & HOEDOWN_EXT_MATH_EXPLICIT)
1724
23.7k
    return parse_math(ob, doc, data, offset, size, "$", 1, 0);
1725
1726
0
  return 0;
1727
23.7k
}
1728
1729
/*********************************
1730
 * BLOCK-LEVEL PARSING FUNCTIONS *
1731
 *********************************/
1732
1733
/* is_empty • returns the line length when it is empty, 0 otherwise
 *
 * A line is empty when it holds nothing but spaces up to the newline or the
 * end of the data. The returned length counts one byte for the newline,
 * even when the data ended without one. */
static size_t
is_empty(const uint8_t *data, size_t size)
{
  size_t pos = 0;

  while (pos < size) {
    const uint8_t ch = data[pos];

    if (ch == '\n')
      break;
    if (ch != ' ')
      return 0;
    pos++;
  }

  return pos + 1;
}
1745
1746
/* is_hrule • returns whether a line is a horizontal rule
 *
 * After up to three leading spaces, the line must consist only of one of
 * '*', '-' or '_' (at least three times) and spaces. */
static int
is_hrule(uint8_t *data, size_t size)
{
  size_t pos = 0, marks = 0;
  uint8_t rule;

  if (size < 3)
    return 0;

  /* skipping initial spaces (size >= 3, so these reads are in range) */
  while (pos < 3 && data[pos] == ' ')
    pos++;

  /* at least three bytes must remain for the rule itself */
  if (pos + 2 >= size)
    return 0;

  rule = data[pos];
  if (rule != '*' && rule != '-' && rule != '_')
    return 0;

  /* the whole line must be the rule character or space */
  for (; pos < size && data[pos] != '\n'; pos++) {
    if (data[pos] == rule)
      marks++;
    else if (data[pos] != ' ')
      return 0;
  }

  return marks >= 3;
}
1776
1777
/* is_codefence • checks whether a line opens or closes a code fence
 *
 * A fence is up to three leading spaces followed by at least three '~' or
 * '`' characters, with no further fence character on the rest of the line.
 * Returns the offset just past the fence run, or 0 when the line is not a
 * fence. When non-NULL, *width receives the run length and *chr the fence
 * character. */
static size_t
is_codefence(uint8_t *data, size_t size, size_t *width, uint8_t *chr)
{
  size_t pos = 0, run, scan;
  uint8_t fence;

  if (size < 3)
    return 0;

  /* skipping initial spaces (size >= 3, so these reads are in range) */
  while (pos < 3 && data[pos] == ' ')
    pos++;

  /* at least three bytes must remain for the fence itself */
  if (pos + 2 >= size)
    return 0;

  fence = data[pos];
  if (fence != '~' && fence != '`')
    return 0;

  /* the fence must be a run of that same character */
  run = 1;
  pos++;
  while (pos < size && data[pos] == fence) {
    run++;
    pos++;
  }

  if (run < 3)
    return 0;

  /* a later fence character on the same line means this is a code span */
  for (scan = pos; scan < size && data[scan] != '\n'; scan++) {
    if (data[scan] == fence)
      return 0;
  }

  if (width)
    *width = run;
  if (chr)
    *chr = fence;

  return pos;
}
1819
1820
/* expects single line, checks if it's a codefence and extracts language */
1821
static int
1822
parse_codefence(hoedown_document *doc, uint8_t *data, size_t size, hoedown_buffer *lang, size_t *width, uint8_t *chr, unsigned int flags, hoedown_buffer *attr)
1823
4.49M
{
1824
4.49M
  size_t i, w, lang_start, attr_start = 0;
1825
1826
4.49M
  i = w = is_codefence(data, size, width, chr);
1827
4.49M
  if (i == 0)
1828
4.47M
    return 0;
1829
1830
150k
  while (i < size && _isspace(data[i]))
1831
132k
    i++;
1832
1833
17.4k
  lang_start = i;
1834
1835
17.4k
  if (flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
1836
17.4k
    attr_start = i + parse_attributes(data + i, size - i, attr, NULL, "", 0, doc->attr_activation);
1837
866k
    while (i < attr_start) {
1838
850k
      if (_isspace(data[i])) {
1839
1.05k
        break;
1840
1.05k
      }
1841
849k
      i++;
1842
849k
    }
1843
17.4k
  } else {
1844
0
    while (i < size && !_isspace(data[i]))
1845
0
      i++;
1846
0
  }
1847
1848
17.4k
  lang->data = data + lang_start;
1849
17.4k
  lang->size = i - lang_start;
1850
1851
17.4k
  return w;
1852
4.49M
}
1853
1854
/* is_atxheader • returns whether the line is a hash-prefixed header */
1855
static int
1856
is_atxheader(hoedown_document *doc, uint8_t *data, size_t size)
1857
5.45M
{
1858
5.45M
  size_t level = 0, begin = 0, len;
1859
5.45M
  uint8_t *p;
1860
1861
5.45M
  if (data[0] != '#')
1862
5.40M
    return 0;
1863
1864
113k
  while (level < size && level < 6 && data[level] == '#')
1865
61.4k
    level++;
1866
1867
51.6k
  if (level >= size || data[level] == '\n') {
1868
17.0k
      return 0;
1869
17.0k
  }
1870
1871
34.6k
  len = size - level;
1872
34.6k
  p = memchr(data + level, '\n', len);
1873
34.6k
  if (p) {
1874
31.7k
    len = p - (data + level) + 1;
1875
31.7k
  }
1876
1877
  /* if the header is only whitespace, it is not a header */
1878
34.6k
  if (len && is_empty_all(data + level, len)) {
1879
1.17k
    return 0;
1880
1.17k
  }
1881
1882
33.4k
  if ((doc->ext_flags & HOEDOWN_EXT_SPACE_HEADERS) && level < size && data[level] != ' ') {
1883
7.54k
    return 0;
1884
7.54k
  }
1885
1886
  /* if the header is only special attribute, it is not a header */
1887
25.9k
  if (len && (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE)) {
1888
25.9k
    p = memchr(data + level, '{', len);
1889
25.9k
    if (p) {
1890
      /* get number of characters from # to { */
1891
7.08k
      begin = p - (data + level);
1892
7.08k
      if (begin > 0 && !is_empty_all(data + level, begin)) {
1893
1.99k
        return 1;
1894
1.99k
      }
1895
      /* check for special attributes after the # */
1896
5.09k
      return !parse_inline_attributes(data + level + begin, len - begin, NULL, doc->attr_activation);
1897
7.08k
    }
1898
25.9k
  }
1899
1900
18.8k
  return 1;
1901
25.9k
}
1902
1903
/* is_headerline • returns whether the line is a setext-style hdr underline
 *
 * The line must be a run of '=' (level 1) or '-' (level 2), optionally
 * followed by spaces, and then end at a newline or at the end of the data.
 * Returns the header level (1 or 2), or 0 when the line is not an underline
 * or the input is empty. */
static int
is_headerline(uint8_t *data, size_t size)
{
  size_t i;
  uint8_t mark;

  /* nothing to inspect; data[0] must not be read */
  if (size == 0)
    return 0;

  mark = data[0];
  if (mark != '=' && mark != '-')
    return 0;

  /* the run of underline characters ... */
  for (i = 1; i < size && data[i] == mark; i++)
    ;

  /* ... optionally followed by trailing spaces */
  while (i < size && data[i] == ' ')
    i++;

  if (i < size && data[i] != '\n')
    return 0;

  return (mark == '=') ? 1 : 2;
}
1923
1924
/* is_next_headerline • returns whether the line following the current one
 * is a setext-style header underline (see is_headerline for the levels);
 * 0 when there is no following line */
static int
is_next_headerline(uint8_t *data, size_t size)
{
  uint8_t *newline = memchr(data, '\n', size);
  size_t next;

  /* no newline at all: the current line is the last one */
  if (!newline)
    return 0;

  /* offset of the first byte after the newline */
  next = (size_t)(newline - data) + 1;
  if (next >= size)
    return 0;

  return is_headerline(data + next, size - next);
}
1937
1938
/* prefix_quote • returns blockquote prefix length
 *
 * The prefix is up to three spaces, a '>' and one optional space.
 * Returns 0 when the line is not a blockquote. */
static size_t
prefix_quote(uint8_t *data, size_t size)
{
  size_t pos = 0;

  /* up to three leading spaces */
  while (pos < 3 && pos < size && data[pos] == ' ')
    pos++;

  if (pos >= size || data[pos] != '>')
    return 0;

  pos++;

  /* one optional space after the marker belongs to the prefix */
  if (pos < size && data[pos] == ' ')
    pos++;

  return pos;
}
1956
1957
/* prefix_code • returns prefix length for block code
 *
 * The prefix is exactly four leading spaces; returns 4 when present and 0
 * otherwise. */
static size_t
prefix_code(uint8_t *data, size_t size)
{
  size_t pos;

  if (size <= 3)
    return 0;

  for (pos = 0; pos < 4; pos++) {
    if (data[pos] != ' ')
      return 0;
  }

  return 4;
}
1966
1967
/* prefix_oli • returns ordered list item prefix
 *
 * The prefix is up to three spaces, one or more digits, a '.' and a space.
 * Returns the prefix length, or 0 when the line is not an ordered list item
 * or the following line is a setext header underline. */
static size_t
prefix_oli(uint8_t *data, size_t size)
{
  size_t pos = 0;

  /* up to three leading spaces */
  while (pos < 3 && pos < size && data[pos] == ' ')
    pos++;

  /* at least one digit is required */
  if (pos >= size || data[pos] < '0' || data[pos] > '9')
    return 0;

  do {
    pos++;
  } while (pos < size && data[pos] >= '0' && data[pos] <= '9');

  /* the number must be followed by ". " */
  if (pos + 1 >= size)
    return 0;
  if (data[pos] != '.' || data[pos + 1] != ' ')
    return 0;

  /* a line underlined as a header is a header, not a list item */
  if (is_next_headerline(data + pos, size - pos))
    return 0;

  return pos + 2;
}
1991
1992
/* prefix_uli • returns unordered list item prefix */
/*  the prefix is up to three spaces, one of '*', '+' or '-', then a space; */
/*  returns its length, or 0 if the line is not an unordered item or the */
/*  following line is a setext header underline */
static size_t
prefix_uli(uint8_t *data, size_t size)
{
  size_t pos = 0;

  /* at most three leading spaces are skipped */
  while (pos < 3 && pos < size && data[pos] == ' ')
    pos++;

  /* both the bullet and the space after it must fit in the buffer */
  if (pos + 1 >= size)
    return 0;

  switch (data[pos]) {
  case '*':
  case '+':
  case '-':
    break;
  default:
    return 0;
  }

  if (data[pos + 1] != ' ')
    return 0;

  /* text sitting directly above an underline is a header, not an item */
  if (is_next_headerline(data + pos, size - pos))
    return 0;

  return pos + 2;
}
2012
2013
/* prefix_dt • returns dictionary definition prefix
 * the prefix is up to three spaces, a ':' and a space (e.g. "  : "); returns
 * its length, or 0 if the line does not start that way or the following line
 * is a setext header underline */
static size_t
prefix_dt(uint8_t *data, size_t size)
{
  size_t pos = 0;

  /* no more than 3 spaces: a fourth would make it an indented codeblock */
  while (pos < 3 && pos < size && data[pos] == ' ')
    pos++;

  /* the colon and the space after it must both be present */
  if (pos + 1 >= size)
    return 0;
  if (data[pos] != ':')
    return 0;
  if (data[pos + 1] != ' ')
    return 0;

  if (is_next_headerline(data + pos, size - pos))
    return 0;

  return pos + 2;
}
2036
2037
/* is_paragraph • returns if the next block is a paragraph (doesn't follow any
2038
 * other special rules for other types of blocks) */
2039
static int
2040
is_paragraph(hoedown_document *doc, uint8_t *txt_data, size_t end);
2041
2042
/* prefix_dli • returns dictionary definition prefix
2043
 * a dli looks like a block of text, followed by optional whitespace, followed
2044
 * by another block with : as the first non-whitespace character */
2045
static size_t
2046
prefix_dli(hoedown_document *doc, uint8_t *data, size_t size)
2047
407k
{
2048
  /* end is to keep track of the final return value */
2049
407k
  size_t i = 0, j = 0, end = 0;
2050
407k
  int empty = 0;
2051
2052
  /* if the first line has a : in front of it, it can't be a definition list
2053
   * that starts at this point */
2054
407k
  if (prefix_dt(data, size)) {
2055
2.38k
    return 0;
2056
2.38k
  }
2057
2058
  /* temporarily toggle definition lists off to prevent infinite loops */
2059
404k
  doc->ext_flags &= ~HOEDOWN_EXT_DEFINITION_LISTS;
2060
2061
  /* check if it is a block of text with no double newlines inside, followed by
2062
   *  another block of text starting with : */
2063
4.42M
  while (i < size) {
2064
    /* if the line we are on is empty, flip the empty flag to indicate that
2065
     * the next block of text we see has to start with : to be considered
2066
     * a definition list; then skip to the next line */
2067
4.36M
    j = is_empty(data + i, size - i);
2068
4.36M
    if(j != 0) {
2069
310k
      empty = 1;
2070
310k
      i += j;
2071
310k
      continue;
2072
310k
    }
2073
2074
    /* if anything special is found while parsing the definition term part,
2075
     * then return so that the main loop can deal with it */
2076
4.05M
    if (!is_paragraph(doc, data + i, size - i)) {
2077
123k
      break;
2078
123k
    }
2079
2080
    /* check if the current line starts with :, returning the position of the
2081
     * beginning of the line if it does */
2082
3.93M
    j = prefix_dt(data + i, size - i);
2083
3.93M
    if (j > 0) {
2084
72.3k
      end = i;
2085
72.3k
      break;
2086
3.86M
    } else if(empty) {
2087
      /* if an empty newline has been found, then since : was not the first
2088
       * character after whitespaces, it can't be a definition list */
2089
147k
      break;
2090
147k
    }
2091
    /* scan characters until the next newline */
2092
221M
    for (i = i + 1; i < size && data[i - 1] != '\n'; i++);
2093
3.71M
  }
2094
2095
404k
  doc->ext_flags |= HOEDOWN_EXT_DEFINITION_LISTS;
2096
404k
  return end;
2097
407k
}
2098
2099
/* parse_block • parsing of one block, returning next uint8_t to parse */
2100
static void parse_block(hoedown_buffer *ob, hoedown_document *doc,
2101
      uint8_t *data, size_t size);
2102
2103
2104
/* parse_blockquote • handles parsing of a blockquote fragment */
2105
static size_t
2106
parse_blockquote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
2107
34.1k
{
2108
34.1k
  size_t beg, end = 0, pre, work_size = 0;
2109
34.1k
  uint8_t *work_data = 0;
2110
34.1k
  hoedown_buffer *out = 0;
2111
2112
34.1k
  doc->blockquote_depth++;
2113
2114
34.1k
  out = newbuf(doc, BUFFER_BLOCK);
2115
34.1k
  beg = 0;
2116
210k
  while (beg < size) {
2117
41.9M
    for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
2118
2119
200k
    pre = prefix_quote(data + beg, end - beg);
2120
2121
200k
    if (pre)
2122
37.4k
      beg += pre; /* skipping prefix */
2123
2124
    /* empty line finished */
2125
163k
    else if ((doc->ext_flags & HOEDOWN_EXT_BLOCKQUOTE_EMPTY_LINE) &&
2126
163k
        (is_empty(data + beg, end - beg)))
2127
24.6k
      break;
2128
2129
    /* empty line followed by non-quote line */
2130
138k
    else if (is_empty(data + beg, end - beg) &&
2131
138k
        (end >= size || (prefix_quote(data + end, size - end) == 0 &&
2132
0
        !is_empty(data + end, size - end))))
2133
0
      break;
2134
2135
176k
    if (beg < end) { /* copy into the in-place working buffer */
2136
      /* hoedown_buffer_put(work, data + beg, end - beg); */
2137
175k
      if (!work_data)
2138
33.8k
        work_data = data + beg;
2139
142k
      else if (data + beg != work_data + work_size)
2140
18.5k
        memmove(work_data + work_size, data + beg, end - beg);
2141
175k
      work_size += end - beg;
2142
175k
    }
2143
176k
    beg = end;
2144
176k
  }
2145
2146
34.1k
  parse_block(out, doc, work_data, work_size);
2147
34.1k
  if (doc->md.blockquote)
2148
34.1k
    doc->md.blockquote(ob, out, &doc->data);
2149
34.1k
  popbuf(doc, BUFFER_BLOCK);
2150
2151
34.1k
  doc->blockquote_depth--;
2152
2153
34.1k
  return end;
2154
34.1k
}
2155
2156
static size_t
2157
parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int do_render);
2158
2159
/* parse_paragraph • handles parsing of a regular paragraph */
2160
static size_t
2161
parse_paragraph(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
2162
315k
{
2163
315k
  hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
2164
315k
  size_t i = 0, end = 0;
2165
315k
  int level = 0;
2166
2167
315k
  work.data = data;
2168
2169
1.03M
  while (i < size) {
2170
147M
    for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
2171
2172
998k
    if (is_empty(data + i, size - i))
2173
107k
      break;
2174
2175
890k
    if ((level = is_headerline(data + i, size - i)) != 0) {
2176
145k
      if (i == 0) {
2177
72.8k
        level = 0;
2178
72.8k
        i = end;
2179
72.8k
      }
2180
145k
      break;
2181
145k
    }
2182
2183
745k
    if (is_atxheader(doc, data + i, size - i) ||
2184
745k
      is_hrule(data + i, size - i) ||
2185
745k
      prefix_quote(data + i, size - i)) {
2186
21.4k
      end = i;
2187
21.4k
      break;
2188
21.4k
    }
2189
2190
724k
    i = end;
2191
724k
  }
2192
2193
315k
  work.size = i;
2194
622k
  while (work.size && data[work.size - 1] == '\n')
2195
307k
    work.size--;
2196
2197
315k
  if (!level) {
2198
242k
    hoedown_buffer *attr = newbuf(doc, BUFFER_ATTRIBUTE);
2199
242k
    if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
2200
242k
      parse_attributes(work.data, work.size, NULL, attr, "paragraph", 1, doc->attr_activation);
2201
242k
      if (attr->size > 0) {
2202
        /* remove the length of the attribute from the work size - the 12 comes
2203
        * from the leading space (1), the paragraph (9), the @ symbol (1), and
2204
        * the {} (2) (any extra spaces in the attribute are included inside
2205
        * the attribute) */
2206
0
        work.size -= attr->size + 12;
2207
0
      }
2208
242k
    }
2209
2210
242k
    hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK);
2211
242k
    parse_inline(tmp, doc, work.data, work.size);
2212
242k
    if (doc->md.paragraph)
2213
242k
      doc->md.paragraph(ob, tmp, attr, &doc->data);
2214
242k
    popbuf(doc, BUFFER_BLOCK);
2215
242k
    popbuf(doc, BUFFER_ATTRIBUTE);
2216
242k
  } else {
2217
72.3k
    hoedown_buffer *header_work;
2218
72.3k
    hoedown_buffer *attr_work;
2219
72.3k
    size_t len;
2220
2221
72.3k
    if (work.size) {
2222
72.3k
      size_t beg;
2223
72.3k
      i = work.size;
2224
72.3k
      work.size -= 1;
2225
2226
7.75M
      while (work.size && data[work.size] != '\n')
2227
7.67M
        work.size -= 1;
2228
2229
72.3k
      beg = work.size + 1;
2230
72.3k
      while (work.size && data[work.size - 1] == '\n')
2231
0
        work.size -= 1;
2232
2233
72.3k
      if (work.size > 0) {
2234
31.4k
        hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK);
2235
31.4k
        parse_inline(tmp, doc, work.data, work.size);
2236
2237
31.4k
        if (doc->md.paragraph)
2238
31.4k
          doc->md.paragraph(ob, tmp, NULL, &doc->data);
2239
2240
31.4k
        popbuf(doc, BUFFER_BLOCK);
2241
31.4k
        work.data += beg;
2242
31.4k
        work.size = i - beg;
2243
31.4k
      }
2244
40.9k
      else work.size = i;
2245
72.3k
    }
2246
2247
72.3k
    header_work = newbuf(doc, BUFFER_SPAN);
2248
72.3k
    attr_work = newbuf(doc, BUFFER_ATTRIBUTE);
2249
2250
72.3k
    len = work.size;
2251
72.3k
    if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
2252
72.3k
      len = parse_attributes(work.data, work.size, attr_work, NULL, "", 1, doc->attr_activation);
2253
72.3k
    }
2254
2255
72.3k
    parse_inline(header_work, doc, work.data, len);
2256
2257
72.3k
    if (doc->md.header) {
2258
72.3k
      doc->header_type = HOEDOWN_HEADER_SETEXT;
2259
72.3k
      doc->md.header(ob, header_work, attr_work, (int)level, &doc->data);
2260
72.3k
      doc->header_type = HOEDOWN_HEADER_NONE;
2261
72.3k
    }
2262
2263
72.3k
    popbuf(doc, BUFFER_SPAN);
2264
72.3k
    popbuf(doc, BUFFER_ATTRIBUTE);
2265
72.3k
  }
2266
2267
315k
  return end;
2268
315k
}
2269
2270
/* parse_fencedcode • handles parsing of a block-level code fragment */
2271
static size_t
2272
parse_fencedcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, unsigned int flags)
2273
4.49M
{
2274
4.49M
  hoedown_buffer text = { 0, 0, 0, 0, NULL, NULL, NULL };
2275
4.49M
  hoedown_buffer lang = { 0, 0, 0, 0, NULL, NULL, NULL };
2276
4.49M
  size_t i = 0, text_start, line_start;
2277
4.49M
  size_t w, w2;
2278
4.49M
  size_t width, width2;
2279
4.49M
  uint8_t chr, chr2;
2280
  /* needed for parse_attribute functions as buffer functions do not work with
2281
   * buffers on the stack */
2282
4.49M
  hoedown_buffer *attr = newbuf(doc, BUFFER_ATTRIBUTE);
2283
2284
2285
  /* parse codefence line */
2286
325M
  while (i < size && data[i] != '\n')
2287
321M
    i++;
2288
2289
4.49M
  w = parse_codefence(doc, data, i, &lang, &width, &chr, flags, attr);
2290
4.49M
  if (!w) {
2291
4.47M
    popbuf(doc, BUFFER_ATTRIBUTE);
2292
4.47M
    return 0;
2293
4.47M
  }
2294
2295
  /* search for end */
2296
17.4k
  i++;
2297
17.4k
  text_start = i;
2298
3.77M
  while ((line_start = i) < size) {
2299
169M
    while (i < size && data[i] != '\n')
2300
165M
      i++;
2301
2302
3.76M
    w2 = is_codefence(data + line_start, i - line_start, &width2, &chr2);
2303
3.76M
    if (w == w2 && width == width2 && chr == chr2 &&
2304
3.76M
      is_empty(data + (line_start+w), i - (line_start+w)))
2305
4.85k
      break;
2306
2307
3.75M
    if (i < size) i++;
2308
3.75M
  }
2309
2310
17.4k
  text.data = data + text_start;
2311
17.4k
  text.size = line_start - text_start;
2312
2313
17.4k
  if (doc->md.blockcode) {
2314
9.77k
    doc->fencedcode_char = chr;
2315
9.77k
    doc->md.blockcode(ob, text.size ? &text : NULL, lang.size ? &lang : NULL, attr->size ? attr : NULL, &doc->data);
2316
9.77k
    doc->fencedcode_char = 0;
2317
9.77k
  }
2318
2319
17.4k
  popbuf(doc, BUFFER_ATTRIBUTE);
2320
2321
17.4k
  return i;
2322
4.49M
}
2323
2324
static size_t
2325
parse_blockcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
2326
0
{
2327
0
  size_t beg, end, pre;
2328
0
  hoedown_buffer *work = 0;
2329
0
  hoedown_buffer *attr = 0;
2330
2331
0
  work = newbuf(doc, BUFFER_BLOCK);
2332
0
  attr = newbuf(doc, BUFFER_ATTRIBUTE);
2333
2334
0
  beg = 0;
2335
0
  while (beg < size) {
2336
0
    for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
2337
0
    pre = prefix_code(data + beg, end - beg);
2338
2339
0
    if (pre)
2340
0
      beg += pre; /* skipping prefix */
2341
0
    else if (!is_empty(data + beg, end - beg))
2342
      /* non-empty non-prefixed line breaks the pre */
2343
0
      break;
2344
2345
0
    if (beg < end) {
2346
      /* verbatim copy to the working buffer,
2347
        escaping entities */
2348
0
      if (is_empty(data + beg, end - beg))
2349
0
        hoedown_buffer_putc(work, '\n');
2350
0
      else hoedown_buffer_put(work, data + beg, end - beg);
2351
0
    }
2352
0
    beg = end;
2353
0
  }
2354
2355
0
  while (work->size && work->data[work->size - 1] == '\n')
2356
0
    work->size -= 1;
2357
2358
0
  if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
2359
0
    work->size = parse_attributes(work->data, work->size, NULL, attr, "", 0, doc->attr_activation);
2360
0
  }
2361
2362
0
  hoedown_buffer_putc(work, '\n');
2363
2364
0
  if (doc->md.blockcode)
2365
0
    doc->md.blockcode(ob, work, NULL, attr, &doc->data);
2366
2367
0
  popbuf(doc, BUFFER_BLOCK);
2368
0
  popbuf(doc, BUFFER_ATTRIBUTE);
2369
0
  return beg;
2370
0
}
2371
2372
/* parse_listitem • parsing of a single list item */
2373
/*  assuming initial prefix is already removed */
2374
static size_t
2375
parse_listitem(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags *flags, hoedown_buffer *attribute)
2376
236k
{
2377
236k
  hoedown_buffer *work = 0, *inter = 0;
2378
236k
  hoedown_buffer *attr = 0;
2379
236k
  size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i, len, fence_pre = 0;
2380
236k
  int in_empty = 0, has_inside_empty = 0, in_fence = 0;
2381
236k
  uint8_t ul_item_char = '*';
2382
236k
  hoedown_buffer *ol_numeral = NULL;
2383
2384
  /* keeping track of the first indentation prefix */
2385
239k
  while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
2386
3.56k
    orgpre++;
2387
2388
236k
  beg = prefix_uli(data, size);
2389
236k
  if (beg) ul_item_char = data[beg - 2];
2390
236k
  if (!beg) {
2391
143k
    beg = prefix_oli(data, size);
2392
143k
    if (beg) {
2393
40.7k
      ol_numeral = hoedown_buffer_new(1024);
2394
      /* -2 to eliminate the trailing ". " */
2395
40.7k
      hoedown_buffer_put(ol_numeral, data, beg - 2);
2396
40.7k
    }
2397
143k
    if (*flags & HOEDOWN_LIST_DEFINITION) {
2398
109k
      beg = prefix_dt(data, size);
2399
109k
      if (beg) ul_item_char = data[beg - 2];
2400
109k
    }
2401
143k
  }
2402
2403
236k
  if (!beg) {
2404
34.7k
    if (ol_numeral) hoedown_buffer_free(ol_numeral);
2405
34.7k
    return 0;
2406
34.7k
  }
2407
2408
  /* skipping to the beginning of the following line */
2409
201k
  end = beg;
2410
16.9M
  while (end < size && data[end - 1] != '\n')
2411
16.7M
    end++;
2412
2413
201k
  if (doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) {
2414
201k
    fence_pre = is_codefence(data + beg, end - beg, &len, NULL);
2415
201k
    if (fence_pre) {
2416
3.48k
      in_fence = 1;
2417
3.48k
      fence_pre = fence_pre + beg - len;
2418
3.48k
    }
2419
201k
  }
2420
2421
  /* getting working buffers */
2422
201k
  work = newbuf(doc, BUFFER_SPAN);
2423
201k
  inter = newbuf(doc, BUFFER_SPAN);
2424
2425
  /* calculating the indentation */
2426
201k
  i = 0;
2427
401k
  while (i < 4 && beg + i < end && data[beg + i] == ' ')
2428
200k
    i++;
2429
2430
201k
  beg += i;
2431
2432
  /* putting the first line into the working buffer */
2433
201k
  hoedown_buffer_put(work, data + beg, end - beg);
2434
201k
  beg = end;
2435
2436
201k
  attr = newbuf(doc, BUFFER_ATTRIBUTE);
2437
2438
  /* process the following lines */
2439
600k
  while (beg < size) {
2440
578k
    size_t has_next_uli = 0, has_next_oli = 0, has_next_dli = 0;
2441
2442
578k
    end++;
2443
2444
43.6M
    while (end < size && data[end - 1] != '\n')
2445
43.0M
      end++;
2446
2447
    /* process an empty line */
2448
578k
    if (is_empty(data + beg, end - beg)) {
2449
111k
      in_empty = 1;
2450
111k
      beg = end;
2451
111k
      continue;
2452
111k
    }
2453
2454
    /* calculating the indentation */
2455
466k
    i = 0;
2456
591k
    while (i < 4 && beg + i < end && data[beg + i] == ' ')
2457
125k
      i++;
2458
2459
466k
    if (in_fence && i > fence_pre) {
2460
4.13k
      i = fence_pre;
2461
4.13k
    }
2462
2463
466k
    pre = i;
2464
2465
466k
    if (doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) {
2466
466k
      if (is_codefence(data + beg + i, end - beg - i, NULL, NULL))
2467
6.91k
        in_fence = !in_fence;
2468
466k
      if (in_fence && fence_pre == 0) {
2469
17.1k
        fence_pre = pre;
2470
17.1k
      }
2471
466k
    }
2472
2473
    /* Only check for new list items if we are **not** inside
2474
     * a fenced code block */
2475
466k
    if (!in_fence) {
2476
436k
      has_next_uli = prefix_uli(data + beg + i, end - beg - i);
2477
436k
      has_next_oli = prefix_oli(data + beg + i, end - beg - i);
2478
2479
      /* only check for the next definition if it is same indentation or less
2480
       * since embedded definition lists need terms, so finding just a
2481
       * colon by itself does not mean anything */
2482
436k
      if (pre <= orgpre)
2483
385k
        has_next_dli = prefix_dt(data + beg + i, end - beg - i);
2484
436k
    }
2485
2486
    /* checking for a new item */
2487
466k
    if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || 
2488
466k
      has_next_oli || (*flags & HOEDOWN_LI_DD && has_next_dli)) {
2489
147k
      if (in_empty)
2490
49.7k
        has_inside_empty = 1;
2491
2492
      /* the following item must have the same (or less) indentation */
2493
147k
      if (pre <= orgpre) {
2494
        /* if the following item has different list type, we end this list */
2495
138k
        if (in_empty && (
2496
47.9k
          ((*flags & HOEDOWN_LIST_ORDERED) && has_next_uli) ||
2497
47.9k
          (!(*flags & HOEDOWN_LIST_ORDERED) && has_next_oli))) {
2498
36.1k
          *flags |= HOEDOWN_LI_END;
2499
36.1k
          has_inside_empty = 0;
2500
36.1k
        }
2501
138k
        break;
2502
138k
      }
2503
2504
9.15k
      if (!sublist)
2505
6.42k
        sublist = work->size;
2506
9.15k
    }
2507
    /* joining only indented stuff after empty lines;
2508
     * note that now we only require 1 space of indentation
2509
     * to continue a list */
2510
318k
    else if (in_empty && pre == 0) {
2511
41.5k
      *flags |= HOEDOWN_LI_END;
2512
41.5k
      break;
2513
41.5k
    }
2514
2515
286k
    if (in_empty) {
2516
8.87k
      hoedown_buffer_putc(work, '\n');
2517
8.87k
      has_inside_empty = 1;
2518
8.87k
      in_empty = 0;
2519
8.87k
    }
2520
2521
    /* adding the line without prefix into the working buffer */
2522
286k
    hoedown_buffer_put(work, data + beg + i, end - beg - i);
2523
286k
    beg = end;
2524
286k
  }
2525
2526
  /* render of li contents */
2527
201k
  if (has_inside_empty)
2528
17.3k
    *flags |= HOEDOWN_LI_BLOCK;
2529
2530
201k
  if (*flags & HOEDOWN_LI_BLOCK) {
2531
    /* intermediate render of block li */
2532
67.2k
    pre = 0;
2533
67.2k
    if (sublist && sublist < work->size) {
2534
2.65k
      end = sublist;
2535
64.6k
    } else {
2536
64.6k
      end = work->size;
2537
64.6k
    }
2538
2539
67.2k
    do {
2540
67.2k
      if (!(doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE)) {
2541
0
        break;
2542
0
      }
2543
2544
67.2k
      i = 0;
2545
7.11M
      while (i < end && work->data[i] != '\n') {
2546
7.04M
        i++;
2547
7.04M
      }
2548
2549
67.2k
      len = parse_attributes(work->data, i, attr, attribute, "list", 0, doc->attr_activation);
2550
67.2k
      if (i == len) {
2551
47.0k
        break;
2552
47.0k
      }
2553
2554
20.2k
      pre = i;
2555
20.2k
      parse_block(inter, doc, work->data, len);
2556
20.2k
    } while (0);
2557
2558
0
    parse_block(inter, doc, work->data + pre, end - pre);
2559
67.2k
    if (end == sublist) {
2560
2.65k
      parse_block(inter, doc, work->data + sublist, work->size - sublist);
2561
2.65k
    }
2562
134k
  } else {
2563
    /* intermediate render of inline li */
2564
134k
    if (sublist && sublist < work->size) {
2565
3.77k
      if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
2566
3.77k
        len = parse_attributes(work->data, sublist, attr, attribute, "list", 0, doc->attr_activation);
2567
3.77k
      } else {
2568
0
        len = sublist;
2569
0
      }
2570
3.77k
      parse_inline(inter, doc, work->data, len);
2571
3.77k
      parse_block(inter, doc, work->data + sublist, work->size - sublist);
2572
130k
    } else {
2573
130k
      if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
2574
130k
        len = parse_attributes(work->data, work->size, attr, attribute, "list", 0, doc->attr_activation);
2575
130k
      } else {
2576
0
        len = work->size;
2577
0
      }
2578
130k
      parse_inline(inter, doc, work->data, len);
2579
130k
    }
2580
134k
  }
2581
2582
  /* render of li itself */
2583
201k
  if (doc->md.listitem) {
2584
201k
    doc->ul_item_char = ul_item_char;
2585
201k
    doc->ol_numeral = ol_numeral;
2586
201k
    doc->md.listitem(ob, inter, attr, flags, &doc->data);
2587
201k
    doc->ol_numeral = NULL;
2588
201k
    doc->ul_item_char = 0;
2589
201k
  }
2590
2591
201k
  if (ol_numeral) hoedown_buffer_free(ol_numeral);
2592
2593
201k
  popbuf(doc, BUFFER_SPAN);
2594
201k
  popbuf(doc, BUFFER_SPAN);
2595
201k
  popbuf(doc, BUFFER_ATTRIBUTE);
2596
201k
  return beg;
2597
236k
}
2598
2599
/* parse_definition • parsing of a term/definition pair, assuming starting
2600
 * at start of line */
2601
static size_t
2602
parse_definition(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags *flags, hoedown_buffer *attribute)
2603
63.1k
{
2604
  /* end represents the position of the first line where definitions start */
2605
63.1k
  size_t j = 0, k = 0, len = 0, end = prefix_dli(doc, data, size);
2606
63.1k
  if (end <= 0) {
2607
19.6k
    return 0;
2608
19.6k
  }
2609
43.4k
  hoedown_buffer *work = 0, *attr_work;
2610
2611
2612
  /* scan all the definition terms, rendering them to the output buffer
2613
   * the +1 is to account for the trailing newline on each term
2614
   * j is a counter keeping track of the beginning of each new term */
2615
43.4k
  *flags |= HOEDOWN_LI_DT;
2616
157k
  while (j + 1 < end) {
2617
    /* find the end of the term (where the newline is) */
2618
9.14M
    for(k = j + 1; k - 1 < end && data[k - 1] != '\n'; k++);
2619
2620
113k
    len = k - j;
2621
2622
113k
    if (is_empty(data + j, len)) {
2623
3.05k
      j = k;
2624
3.05k
      continue;
2625
3.05k
    }
2626
2627
110k
    work = newbuf(doc, BUFFER_BLOCK);
2628
110k
    attr_work = newbuf(doc, BUFFER_ATTRIBUTE);
2629
2630
110k
    if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
2631
110k
      len = parse_attributes(data + j, len, attr_work, NULL, "", 1, doc->attr_activation);
2632
110k
    }
2633
2634
110k
    parse_inline(work, doc, data + j, len);
2635
2636
110k
    if (doc->md.listitem) {
2637
110k
      doc->md.listitem(ob, work, attr_work, flags, &doc->data);
2638
110k
    }
2639
2640
110k
    j = k;
2641
2642
110k
    popbuf(doc, BUFFER_BLOCK);
2643
110k
    popbuf(doc, BUFFER_ATTRIBUTE);
2644
110k
  }
2645
43.4k
  *flags &= ~HOEDOWN_LI_DT;
2646
2647
  /* scan all the definitions, rendering it to the output buffer */
2648
43.4k
  *flags |= HOEDOWN_LI_DD;
2649
147k
  while (end < size) {
2650
137k
    j = parse_listitem(ob, doc, data + end, size - end, flags, attribute);
2651
137k
    if (j <= 0) {
2652
34.1k
      break;
2653
34.1k
    }
2654
103k
    end += j;
2655
103k
  }
2656
2657
43.4k
  *flags &= ~HOEDOWN_LI_DD;
2658
43.4k
  *flags &= ~HOEDOWN_LI_END;
2659
2660
43.4k
  return end;
2661
63.1k
}
2662
2663
/* parse_list • parsing ordered or unordered list block */
2664
static size_t
2665
parse_list(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags flags)
2666
94.8k
{
2667
94.8k
  hoedown_buffer *work = 0;
2668
94.8k
  hoedown_buffer *attr = 0;
2669
94.8k
  size_t i = 0, j;
2670
2671
94.8k
  doc->list_depth++;
2672
2673
94.8k
  work = newbuf(doc, BUFFER_BLOCK);
2674
94.8k
  attr = newbuf(doc, BUFFER_ATTRIBUTE);
2675
2676
183k
  while (i < size) {
2677
161k
    if (flags & HOEDOWN_LIST_DEFINITION) {
2678
63.1k
      j = parse_definition(work, doc, data + i, size - i, &flags, attr);
2679
98.6k
    } else {
2680
98.6k
      j = parse_listitem(work, doc, data + i, size - i, &flags, attr);
2681
98.6k
    }
2682
161k
    i += j;
2683
2684
161k
    if (!j || (flags & HOEDOWN_LI_END))
2685
73.0k
      break;
2686
161k
  }
2687
2688
94.8k
  if (doc->md.list)
2689
94.8k
    doc->md.list(ob, work, attr, flags, &doc->data);
2690
94.8k
  popbuf(doc, BUFFER_BLOCK);
2691
94.8k
  popbuf(doc, BUFFER_ATTRIBUTE);
2692
2693
94.8k
  doc->list_depth--;
2694
2695
94.8k
  return i;
2696
94.8k
}
2697
2698
/* parse_atxheader • parsing of atx-style headers */
2699
static size_t
2700
parse_atxheader(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
2701
10.0k
{
2702
10.0k
  size_t level = 0;
2703
10.0k
  size_t i, end, skip;
2704
2705
22.3k
  while (level < size && level < 6 && data[level] == '#')
2706
12.3k
    level++;
2707
2708
46.2k
  for (i = level; i < size && data[i] == ' '; i++);
2709
2710
10.0M
  for (end = i; end < size && data[end] != '\n'; end++);
2711
10.0k
  skip = end;
2712
2713
11.4k
  while (end && data[end - 1] == '#')
2714
1.36k
    end--;
2715
2716
39.0k
  while (end && data[end - 1] == ' ')
2717
29.0k
    end--;
2718
2719
10.0k
  if (end > i) {
2720
9.02k
    hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
2721
9.02k
    hoedown_buffer *attr = newbuf(doc, BUFFER_ATTRIBUTE);
2722
9.02k
    size_t len;
2723
2724
9.02k
    len = end - i;
2725
9.02k
    if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
2726
9.02k
      len = parse_attributes(data + i, end - i, attr, NULL, "", 1, doc->attr_activation);
2727
9.02k
    }
2728
2729
9.02k
    parse_inline(work, doc, data + i, len);
2730
2731
9.02k
    if (doc->md.header) {
2732
9.02k
      doc->header_type = HOEDOWN_HEADER_ATX;
2733
9.02k
      doc->md.header(ob, work, attr, (int)level, &doc->data);
2734
9.02k
      doc->header_type = HOEDOWN_HEADER_NONE;
2735
9.02k
    }
2736
2737
9.02k
    popbuf(doc, BUFFER_SPAN);
2738
9.02k
    popbuf(doc, BUFFER_ATTRIBUTE);
2739
9.02k
  } else {
2740
1.01k
    doc->md.header(ob, NULL, NULL, (int)level, &doc->data);
2741
1.01k
  }
2742
2743
10.0k
  return skip;
2744
10.0k
}
2745
2746
/* parse_footnote_def • parse a single footnote definition */
2747
static void
2748
parse_footnote_def(hoedown_buffer *ob, hoedown_document *doc, unsigned int num, const hoedown_buffer *name, uint8_t *data, size_t size)
2749
755
{
2750
755
  hoedown_buffer *work = 0;
2751
755
  work = newbuf(doc, BUFFER_SPAN);
2752
755
  doc->footnote_id = name;
2753
2754
755
  parse_block(work, doc, data, size);
2755
2756
755
  if (doc->md.footnote_def)
2757
755
  doc->md.footnote_def(ob, work, num, &doc->data);
2758
2759
755
  doc->footnote_id = NULL;
2760
755
  popbuf(doc, BUFFER_SPAN);
2761
755
}
2762
2763
/* parse_footnote_list • render the contents of the footnotes */
2764
static void
2765
parse_footnote_list(hoedown_buffer *ob, hoedown_document *doc, struct footnote_list *footnotes)
2766
10.1k
{
2767
10.1k
  hoedown_buffer *work = 0;
2768
10.1k
  struct footnote_item *item;
2769
10.1k
  struct footnote_ref *ref;
2770
2771
10.1k
  if (footnotes->count == 0)
2772
9.61k
    return;
2773
2774
582
  work = newbuf(doc, BUFFER_BLOCK);
2775
2776
582
  item = footnotes->head;
2777
1.33k
  while (item) {
2778
755
    ref = item->ref;
2779
755
    parse_footnote_def(work, doc, ref->num, ref->name, ref->contents->data, ref->contents->size);
2780
755
    item = item->next;
2781
755
  }
2782
2783
582
  if (doc->md.footnotes)
2784
582
    doc->md.footnotes(ob, work, &doc->data);
2785
582
  popbuf(doc, BUFFER_BLOCK);
2786
582
}
2787
2788
/* htmlblock_is_end • check for end of HTML block : </tag>( *)\n */
2789
/*  returns tag length on match, 0 otherwise */
2790
/*  assumes data starts with "<" */
2791
static size_t
2792
htmlblock_is_end(
2793
  const char *tag,
2794
  size_t tag_len,
2795
  hoedown_document *doc,
2796
  uint8_t *data,
2797
  size_t size)
2798
2.80M
{
2799
2.80M
  size_t i = tag_len + 3, w;
2800
2801
  /* try to match the end tag */
2802
  /* note: we're not considering tags like "</tag >" which are still valid */
2803
2.80M
  if (i > size ||
2804
2.80M
    data[1] != '/' ||
2805
2.80M
    strncasecmp((char *)data + 2, tag, tag_len) != 0 ||
2806
2.80M
    data[tag_len + 2] != '>')
2807
2.25M
    return 0;
2808
2809
  /* rest of the line must be empty */
2810
543k
  if ((w = is_empty(data + i, size - i)) == 0 && i < size)
2811
68.9k
    return 0;
2812
2813
474k
  return i + w;
2814
543k
}
2815
2816
/* htmlblock_find_end • try to find HTML block ending tag */
2817
/*  returns the length on match, 0 otherwise */
2818
static size_t
2819
htmlblock_find_end(
2820
  const char *tag,
2821
  size_t tag_len,
2822
  hoedown_document *doc,
2823
  uint8_t *data,
2824
  size_t size)
2825
8.03M
{
2826
8.03M
  size_t i = 0, w;
2827
2828
10.3M
  while (1) {
2829
1.32G
    while (i < size && data[i] != '<') i++;
2830
10.3M
    if (i >= size) return 0;
2831
2832
2.80M
    w = htmlblock_is_end(tag, tag_len, doc, data + i, size - i);
2833
2.80M
    if (w) return i + w;
2834
2.32M
    i++;
2835
2.32M
  }
2836
8.03M
}
2837
2838
/* htmlblock_find_end_strict • try to find end of HTML block in strict mode */
2839
/*  (it must have a blank line or a new HTML tag afterwards) */
2840
/*  returns the length on match, 0 otherwise */
2841
static size_t
2842
htmlblock_find_end_strict(
2843
  const char *tag,
2844
  size_t tag_len,
2845
  hoedown_document *doc,
2846
  uint8_t *data,
2847
  size_t size)
2848
10.5k
{
2849
10.5k
  size_t i = 0, mark;
2850
2851
8.03M
  while (1) {
2852
8.03M
    mark = i;
2853
783M
    while (i < size && data[i] != '\n') i++;
2854
8.03M
    if (i < size) i++;
2855
8.03M
    if (i == mark) return 0;
2856
2857
8.02M
    mark += htmlblock_find_end(tag, tag_len, doc, data + mark, i - mark);
2858
8.02M
    if (mark == i && (is_empty(data + i, size - i) || (i + 1 < size && data[i] == '<' && data[i + 1] != '/') || i >= size)) break;
2859
8.02M
  }
2860
2861
1.25k
  return i;
2862
10.5k
}
2863
2864
/* parse_htmlblock • parsing of inline HTML block */
2865
static size_t
2866
parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int do_render)
2867
201k
{
2868
201k
  hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
2869
201k
  size_t i, j = 0, tag_len, tag_end;
2870
201k
  const char *curtag = NULL;
2871
201k
  int meta = 0;
2872
2873
201k
  work.data = data;
2874
2875
  /* identification of the opening tag */
2876
201k
  if (size < 2 || data[0] != '<')
2877
196
    return 0;
2878
2879
200k
  i = 1;
2880
20.2M
  while (i < size && data[i] != '>' && data[i] != ' ')
2881
20.0M
    i++;
2882
2883
200k
  if (i < size) {
2884
194k
    if (doc->ext_flags & HOEDOWN_EXT_HTML5_BLOCKS)
2885
194k
      curtag = hoedown_find_html5_block_tag((char *)data + 1, (int)i - 1);
2886
0
    else
2887
0
      curtag = hoedown_find_block_tag((char *)data + 1, (int)i - 1);
2888
194k
  }  
2889
2890
  /* handling of special cases */
2891
200k
  if (!curtag) {
2892
2893
    /* HTML comment, laxist form */
2894
190k
    if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
2895
27.2k
      i = 5;
2896
2897
27.2k
      if (data[4] == '*') {
2898
334
        meta++;
2899
334
      }
2900
2901
102M
      while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
2902
102M
        i++;
2903
2904
27.2k
      if (data[i - 3] == '*') {
2905
827
        meta++;
2906
827
      }
2907
2908
27.2k
      i++;
2909
2910
27.2k
      if (i < size)
2911
24.7k
        j = is_empty(data + i, size - i);
2912
2913
27.2k
      if (j) {
2914
1.13k
        work.size = i + j;
2915
2916
1.13k
        if (do_render && doc->ext_flags & HOEDOWN_EXT_META_BLOCK &&
2917
1.13k
          meta == 2 && doc->meta) {
2918
0
          size_t org, sz;
2919
2920
0
          sz = work.size - 5;
2921
0
          while (sz > 0 && work.data[sz - 1] == '\n') {
2922
0
            sz--;
2923
0
          }
2924
2925
0
          org = 5;
2926
0
          while (org < sz && work.data[org] == '\n') {
2927
0
            org++;
2928
0
          }
2929
2930
0
          if (org < sz) {
2931
0
            hoedown_buffer_put(doc->meta, work.data + org, sz - org);
2932
0
            hoedown_buffer_putc(doc->meta, '\n');
2933
0
          }
2934
1.13k
        } else if (do_render && doc->md.blockhtml) {
2935
0
          doc->md.blockhtml(ob, &work, &doc->data);
2936
0
        }
2937
1.13k
        return work.size;
2938
1.13k
      }
2939
27.2k
    }
2940
2941
    /* HR, which is the only self-closing block tag considered */
2942
189k
    if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
2943
8.89k
      i = 3;
2944
33.9M
      while (i < size && data[i] != '>')
2945
33.8M
        i++;
2946
2947
8.89k
      if (i + 1 < size) {
2948
8.09k
        i++;
2949
8.09k
        j = is_empty(data + i, size - i);
2950
8.09k
        if (j) {
2951
791
          work.size = i + j;
2952
791
          if (do_render && doc->md.blockhtml)
2953
0
            doc->md.blockhtml(ob, &work, &doc->data);
2954
791
          return work.size;
2955
791
        }
2956
8.09k
      }
2957
8.89k
    }
2958
2959
    /* Extension script tags */
2960
188k
    if (doc->ext_flags & HOEDOWN_EXT_SCRIPT_TAGS) {
2961
188k
      i = script_tag_length(data, size);
2962
188k
      if (i) {
2963
9.26k
        if (i < size) {
2964
6.41k
          j = is_empty(data + i, size - i);
2965
6.41k
        }
2966
2967
9.26k
        if (j) {
2968
579
          work.size = i + j;
2969
579
          if (do_render && doc->md.blockhtml) {
2970
0
            doc->md.blockhtml(ob, &work, &doc->data);
2971
0
          }
2972
579
          return work.size;
2973
579
        }
2974
9.26k
      }
2975
2976
188k
    }
2977
2978
    /* no special case recognised */
2979
187k
    return 0;
2980
188k
  }
2981
2982
  /* looking for a matching closing tag in strict mode */
2983
10.5k
  tag_len = strlen(curtag);
2984
10.5k
  tag_end = htmlblock_find_end_strict(curtag, tag_len, doc, data, size);
2985
2986
  /* if not found, trying a second pass looking for indented match */
2987
  /* but not if tag is "ins" or "del" (following original Markdown.pl) */
2988
10.5k
  if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0)
2989
8.29k
    tag_end = htmlblock_find_end(curtag, tag_len, doc, data, size);
2990
2991
10.5k
  if (!tag_end)
2992
8.18k
    return 0;
2993
2994
  /* the end of the block has been found */
2995
2.38k
  work.size = tag_end;
2996
2.38k
  if (do_render && doc->md.blockhtml)
2997
0
    doc->md.blockhtml(ob, &work, &doc->data);
2998
2999
2.38k
  return tag_end;
3000
10.5k
}
3001
3002
/* Common function to parse table main rows and continued rows. */
3003
static size_t
3004
parse_table_cell_line(
3005
    hoedown_buffer *ob,
3006
    uint8_t *data,
3007
    size_t size,
3008
    size_t offset,
3009
    char separator,
3010
57.5k
    int is_continuation) {
3011
57.5k
  size_t pos, line_end, cell_start, cell_end, len, copy_start, copy_end;
3012
3013
57.5k
  pos = offset;
3014
3015
113k
  while (pos < size && _isspace(data[pos])) pos++;
3016
3017
57.5k
  cell_start = pos;
3018
3019
57.5k
  line_end = pos;
3020
15.6M
  while (line_end < size && data[line_end] != '\n') line_end++;
3021
57.5k
  len = find_separator_char(data + pos, line_end - pos, separator);
3022
3023
  /* Two possibilities for len == 0:
3024
     1) No more separator char found in the current line.
3025
     2) The next separator is right after the current one, i.e. empty cell.
3026
     For case 1, we skip to the end of line; for case 2 we just continue.
3027
  */
3028
57.5k
  if (len == 0 && pos < size && data[pos] != separator) {
3029
1.68M
    while (pos + len < size && data[pos + len] != '\n') len++;
3030
23.5k
  }
3031
57.5k
  pos += len;
3032
3033
57.5k
  cell_end = pos - 1;
3034
3035
397k
  while (cell_end > cell_start && _isspace(data[cell_end]))
3036
339k
    cell_end--;
3037
3038
  /* If this isn't the first line of the cell, add a new line before the
3039
     extra cell contents, to separate them (and make backslash linebreaks
3040
     work).
3041
  */
3042
57.5k
  if (is_continuation) hoedown_buffer_putc(ob, '\n');
3043
3044
  /* Remove escaping from pipes */
3045
57.5k
  copy_start = copy_end = cell_start;
3046
13.4M
  while (copy_end < cell_end + 1) {
3047
13.3M
    if (data[copy_end] == separator && copy_end > copy_start && data[copy_end - 1] == '\\') {
3048
3.01k
      hoedown_buffer_put(ob, data + copy_start, copy_end - copy_start - 1);
3049
3.01k
      copy_start = copy_end;
3050
3.01k
    }
3051
13.3M
    copy_end++;
3052
13.3M
  }
3053
57.5k
  hoedown_buffer_put(ob, data + copy_start, copy_end - copy_start);
3054
3055
57.5k
  return pos - offset;
3056
57.5k
}
3057
3058
static void
3059
parse_table_row(
3060
  hoedown_buffer *ob,
3061
  hoedown_document *doc,
3062
  uint8_t *data,
3063
  size_t size,
3064
  size_t columns,
3065
  size_t rows,
3066
  hoedown_table_flags *col_data,
3067
  hoedown_table_flags header_flag)
3068
110k
{
3069
110k
  size_t i = 0, col;
3070
110k
  hoedown_buffer *row_work = 0;
3071
3072
110k
  if (!doc->md.table_cell || !doc->md.table_row)
3073
84.9k
    return;
3074
3075
25.8k
  row_work = newbuf(doc, BUFFER_SPAN);
3076
3077
  /* skip optional first pipe */
3078
25.8k
  if (i < size && data[i] == '|')
3079
13.1k
    i++;
3080
3081
59.8k
  for (col = 0; col < columns && i < size; ++col) {
3082
33.9k
    size_t pos, extra_rows_in_cell;
3083
33.9k
    hoedown_buffer *cell_content;
3084
33.9k
    hoedown_buffer *cell_work;
3085
3086
    /* cell_content is the text that is inline parsed into cell_work. It
3087
       consists of the values of this cell from each row, concatenated and
3088
       separated by new lines.
3089
    */
3090
33.9k
    cell_content = newbuf(doc, BUFFER_SPAN);
3091
33.9k
    cell_work = newbuf(doc, BUFFER_SPAN);
3092
3093
33.9k
    i += parse_table_cell_line(cell_content, data, size, i, '|', 0 /* is_contination */);
3094
3095
    /* Add extra rows of the cell. This only occurs if rows is greater than 0,
3096
       which only happens when multiline tables are enabled.
3097
3098
       Each extra row is a colon, followed by cell contents for the continued
3099
       row, separated by colons.
3100
    */
3101
33.9k
    extra_rows_in_cell = rows - 1;
3102
33.9k
    pos = i;
3103
57.5k
    while (extra_rows_in_cell > 0 && pos < size) {
3104
23.6k
      size_t c;
3105
3106
      /* seek to the end of the current row */
3107
2.47M
      while (pos < size && data[pos] != '\n') {
3108
2.45M
        pos++;
3109
2.45M
      }
3110
3111
      /* skip new line and leading colon */
3112
23.6k
      if (pos < size) pos++;
3113
23.6k
      if (pos < size) pos++;
3114
3115
      /* Seek to the beginning of the correct column on the continuation line.
3116
       * The continuation line should have the expected number of columns, and
3117
       * so we never expect pos >= size or data[pos] == '\n'. These checks serve
3118
       * as defense in depth against wrong preconditions. */
3119
42.9k
      for (c = 0; c < col; c++) {
3120
3.71M
        while (pos < size && data[pos] != '\n' && (is_backslashed(data, pos) || data[pos] != ':'))
3121
3.69M
          pos++;
3122
19.3k
        if (pos < size && data[pos] == ':') pos++;  /* skip colon */
3123
19.3k
      }
3124
3125
23.6k
      parse_table_cell_line(cell_content, data, size, pos, ':', 1 /* is_contination */);
3126
3127
23.6k
      extra_rows_in_cell--;
3128
23.6k
    }
3129
3130
33.9k
    parse_inline(cell_work, doc, cell_content->data, cell_content->size);
3131
3132
33.9k
    doc->md.table_cell(row_work, cell_work, col_data[col] | header_flag, &doc->data);
3133
3134
33.9k
    popbuf(doc, BUFFER_SPAN);
3135
33.9k
    popbuf(doc, BUFFER_SPAN);
3136
33.9k
    i++;
3137
33.9k
  }
3138
3139
29.3k
  for (; col < columns; ++col) {
3140
3.51k
    hoedown_buffer empty_cell = { 0, 0, 0, 0, NULL, NULL, NULL };
3141
3.51k
    doc->md.table_cell(row_work, &empty_cell, col_data[col] | header_flag, &doc->data);
3142
3.51k
  }
3143
3144
25.8k
  doc->md.table_row(ob, row_work, &doc->data);
3145
3146
25.8k
  popbuf(doc, BUFFER_SPAN);
3147
25.8k
}
3148
3149
static size_t
3150
parse_table_header(
3151
  hoedown_buffer *ob,
3152
  hoedown_buffer *attr,
3153
  hoedown_document *doc,
3154
  uint8_t *data,
3155
  size_t size,
3156
  size_t *columns,
3157
  hoedown_table_flags **column_data)
3158
4.47M
{
3159
4.47M
  int pipes, rows;
3160
4.47M
  size_t i = 0, col, header_end, under_end;
3161
4.47M
  hoedown_buffer *header_contents = 0;
3162
3163
4.47M
  pipes = 0;
3164
321M
  while (i < size && data[i] != '\n') {
3165
316M
    if (!is_backslashed(data, i) && data[i] == '|') {
3166
9.06M
      pipes++;
3167
9.06M
    }
3168
316M
    i++;
3169
316M
  }
3170
3171
4.47M
  if (i == size || pipes == 0)
3172
4.24M
    return 0;
3173
3174
229k
  header_end = i;
3175
3176
730k
  while (header_end > 0 && _isspace(data[header_end - 1]))
3177
500k
    header_end--;
3178
3179
229k
  if (data[0] == '|')
3180
96.9k
    pipes--;
3181
3182
229k
  if (header_end && data[header_end - 1] == '|' && !is_backslashed(data, header_end - 1))
3183
86.0k
    pipes--;
3184
3185
229k
  if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
3186
229k
    size_t n = parse_attributes(data, header_end, attr, NULL, "", 1, doc->attr_activation);
3187
    /* n == header_end when no attribute is found */
3188
229k
    if (n != header_end) {
3189
19.5k
      while (n > 0 && _isspace(data[n - 1]))
3190
0
        n--;
3191
19.5k
      if (attr->size && n && data[n - 1] == '|' && !is_backslashed(data, n - 1))
3192
2.45k
        pipes--;
3193
3194
19.5k
      header_end = n + 1;
3195
19.5k
    }
3196
229k
  }
3197
3198
229k
  if (pipes < 0)
3199
44.1k
    return 0;
3200
3201
  /* header_contents will have the lines of the header copied into it, and then
3202
     is passed to parse_table_row. We need a separate buffer to avoid passing
3203
     the attribute to parse_table_row.
3204
  */
3205
185k
  header_contents = newbuf(doc, BUFFER_SPAN);
3206
185k
  hoedown_buffer_put(header_contents, data, header_end);
3207
3208
185k
  *columns = pipes + 1;
3209
185k
  *column_data = hoedown_calloc(*columns, sizeof(hoedown_table_flags));
3210
3211
  /* If the multiline table extension is enabled, check the next lines for
3212
     continuation markers, to find the number of text rows that make up this
3213
     logical row, and copy the contents of each row to header_contents,
3214
     separated by new lines.
3215
  */
3216
185k
  rows = 1;
3217
185k
  if ((doc->ext_flags & HOEDOWN_EXT_MULTILINE_TABLES) != 0) {
3218
192k
    while (i < size) {
3219
192k
      size_t j = i + 1;
3220
192k
      int colons = 0;
3221
3222
      /* Require that the continuation line starts with a colon */
3223
192k
      if (j >= size || data[j] != ':') break;
3224
      /* Skip the leading colon to match the pipe counting behavior above */
3225
28.3k
      j++;
3226
3227
      /* Require that the continuation line start with ": ", to
3228
         distinguish from ":-" which could start a left-aligned header
3229
         bar.
3230
      */
3231
28.3k
      if (j >= size || data[j] != ' ') break;
3232
3233
14.1M
      while (j < size && data[j] != '\n') {
3234
14.1M
        j++;
3235
14.1M
        if (j < size && !is_backslashed(data, j) && data[j] == ':')
3236
447k
          colons++;
3237
14.1M
      }
3238
3239
      /* Allow a trailing colon to match the pipe counting behavior above */
3240
11.6k
      if (!is_backslashed(data, j - 1) && data[j - 1] == ':')
3241
766
        colons--;
3242
3243
11.6k
      if (colons != pipes) break;
3244
3245
7.60k
      hoedown_buffer_putc(header_contents, '\n');
3246
      /* data[i] is the previous new line, and data[j] is the next new
3247
         line. This copies all the text between the new lines.
3248
       */
3249
7.60k
      hoedown_buffer_put(header_contents, data + i + 1, j - i - 1);
3250
3251
7.60k
      rows++;
3252
7.60k
      i = j;
3253
7.60k
      header_end = j;
3254
7.60k
    }
3255
185k
  }
3256
3257
  /* Parse the header underline */
3258
185k
  i++;
3259
185k
  if (i < size && data[i] == '|')
3260
20.8k
    i++;
3261
3262
185k
  under_end = i;
3263
41.4M
  while (under_end < size && data[under_end] != '\n')
3264
41.2M
    under_end++;
3265
3266
235k
  for (col = 0; col < *columns && i < under_end; ++col) {
3267
189k
    size_t dashes = 0;
3268
3269
394k
    while (i < under_end && data[i] == ' ')
3270
204k
      i++;
3271
3272
189k
    if (i < under_end && data[i] == ':') {
3273
35.6k
      i++; (*column_data)[col] |= HOEDOWN_TABLE_ALIGN_LEFT;
3274
35.6k
      dashes++;
3275
35.6k
    }
3276
3277
337k
    while (i < under_end && data[i] == '-') {
3278
148k
      i++; dashes++;
3279
148k
    }
3280
3281
189k
    if (i < under_end && data[i] == ':') {
3282
31.3k
      i++; (*column_data)[col] |= HOEDOWN_TABLE_ALIGN_RIGHT;
3283
31.3k
      dashes++;
3284
31.3k
    }
3285
3286
287k
    while (i < under_end && data[i] == ' ')
3287
98.6k
      i++;
3288
3289
189k
    if (i < under_end && data[i] != '|' && data[i] != '+')
3290
102k
      break;
3291
3292
86.9k
    if (dashes < 3)
3293
36.7k
      break;
3294
3295
50.2k
    i++;
3296
50.2k
  }
3297
3298
185k
  if (col < *columns) {
3299
    /* clean up header_contents */
3300
155k
    popbuf(doc, BUFFER_SPAN);
3301
155k
    return 0;
3302
155k
  }
3303
3304
29.5k
  parse_table_row(
3305
29.5k
    ob, doc, header_contents->data,
3306
29.5k
    header_contents->size,
3307
29.5k
    *columns,
3308
29.5k
    rows,
3309
29.5k
    *column_data,
3310
29.5k
    HOEDOWN_TABLE_HEADER
3311
29.5k
  );
3312
3313
  /* clean up header_contents */
3314
29.5k
  popbuf(doc, BUFFER_SPAN);
3315
3316
29.5k
  return under_end + 1;
3317
185k
}
3318
3319
static size_t
3320
parse_table(
3321
  hoedown_buffer *ob,
3322
  hoedown_document *doc,
3323
  uint8_t *data,
3324
  size_t size)
3325
4.47M
{
3326
4.47M
  size_t i;
3327
3328
4.47M
  hoedown_buffer *work = 0;
3329
4.47M
  hoedown_buffer *header_work = 0;
3330
4.47M
  hoedown_buffer *body_work = 0;
3331
4.47M
  hoedown_buffer *attr_work = 0;
3332
3333
4.47M
  size_t columns;
3334
4.47M
  hoedown_table_flags *col_data = NULL;
3335
3336
4.47M
  work = newbuf(doc, BUFFER_BLOCK);
3337
4.47M
  header_work = newbuf(doc, BUFFER_SPAN);
3338
4.47M
  body_work = newbuf(doc, BUFFER_BLOCK);
3339
4.47M
  attr_work = newbuf(doc, BUFFER_ATTRIBUTE);
3340
4.47M
  i = parse_table_header(header_work, attr_work, doc, data, size, &columns, &col_data);
3341
4.47M
  if (i > 0) {
3342
3343
110k
    while (i < size) {
3344
100k
      size_t row_start;
3345
100k
      size_t pipes = 0;
3346
100k
      size_t rows = 1;
3347
3348
100k
      row_start = i;
3349
3350
61.4M
      while (i < size && data[i] != '\n') {
3351
61.3M
        if (data[i] == '|' && !is_backslashed(data, i)) pipes++;
3352
61.3M
        i++;
3353
61.3M
      }
3354
3355
100k
      if (pipes == 0 || i == size) {
3356
19.1k
        i = row_start;
3357
19.1k
        break;
3358
19.1k
      }
3359
3360
      /* Don't count a leading pipe. */
3361
81.1k
      if (data[row_start] == '|')
3362
30.3k
        pipes--;
3363
3364
      /* Don't count a trailing pipe. */
3365
81.1k
      if (data[i - 1] == '|' && !is_backslashed(data, i - 1))
3366
9.47k
        pipes--;
3367
3368
      /* If the multiline table extension is enabled, check the next
3369
         lines for continuation markers, to find the number of text rows
3370
         that make up this logical row.
3371
      */
3372
81.1k
      if ((doc->ext_flags & HOEDOWN_EXT_MULTILINE_TABLES) != 0) {
3373
98.1k
        while (i < size) {
3374
97.9k
          size_t j = i + 1;
3375
97.9k
          size_t colons = 0;
3376
3377
          /* Require that a continued row starts with a colon. */
3378
97.9k
          if (j >= size || data[j] != ':') break;
3379
3380
          /* Don't count leading colon for comparison to pipes. */
3381
22.3k
          j++;
3382
3383
31.2M
          while (j < size && data[j] != '\n') {
3384
31.2M
            if (!is_backslashed(data, j) && data[j] == ':')
3385
1.69M
              colons++;
3386
31.2M
            j++;
3387
31.2M
          }
3388
3389
          /* Don't count a trailing colon for comparison to pipes. */
3390
22.3k
          if (!is_backslashed(data, j - 1) && data[j - 1] == ':')
3391
1.98k
            colons--;
3392
3393
          /* Hoedown allows table rows where the number of cells is different
3394
           * from `columns`. In this case, `parse_table_row` will add empty
3395
           * cells. However, the code does not work in the multi-line case, so
3396
           * we require the right number of columns. */
3397
22.3k
          if (colons != pipes || colons + 1 != columns) break;
3398
3399
16.9k
          rows++;
3400
16.9k
          i = j;
3401
16.9k
        }
3402
81.1k
      }
3403
3404
81.1k
      parse_table_row(
3405
81.1k
        body_work,
3406
81.1k
        doc,
3407
81.1k
        data + row_start,
3408
81.1k
        i - row_start,
3409
81.1k
        columns,
3410
81.1k
        rows,
3411
81.1k
        col_data, 0
3412
81.1k
      );
3413
3414
81.1k
      i++;
3415
3416
      /* Skip an optional row separator, if it's there. */
3417
81.1k
      if ((doc->ext_flags & HOEDOWN_EXT_MULTILINE_TABLES) != 0) {
3418
        /* Use j instead of i, and set i to j only if this is actually a row separator. */
3419
81.1k
        size_t j = i, next_line_end = i, col;
3420
3421
        /* Seek next_line_end to the position of the terminating new line. */
3422
60.3M
        while (next_line_end < size && data[next_line_end] != '\n')
3423
60.2M
          next_line_end++;
3424
3425
        /* Skip leading pipe, if any. */
3426
81.1k
        if (j < next_line_end && data[j] == '|')
3427
18.2k
          j++;
3428
3429
        /* Ensure that there are at least columns pipe/plus separated
3430
           runs of dashes, each at least 3 long. The pipes may be
3431
           padded with spaces, and the line may end in a pipe.
3432
        */
3433
84.2k
        for (col = 0; col < columns && j < next_line_end; col++) {
3434
70.4k
          size_t dashes = 0;
3435
3436
140k
          while (j < next_line_end && data[j] == ' ')
3437
70.3k
            j++;
3438
3439
114k
          while (j < next_line_end && data[j] == '-') {
3440
44.0k
            j++;
3441
44.0k
            dashes++;
3442
44.0k
          }
3443
3444
91.0k
          while (j < next_line_end && data[j] == ' ')
3445
20.6k
            j++;
3446
3447
70.4k
          if (j < next_line_end && data[j] != '|' && data[j] != '+')
3448
42.3k
            break;
3449
3450
28.1k
          if (dashes < 3)
3451
25.0k
            break;
3452
3453
3.08k
          j++;
3454
3.08k
        }
3455
3456
        /* Skip i past the row separator, if it was valid. */
3457
81.1k
        if (col == columns)
3458
2.90k
          i = next_line_end + 1;
3459
81.1k
      }
3460
81.1k
    }
3461
3462
29.5k
    if (doc->md.table_header)
3463
11.7k
      doc->md.table_header(work, header_work, &doc->data);
3464
3465
29.5k
    if (doc->md.table_body)
3466
11.7k
      doc->md.table_body(work, body_work, &doc->data);
3467
3468
29.5k
    if (doc->md.table)
3469
11.7k
      doc->md.table(ob, work, attr_work, &doc->data);
3470
29.5k
  }
3471
3472
4.47M
  free(col_data);
3473
4.47M
  popbuf(doc, BUFFER_SPAN);
3474
4.47M
  popbuf(doc, BUFFER_BLOCK);
3475
4.47M
  popbuf(doc, BUFFER_BLOCK);
3476
4.47M
  popbuf(doc, BUFFER_ATTRIBUTE);
3477
4.47M
  return i;
3478
4.47M
}
3479
3480
/* parse_userblock • parsing of user block */
3481
static size_t
3482
parse_userblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
3483
0
{
3484
0
  hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
3485
0
  size_t len = doc->user_block(data, size, &doc->data);
3486
3487
0
  if (!len) {
3488
0
    return 0;
3489
0
  }
3490
3491
0
  work.data = data;
3492
0
  work.size = len;
3493
3494
0
  if (doc->md.user_block) {
3495
0
    doc->md.user_block(ob, &work, &doc->data);
3496
0
  } else {
3497
0
    hoedown_buffer_put(ob, work.data, work.size);
3498
0
  }
3499
0
  return len;
3500
0
}
3501
3502
/* is_paragraph • returns if the next block is a paragraph (doesn't follow any
3503
 * other special rules for other types of blocks) */
3504
static int
3505
is_paragraph(hoedown_document *doc, uint8_t *txt_data, size_t end)
3506
4.05M
{
3507
  /* temporary buffer for results of checking special blocks */
3508
4.05M
  hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK);
3509
  /* temporary renderer that has no rendering function */
3510
4.05M
  hoedown_renderer temp_renderer;
3511
  /* ensure all callbacks are NULL */
3512
4.05M
  memset(&temp_renderer, 0, sizeof(hoedown_renderer));
3513
  /* store the old renderer */
3514
4.05M
  hoedown_renderer old_renderer;
3515
4.05M
  memcpy(&old_renderer, &doc->md, sizeof(hoedown_renderer));
3516
  /* copy the new renderer over to the document */
3517
4.05M
  memcpy(&doc->md, &temp_renderer, sizeof(hoedown_renderer));
3518
  /* these are all the if branches inside parse_block, wrapped into one bool,
3519
   * with minimal parsing, and completely idempotent */
3520
4.05M
  int result = !(is_atxheader(doc, txt_data, end) ||
3521
4.05M
          (doc->user_block && parse_userblock(tmp, doc, txt_data, end)) ||
3522
4.05M
          (txt_data[0] == '<' &&
3523
4.04M
            parse_htmlblock(tmp, doc, txt_data, end, 0)) ||
3524
4.05M
          is_hrule(txt_data, end) ||
3525
4.05M
          ((doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) &&
3526
4.02M
            parse_fencedcode(tmp, doc, txt_data, end, doc->ext_flags)) ||
3527
4.05M
          ((doc->ext_flags & HOEDOWN_EXT_TABLES) &&
3528
4.02M
            parse_table(tmp, doc, txt_data, end)) ||
3529
4.05M
          prefix_quote(txt_data, end) ||
3530
4.05M
          (!(doc->ext_flags & HOEDOWN_EXT_DISABLE_INDENTED_CODE) &&
3531
3.98M
            prefix_code(txt_data, end)) ||
3532
4.05M
          prefix_uli(txt_data, end) ||
3533
4.05M
          prefix_oli(txt_data, end) ||
3534
4.05M
          ((doc->ext_flags & HOEDOWN_EXT_DEFINITION_LISTS) &&
3535
3.93M
            prefix_dli(doc, txt_data, end)));
3536
4.05M
  popbuf(doc, BUFFER_BLOCK);
3537
4.05M
  memcpy(&doc->md, &old_renderer, sizeof(hoedown_renderer));
3538
4.05M
  return result;
3539
4.05M
}
3540
3541
/* parse_block • parsing of one block, returning next uint8_t to parse */
3542
static void
3543
parse_block(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
3544
138k
{
3545
138k
  size_t beg, end, i;
3546
138k
  uint8_t *txt_data;
3547
138k
  beg = 0;
3548
3549
138k
  if (doc->work_bufs[BUFFER_SPAN].size +
3550
138k
    doc->work_bufs[BUFFER_BLOCK].size > doc->max_nesting)
3551
448
    return;
3552
3553
796k
  while (beg < size) {
3554
658k
    txt_data = data + beg;
3555
658k
    end = size - beg;
3556
3557
658k
    if (is_atxheader(doc, txt_data, end))
3558
10.0k
      beg += parse_atxheader(ob, doc, txt_data, end);
3559
3560
648k
    else if (doc->user_block &&
3561
648k
        (i = parse_userblock(ob, doc, txt_data, end)) != 0)
3562
0
      beg += i;
3563
3564
648k
    else if (data[beg] == '<' && doc->md.blockhtml &&
3565
648k
        (i = parse_htmlblock(ob, doc, txt_data, end, 1)) != 0)
3566
0
      beg += i;
3567
3568
648k
    else if ((i = is_empty(txt_data, end)) != 0)
3569
165k
      beg += i;
3570
3571
482k
    else if (is_hrule(txt_data, end)) {
3572
121k
      while (beg < size && data[beg] != '\n')
3573
104k
        beg++;
3574
3575
17.3k
      if (doc->md.hrule) {
3576
17.3k
        doc->hrule_char = data[beg - 1];
3577
17.3k
        doc->md.hrule(ob, &doc->data);
3578
17.3k
        doc->hrule_char = 0;
3579
17.3k
      }
3580
3581
17.3k
      beg++;
3582
17.3k
    }
3583
3584
465k
    else if ((doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) != 0 &&
3585
465k
      (i = parse_fencedcode(ob, doc, txt_data, end, doc->ext_flags)) != 0)
3586
9.77k
      beg += i;
3587
3588
455k
    else if ((doc->ext_flags & HOEDOWN_EXT_TABLES) != 0 &&
3589
455k
      (i = parse_table(ob, doc, txt_data, end)) != 0)
3590
11.7k
      beg += i;
3591
3592
444k
    else if (prefix_quote(txt_data, end))
3593
34.1k
      beg += parse_blockquote(ob, doc, txt_data, end);
3594
3595
409k
    else if (!(doc->ext_flags & HOEDOWN_EXT_DISABLE_INDENTED_CODE) && prefix_code(txt_data, end))
3596
0
      beg += parse_blockcode(ob, doc, txt_data, end);
3597
3598
409k
    else if (prefix_uli(txt_data, end))
3599
35.1k
      beg += parse_list(ob, doc, txt_data, end, 0);
3600
3601
374k
    else if (prefix_oli(txt_data, end))
3602
30.8k
      beg += parse_list(ob, doc, txt_data, end, HOEDOWN_LIST_ORDERED);
3603
3604
344k
    else if ((doc->ext_flags & HOEDOWN_EXT_DEFINITION_LISTS) && prefix_dli(doc, txt_data, end))
3605
28.9k
      beg += parse_list(ob, doc, txt_data, end, HOEDOWN_LIST_DEFINITION);
3606
3607
315k
    else
3608
315k
      beg += parse_paragraph(ob, doc, txt_data, end);
3609
658k
  }
3610
138k
}
3611
3612
3613
3614
/*********************
3615
 * REFERENCE PARSING *
3616
 *********************/
3617
3618
/* is_footnote • returns whether a line is a footnote definition or not */
3619
static int
3620
is_footnote(const uint8_t *data, size_t beg, size_t end, size_t *last, struct footnote_list *list)
3621
2.02M
{
3622
2.02M
  size_t i = 0;
3623
2.02M
  hoedown_buffer *contents = NULL;
3624
2.02M
  hoedown_buffer *name = NULL;
3625
2.02M
  size_t ind = 0;
3626
2.02M
  int in_empty = 0;
3627
2.02M
  size_t start = 0;
3628
3629
2.02M
  size_t id_offset, id_end;
3630
2.02M
  size_t id_indent = 0, content_line = 0, content_indent = 0;
3631
3632
  /* up to 3 optional leading spaces */
3633
2.02M
  if (beg + 3 >= end) return 0;
3634
2.02M
  if (data[beg] == ' ') { i = 1;
3635
135k
  if (data[beg + 1] == ' ') { i = 2;
3636
17.7k
  if (data[beg + 2] == ' ') { i = 3;
3637
10.0k
  if (data[beg + 3] == ' ') return 0; } } }
3638
2.01M
  i += beg;
3639
3640
  /* id part: caret followed by anything between brackets */
3641
2.01M
  if (data[i] != '[') return 0;
3642
710k
  i++;
3643
710k
  if (i >= end || data[i] != '^') return 0;
3644
515k
  i++;
3645
515k
  id_offset = i;
3646
1.62M
  while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
3647
1.11M
    i++;
3648
515k
  if (i >= end || data[i] != ']') return 0;
3649
508k
  id_end = i;
3650
3651
  /* spacer: colon (space | tab)* newline? (space | tab)* */
3652
508k
  i++;
3653
508k
  if (i >= end || data[i] != ':') return 0;
3654
497k
  i++;
3655
497k
  if (i >= end) return 0;
3656
3657
  /* getting content and name buffers */
3658
497k
  contents = hoedown_buffer_new(64);
3659
497k
  name = hoedown_buffer_new(64);
3660
3661
497k
  start = i;
3662
3663
  /* getting item indent size */
3664
1.59M
  while (id_indent != start && data[start - id_indent] != '\n' && data[start - id_indent] != '\r') {
3665
1.09M
    id_indent++;
3666
1.09M
  }
3667
3668
  /* process lines similar to a list item */
3669
1.05M
  while (i < end) {
3670
10.2M
    while (i < end && data[i] != '\n' && data[i] != '\r') i++;
3671
3672
    /* process an empty line */
3673
1.05M
    if (is_empty(data + start, i - start)) {
3674
453k
      in_empty = 1;
3675
453k
      if (i < end && (data[i] == '\n' || data[i] == '\r')) {
3676
453k
        i++;
3677
453k
        if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++;
3678
453k
      }
3679
453k
      start = i;
3680
453k
      continue;
3681
453k
    }
3682
3683
    /* calculating the indentation */
3684
601k
    ind = 0;
3685
779k
    while (ind < 4 && start + ind < end && data[start + ind] == ' ')
3686
177k
      ind++;
3687
3688
601k
    content_line++;
3689
3690
    /* joining only indented stuff after empty lines;
3691
     * note that now we only require 1 space of indentation
3692
     * to continue, just like lists */
3693
601k
    if (ind == 0) {
3694
528k
      if (start == id_end + 2 && data[start] == '\t') {}
3695
496k
      else break;
3696
528k
    }
3697
73.5k
    else if (in_empty) {
3698
3.58k
      hoedown_buffer_putc(contents, '\n');
3699
3.58k
    }
3700
3701
104k
    in_empty = 0;
3702
3703
    /* re-calculating the indentation */
3704
104k
    if (content_line == 2 && data[start + ind] == ' ') {
3705
46.1k
      while (ind < id_indent && data[start + ind] == ' ') {
3706
23.4k
        ind++;
3707
23.4k
      }
3708
22.6k
      content_indent = ind;
3709
22.6k
    }
3710
104k
    if (content_indent > ind) {
3711
21.8k
      while (ind < content_indent && data[start + ind] == ' ') {
3712
7.77k
        ind++;
3713
7.77k
      }
3714
14.0k
    }
3715
3716
    /* adding the line into the content buffer */
3717
104k
    hoedown_buffer_put(contents, data + start + ind, i - start - ind);
3718
    /* add carriage return */
3719
104k
    if (i < end) {
3720
104k
      hoedown_buffer_putc(contents, '\n');
3721
104k
      if (i < end && (data[i] == '\n' || data[i] == '\r')) {
3722
104k
        i++;
3723
104k
        if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++;
3724
104k
      }
3725
104k
    }
3726
104k
    start = i;
3727
104k
  }
3728
3729
497k
  if (last)
3730
497k
    *last = start;
3731
3732
497k
  if (list) {
3733
497k
    struct footnote_ref *ref;
3734
497k
    ref = create_footnote_ref(list, data + id_offset, id_end - id_offset);
3735
497k
    if (!ref)
3736
0
      return 0;
3737
497k
    if (!add_footnote_ref(list, ref)) {
3738
0
      free_footnote_ref(ref);
3739
0
      return 0;
3740
0
    }
3741
497k
    ref->contents = contents;
3742
497k
    hoedown_buffer_put(name, data + id_offset, id_end - id_offset);
3743
497k
    ref->name = name;
3744
497k
  }
3745
3746
497k
  return 1;
3747
497k
}
3748
3749
/* is_html_comment • returns whether data[beg..end) starts with an HTML comment */
/*  the comment must open with "<!--" and be closed by "-->" before end */
/*  on a match *last (when last is not NULL) receives the offset just past the */
/*  comment, including one line break character directly after it if present */
/*  returns 1 on a match, 0 otherwise */
static int
is_html_comment(const uint8_t *data, size_t beg, size_t end, size_t *last)
{
  size_t pos;

  /* too short to hold a comment */
  if (beg + 5 >= end)
    return 0;

  if (data[beg] != '<' || data[beg + 1] != '!' ||
      data[beg + 2] != '-' || data[beg + 3] != '-')
    return 0;

  /* look for the '>' that completes "-->" */
  pos = beg + 5;
  while (pos < end) {
    if (data[pos - 2] == '-' && data[pos - 1] == '-' && data[pos] == '>')
      break;
    pos++;
  }

  /* running into end means the comment is never closed */
  if (pos >= end)
    return 0;
  pos++;

  /* take the line break character that follows; a '\r' coming right after
   * a '\n' is taken along with it */
  if (pos < end && (data[pos] == '\n' || data[pos] == '\r')) {
    pos++;
    if (pos < end && data[pos] == '\r' && data[pos - 1] == '\n')
      pos++;
  }

  if (last)
    *last = pos;

  return 1;
}
3774
3775
/* is_ref • returns whether a line is a reference or not */
3776
static int
3777
is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
3778
1.50M
{
3779
/*  int n; */
3780
1.50M
  size_t i = 0;
3781
1.50M
  size_t id_offset, id_end;
3782
1.50M
  size_t link_offset, link_end;
3783
1.50M
  size_t title_offset, title_end;
3784
1.50M
  size_t line_end;
3785
1.50M
  size_t attr_offset = 0, attr_end = 0;
3786
3787
  /* up to 3 optional leading spaces */
3788
1.50M
  if (beg + 3 >= end) return 0;
3789
1.50M
  if (data[beg] == ' ') { i = 1;
3790
135k
  if (data[beg + 1] == ' ') { i = 2;
3791
17.7k
  if (data[beg + 2] == ' ') { i = 3;
3792
10.0k
  if (data[beg + 3] == ' ') return 0; } } }
3793
1.49M
  i += beg;
3794
3795
  /* id part: anything but a newline between brackets */
3796
1.49M
  if (data[i] != '[') return 0;
3797
213k
  i++;
3798
213k
  id_offset = i;
3799
5.01M
  while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
3800
4.79M
    i++;
3801
213k
  if (i >= end || data[i] != ']') return 0;
3802
197k
  id_end = i;
3803
3804
  /* spacer: colon (space | tab)* newline? (space | tab)* */
3805
197k
  i++;
3806
197k
  if (i >= end || data[i] != ':') return 0;
3807
172k
  i++;
3808
174k
  while (i < end && data[i] == ' ') i++;
3809
172k
  if (i < end && (data[i] == '\n' || data[i] == '\r')) {
3810
37.1k
    i++;
3811
37.1k
    if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
3812
173k
  while (i < end && data[i] == ' ') i++;
3813
172k
  if (i >= end) return 0;
3814
3815
  /* link: spacing-free sequence, optionally between angle brackets */
3816
172k
  if (data[i] == '<')
3817
1.72k
    i++;
3818
3819
172k
  link_offset = i;
3820
3821
3.95M
  while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
3822
3.78M
    i++;
3823
3824
172k
  if (data[i - 1] == '>') link_end = i - 1;
3825
171k
  else link_end = i;
3826
3827
  /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
3828
244k
  while (i < end && data[i] == ' ') i++;
3829
172k
  if (i < end && data[i] != '\n' && data[i] != '\r'
3830
172k
      && data[i] != '\'' && data[i] != '"' && data[i] != '(')
3831
9.19k
    return 0;
3832
162k
  line_end = 0;
3833
  /* computing end-of-line */
3834
162k
  if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
3835
162k
  if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
3836
1.04k
    line_end = i + 1;
3837
3838
  /* optional (space|tab)* spacer after a newline */
3839
162k
  if (line_end) {
3840
101k
    i = line_end + 1;
3841
109k
    while (i < end && data[i] == ' ') i++; }
3842
3843
  /* optional title: any non-newline sequence enclosed in '"()
3844
          alone on its line */
3845
162k
  title_offset = title_end = 0;
3846
162k
  if (i + 1 < end
3847
162k
  && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
3848
109k
    char d = data[i++];
3849
109k
    title_offset = i;
3850
3851
    /* looking for end of tile */
3852
3.65M
    while (i < end && data[i] != d && data[i] != '\n' && data[i] != '\r') {
3853
3.54M
      ++i;
3854
3.54M
    }
3855
3856
109k
    if (i + 1 < end && data[i] == d) {
3857
59.1k
      title_end = i++;
3858
59.1k
      attr_offset = i;
3859
3860
      /* looking for EOL */
3861
3.57M
      while (i < end && data[i] != '\n' && data[i] != '\r') {
3862
3.51M
        i++;
3863
3.51M
      }
3864
3865
      /* looking for attribute */
3866
59.1k
      if (data[i-1] == '}' &&
3867
59.1k
        memchr(&data[attr_offset], '{', i - attr_offset)) {
3868
156k
        while (attr_offset < i && data[attr_offset] != '{') {
3869
105k
          ++attr_offset;
3870
105k
        }
3871
51.0k
        ++attr_offset;
3872
51.0k
        attr_end = i - 1;
3873
51.0k
      } else {
3874
8.07k
        if (data[i-1] == d) {
3875
2.42k
          title_end = i - 1;
3876
5.65k
        } else {
3877
5.65k
          title_end = i;
3878
5.65k
        }
3879
8.07k
        attr_offset = 0;
3880
8.07k
        attr_end = 0;
3881
8.07k
      }
3882
59.1k
      if (i + 1 < end && data[i] == '\r' && data[i + 1] == '\n') {
3883
519
        ++i;
3884
519
      }
3885
3886
59.1k
      line_end = i;
3887
59.1k
    } else {
3888
      /* looking for EOL */
3889
50.7k
      while (i < end && data[i] != '\n' && data[i] != '\r') {
3890
11
        i++;
3891
11
      }
3892
50.7k
      if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') {
3893
686
        title_end = i + 1;
3894
50.0k
      } else {
3895
50.0k
        title_end = i;
3896
50.0k
      }
3897
      /* stepping back */
3898
50.7k
      i -= 1;
3899
51.4k
      while (i > title_offset && data[i] == ' ') {
3900
766
        i -= 1;
3901
766
      }
3902
50.7k
      if (i > title_offset &&
3903
50.7k
        (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
3904
44.2k
        line_end = title_end;
3905
44.2k
        title_end = i;
3906
44.2k
      }
3907
50.7k
    }
3908
109k
  }
3909
3910
162k
  if (!line_end || link_end == link_offset)
3911
30.2k
    return 0; /* garbage after the link empty link */
3912
3913
  /* a valid ref has been found, filling-in return structures */
3914
132k
  if (last)
3915
132k
    *last = line_end;
3916
3917
132k
  if (refs) {
3918
132k
    struct link_ref *ref;
3919
3920
132k
    ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
3921
132k
    if (!ref)
3922
0
      return 0;
3923
3924
132k
    ref->link = hoedown_buffer_new(link_end - link_offset);
3925
132k
    hoedown_buffer_put(ref->link, data + link_offset, link_end - link_offset);
3926
3927
132k
    if (title_end > title_offset) {
3928
53.6k
      ref->title = hoedown_buffer_new(title_end - title_offset);
3929
53.6k
      hoedown_buffer_put(ref->title, data + title_offset, title_end - title_offset);
3930
53.6k
    }
3931
132k
    if (attr_end > attr_offset) {
3932
51.0k
      ref->attr = hoedown_buffer_new(attr_end - attr_offset);
3933
51.0k
      hoedown_buffer_put(ref->attr, data + attr_offset, attr_end - attr_offset);
3934
51.0k
    }
3935
132k
  }
3936
3937
132k
  return 1;
3938
132k
}
3939
3940
/* expand_tabs • appends `line` to ob, replacing each tab with one to four
 * spaces so that the running column count reaches the next multiple of 4 */
static void expand_tabs(hoedown_buffer *ob, const uint8_t *line, size_t size)
{
  /* Two assumptions are baked in:
   * - the input is valid UTF-8 (every byte whose top two bits are 10 is
   *   left out of the column count, whether or not it really is a
   *   continuation byte);
   * - the input has no combining characters (they ought to be left out of
   *   the count as well, but are not).
   */
  size_t pos = 0, column = 0;

  while (pos < size) {
    const size_t run_start = pos;

    /* advance over the run of non-tab bytes, counting columns */
    for (; pos < size && line[pos] != '\t'; pos++) {
      if ((line[pos] & 0xc0) != 0x80)
        column++;
    }

    if (pos > run_start)
      hoedown_buffer_put(ob, line + run_start, pos - run_start);

    if (pos >= size)
      break;

    /* a tab always yields at least one space */
    do {
      hoedown_buffer_putc(ob, ' ');
      column++;
    } while (column % 4);

    pos++;
  }
}
3973
3974
/**********************
3975
 * EXPORTED FUNCTIONS *
3976
 **********************/
3977
3978
hoedown_document *
3979
hoedown_document_new(
3980
  const hoedown_renderer *renderer,
3981
  hoedown_extensions extensions,
3982
  size_t max_nesting,
3983
  uint8_t attr_activation,
3984
  hoedown_user_block user_block,
3985
  hoedown_buffer *meta)
3986
10.1k
{
3987
10.1k
  hoedown_document *doc = NULL;
3988
3989
10.1k
  assert(max_nesting > 0 && renderer);
3990
3991
10.1k
  doc = hoedown_malloc(sizeof(hoedown_document));
3992
10.1k
  memcpy(&doc->md, renderer, sizeof(hoedown_renderer));
3993
3994
10.1k
  doc->data.opaque = renderer->opaque;
3995
3996
10.1k
  hoedown_stack_init(&doc->work_bufs[BUFFER_BLOCK], 4);
3997
10.1k
  hoedown_stack_init(&doc->work_bufs[BUFFER_SPAN], 8);
3998
10.1k
  hoedown_stack_init(&doc->work_bufs[BUFFER_ATTRIBUTE], 8);
3999
4000
10.1k
  memset(doc->active_char, 0x0, 256);
4001
4002
10.1k
  if (extensions & HOEDOWN_EXT_UNDERLINE && doc->md.underline) {
4003
10.1k
    doc->active_char['_'] = MD_CHAR_EMPHASIS;
4004
10.1k
  }
4005
4006
10.1k
  if (doc->md.emphasis || doc->md.double_emphasis || doc->md.triple_emphasis) {
4007
10.1k
    doc->active_char['*'] = MD_CHAR_EMPHASIS;
4008
10.1k
    doc->active_char['_'] = MD_CHAR_EMPHASIS;
4009
10.1k
    if (extensions & HOEDOWN_EXT_STRIKETHROUGH)
4010
10.1k
      doc->active_char['~'] = MD_CHAR_EMPHASIS;
4011
10.1k
    if (extensions & HOEDOWN_EXT_HIGHLIGHT)
4012
10.1k
      doc->active_char['='] = MD_CHAR_EMPHASIS;
4013
10.1k
  }
4014
4015
10.1k
  if (doc->md.codespan)
4016
10.1k
    doc->active_char['`'] = MD_CHAR_CODESPAN;
4017
4018
10.1k
  if (doc->md.linebreak)
4019
10.1k
    doc->active_char['\n'] = MD_CHAR_LINEBREAK;
4020
4021
10.1k
  if (doc->md.image || doc->md.link || doc->md.footnotes || doc->md.footnote_ref) {
4022
10.1k
    doc->active_char['['] = MD_CHAR_LINK;
4023
10.1k
    doc->active_char['!'] = MD_CHAR_IMAGE;
4024
10.1k
  }
4025
4026
10.1k
  doc->active_char['<'] = MD_CHAR_LANGLE;
4027
10.1k
  doc->active_char['\\'] = MD_CHAR_ESCAPE;
4028
10.1k
  doc->active_char['&'] = MD_CHAR_ENTITY;
4029
4030
10.1k
  if (extensions & HOEDOWN_EXT_AUTOLINK) {
4031
10.1k
    doc->active_char[':'] = MD_CHAR_AUTOLINK_URL;
4032
10.1k
    doc->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
4033
10.1k
    doc->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
4034
10.1k
  }
4035
4036
10.1k
  if (extensions & HOEDOWN_EXT_SUPERSCRIPT)
4037
10.1k
    doc->active_char['^'] = MD_CHAR_SUPERSCRIPT;
4038
4039
10.1k
  if (extensions & HOEDOWN_EXT_QUOTE)
4040
10.1k
    doc->active_char['"'] = MD_CHAR_QUOTE;
4041
4042
10.1k
  if (extensions & HOEDOWN_EXT_MATH)
4043
10.1k
    doc->active_char['$'] = MD_CHAR_MATH;
4044
4045
  /* Extension data */
4046
10.1k
  doc->ext_flags = extensions;
4047
10.1k
  doc->max_nesting = max_nesting;
4048
10.1k
  doc->attr_activation = attr_activation;
4049
10.1k
  doc->in_link_body = 0;
4050
10.1k
  doc->link_id = NULL;
4051
10.1k
  doc->link_ref_attr = NULL;
4052
10.1k
  doc->link_inline_attr = NULL;
4053
10.1k
  doc->is_escape_char = 0;
4054
10.1k
  doc->header_type = HOEDOWN_HEADER_NONE;
4055
10.1k
  doc->link_type = HOEDOWN_LINK_NONE;
4056
10.1k
  doc->footnote_id = NULL;
4057
10.1k
  doc->list_depth = 0;
4058
10.1k
  doc->blockquote_depth = 0;
4059
10.1k
  doc->ul_item_char = 0;
4060
10.1k
  doc->hrule_char = 0;
4061
10.1k
  doc->fencedcode_char = 0;
4062
10.1k
  doc->ol_numeral = NULL;
4063
10.1k
  doc->user_block = user_block;
4064
10.1k
  doc->meta = meta;
4065
4066
10.1k
  return doc;
4067
10.1k
}
4068
4069
void
4070
hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size)
4071
10.1k
{
4072
10.1k
  static const uint8_t UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
4073
4074
10.1k
  hoedown_buffer *text;
4075
10.1k
  size_t beg, end;
4076
4077
10.1k
  int footnotes_enabled;
4078
4079
10.1k
  text = hoedown_buffer_new(64);
4080
4081
  /* Preallocate enough space for our buffer to avoid expanding while copying */
4082
10.1k
  hoedown_buffer_grow(text, size);
4083
4084
  /* reset the references table */
4085
10.1k
  memset(&doc->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
4086
4087
10.1k
  footnotes_enabled = doc->ext_flags & HOEDOWN_EXT_FOOTNOTES;
4088
4089
  /* reset the footnotes lists */
4090
10.1k
  if (footnotes_enabled) {
4091
10.1k
    memset(&doc->footnotes_found, 0x0, sizeof(doc->footnotes_found));
4092
10.1k
    memset(&doc->footnotes_used, 0x0, sizeof(doc->footnotes_used));
4093
10.1k
  }
4094
4095
  /* first pass: looking for references, copying everything else */
4096
10.1k
  beg = 0;
4097
4098
  /* Skip a possible UTF-8 BOM, even though the Unicode standard
4099
   * discourages having these in UTF-8 documents */
4100
10.1k
  if (size >= 3 && memcmp(data, UTF8_BOM, 3) == 0)
4101
31
    beg += 3;
4102
4103
2.03M
  while (beg < size) /* iterating over lines */
4104
2.02M
    if (footnotes_enabled && is_footnote(data, beg, size, &end, &doc->footnotes_found)) {
4105
497k
      if (doc->md.footnote_ref_def) {
4106
0
        hoedown_buffer original = { NULL, 0, 0, 0, NULL, NULL, NULL };
4107
0
        original.data = (uint8_t*) (data + beg);
4108
0
        original.size = end - beg;
4109
0
        doc->md.footnote_ref_def(&original, &doc->data);
4110
0
      }
4111
497k
      beg = end;
4112
1.52M
    } else if (is_html_comment(data, beg, size, &end)) {
4113
22.0k
      size_t  i = 0;
4114
6.29M
      while (i < (end - beg) && beg + i < size) {
4115
6.27M
        if (data[beg + i] == '\t' && (data[beg + i] & 0xc0) != 0x80) {
4116
1.52M
          hoedown_buffer_put(text, (uint8_t*)"    ", 4);
4117
4.74M
        } else {
4118
4.74M
          hoedown_buffer_putc(text, data[beg + i]);
4119
4.74M
        }
4120
6.27M
        i++;
4121
6.27M
      }
4122
22.0k
      beg = end;
4123
1.50M
    } else if (is_ref(data, beg, size, &end, doc->refs)) {
4124
132k
      if (doc->md.ref) {
4125
0
        hoedown_buffer original = { NULL, 0, 0, 0, NULL, NULL, NULL };
4126
0
        original.data = (uint8_t*) (data + beg);
4127
0
        original.size = end - beg;
4128
0
        doc->md.ref(&original, &doc->data);
4129
0
      }
4130
132k
      beg = end;
4131
1.37M
    } else { /* skipping to the next line */
4132
1.37M
      end = beg;
4133
90.7M
      while (end < size && data[end] != '\n' && data[end] != '\r')
4134
89.3M
        end++;
4135
4136
      /* adding the line body if present */
4137
1.37M
      if (end > beg)
4138
1.24M
        expand_tabs(text, data + beg, end - beg);
4139
4140
2.94M
      while (end < size && (data[end] == '\n' || data[end] == '\r')) {
4141
        /* add one \n per newline */
4142
1.57M
        if (data[end] == '\n' || (end + 1 < size && data[end + 1] != '\n'))
4143
1.56M
          hoedown_buffer_putc(text, '\n');
4144
1.57M
        end++;
4145
1.57M
      }
4146
4147
1.37M
      beg = end;
4148
1.37M
    }
4149
4150
  /* pre-grow the output buffer to minimize allocations */
4151
10.1k
  hoedown_buffer_grow(ob, text->size + (text->size >> 1));
4152
4153
  /* second pass: actual rendering */
4154
10.1k
  if (doc->md.doc_header)
4155
0
    doc->md.doc_header(ob, 0, &doc->data);
4156
4157
10.1k
  if (text->size) {
4158
    /* adding a final newline if not already present */
4159
10.0k
    if (text->data[text->size - 1] != '\n')
4160
9.06k
      hoedown_buffer_putc(text, '\n');
4161
4162
10.0k
    parse_block(ob, doc, text->data, text->size);
4163
10.0k
  }
4164
4165
  /* footnotes */
4166
10.1k
  if (footnotes_enabled)
4167
10.1k
    parse_footnote_list(ob, doc, &doc->footnotes_used);
4168
4169
10.1k
  if (doc->md.doc_footer)
4170
0
    doc->md.doc_footer(ob, 0, &doc->data);
4171
4172
  /* clean-up */
4173
10.1k
  hoedown_buffer_free(text);
4174
10.1k
  free_link_refs(doc->refs);
4175
10.1k
  if (footnotes_enabled) {
4176
10.1k
    free_footnote_list(&doc->footnotes_found, 1);
4177
10.1k
    free_footnote_list(&doc->footnotes_used, 0);
4178
10.1k
  }
4179
4180
10.1k
  assert(doc->work_bufs[BUFFER_SPAN].size == 0);
4181
10.1k
  assert(doc->work_bufs[BUFFER_BLOCK].size == 0);
4182
10.1k
  assert(doc->work_bufs[BUFFER_ATTRIBUTE].size == 0);
4183
10.1k
}
4184
4185
void
4186
hoedown_document_render_inline(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size)
4187
0
{
4188
0
  size_t i = 0, mark;
4189
0
  hoedown_buffer *text = hoedown_buffer_new(64);
4190
4191
  /* reset the references table */
4192
0
  memset(&doc->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
4193
4194
  /* first pass: expand tabs and process newlines */
4195
0
  hoedown_buffer_grow(text, size);
4196
0
  while (1) {
4197
0
    mark = i;
4198
0
    while (i < size && data[i] != '\n' && data[i] != '\r')
4199
0
      i++;
4200
4201
0
    expand_tabs(text, data + mark, i - mark);
4202
4203
0
    if (i >= size)
4204
0
      break;
4205
4206
0
    while (i < size && (data[i] == '\n' || data[i] == '\r')) {
4207
      /* add one \n per newline */
4208
0
      if (data[i] == '\n' || (i + 1 < size && data[i + 1] != '\n'))
4209
0
        hoedown_buffer_putc(text, '\n');
4210
0
      i++;
4211
0
    }
4212
0
  }
4213
4214
  /* second pass: actual rendering */
4215
0
  hoedown_buffer_grow(ob, text->size + (text->size >> 1));
4216
4217
0
  if (doc->md.doc_header)
4218
0
    doc->md.doc_header(ob, 1, &doc->data);
4219
4220
0
  parse_inline(ob, doc, text->data, text->size);
4221
4222
0
  if (doc->md.doc_footer)
4223
0
    doc->md.doc_footer(ob, 1, &doc->data);
4224
4225
  /* clean-up */
4226
0
  hoedown_buffer_free(text);
4227
4228
0
  assert(doc->work_bufs[BUFFER_SPAN].size == 0);
4229
0
  assert(doc->work_bufs[BUFFER_BLOCK].size == 0);
4230
0
}
4231
4232
void
4233
hoedown_document_free(hoedown_document *doc)
4234
10.1k
{
4235
10.1k
  size_t i;
4236
4237
97.2k
  for (i = 0; i < (size_t)doc->work_bufs[BUFFER_SPAN].asize; ++i)
4238
87.0k
    hoedown_buffer_free(doc->work_bufs[BUFFER_SPAN].item[i]);
4239
4240
56.0k
  for (i = 0; i < (size_t)doc->work_bufs[BUFFER_BLOCK].asize; ++i)
4241
45.8k
    hoedown_buffer_free(doc->work_bufs[BUFFER_BLOCK].item[i]);
4242
4243
92.4k
  for (i = 0; i < (size_t)doc->work_bufs[BUFFER_ATTRIBUTE].asize; ++i)
4244
82.2k
    hoedown_buffer_free(doc->work_bufs[BUFFER_ATTRIBUTE].item[i]);
4245
4246
10.1k
  hoedown_stack_uninit(&doc->work_bufs[BUFFER_SPAN]);
4247
10.1k
  hoedown_stack_uninit(&doc->work_bufs[BUFFER_BLOCK]);
4248
10.1k
  hoedown_stack_uninit(&doc->work_bufs[BUFFER_ATTRIBUTE]);
4249
4250
10.1k
  free(doc);
4251
10.1k
}
4252
4253
const hoedown_buffer*
4254
hoedown_document_link_id(hoedown_document* document)
4255
0
{
4256
0
  return document->link_id;
4257
0
}
4258
4259
const hoedown_buffer*
4260
hoedown_document_link_ref_attr(hoedown_document* document)
4261
0
{
4262
0
  return document->link_ref_attr;
4263
0
}
4264
4265
const hoedown_buffer*
4266
hoedown_document_link_inline_attr(hoedown_document* document)
4267
0
{
4268
0
  return document->link_inline_attr;
4269
0
}
4270
4271
int
4272
hoedown_document_is_escaped(hoedown_document* document)
4273
0
{
4274
0
  return document->is_escape_char;
4275
0
}
4276
4277
hoedown_header_type
4278
hoedown_document_header_type(hoedown_document* document)
4279
0
{
4280
0
  return document->header_type;
4281
0
}
4282
4283
hoedown_link_type
4284
hoedown_document_link_type(hoedown_document* document)
4285
0
{
4286
0
  return document->link_type;
4287
0
}
4288
4289
const hoedown_buffer*
4290
hoedown_document_footnote_id(hoedown_document* document)
4291
0
{
4292
0
  return document->footnote_id;
4293
0
}
4294
4295
int
4296
hoedown_document_list_depth(hoedown_document* document)
4297
0
{
4298
0
  return document->list_depth;
4299
0
}
4300
4301
int
4302
hoedown_document_blockquote_depth(hoedown_document* document)
4303
0
{
4304
0
  return document->blockquote_depth;
4305
0
}
4306
4307
uint8_t
4308
hoedown_document_ul_item_char(hoedown_document* document)
4309
0
{
4310
0
  return document->ul_item_char;
4311
0
}
4312
4313
uint8_t
4314
hoedown_document_hrule_char(hoedown_document* document)
4315
0
{
4316
0
  return document->hrule_char;
4317
0
}
4318
4319
uint8_t
4320
hoedown_document_fencedcode_char(hoedown_document* document)
4321
0
{
4322
0
  return document->fencedcode_char;
4323
0
}
4324
4325
const hoedown_buffer*
4326
hoedown_document_ol_numeral(hoedown_document* document)
4327
0
{
4328
0
    return document->ol_numeral;
4329
0
}