Coverage Report

Created: 2025-02-03 06:23

/src/hoextdown/src/document.c
Line
Count
Source (jump to first uncovered line)
1
#include "document.h"
2
3
#include <assert.h>
4
#include <string.h>
5
#include <ctype.h>
6
#include <stdio.h>
7
8
#include "stack.h"
9
10
#ifndef _MSC_VER
11
#include <strings.h>
12
#else
13
#define strncasecmp _strnicmp
14
#endif
15
16
780k
/* number of buckets in the link reference table (hoedown_document.refs) */
#define REF_TABLE_SIZE 8

/* indices into hoedown_document.work_bufs: one buffer pool per kind */
#define BUFFER_BLOCK 0
#define BUFFER_SPAN 1
#define BUFFER_ATTRIBUTE 2

/* block tag lookup helpers; only declared here, their definitions are not
 * part of this file's visible portion */
const char *hoedown_find_block_tag(const char *str, unsigned int len);
const char *hoedown_find_html5_block_tag(const char *str, unsigned int len);
24
25
/***************
26
 * LOCAL TYPES *
27
 ***************/
28
29
/* link_ref: reference to a link */
30
struct link_ref {
31
  unsigned int id;
32
33
  hoedown_buffer *link;
34
  hoedown_buffer *title;
35
  hoedown_buffer *attr;
36
37
  struct link_ref *next;
38
};
39
40
/* footnote_ref: reference to a footnote */
41
struct footnote_ref {
42
  unsigned int id;
43
44
  int is_used;
45
  unsigned int num;
46
47
  hoedown_buffer *contents;
48
49
  /* the original string id of the footnote, before conversion to an int */
50
  hoedown_buffer *name;
51
};
52
53
/* footnote_item: a node of a footnote_list */
struct footnote_item {
  struct footnote_ref *ref;    /* the footnote this node refers to */
  struct footnote_item *next;  /* following node, or NULL at the tail */
};
58
59
/* footnote_list: singly linked list of footnote_item with O(1) append */
struct footnote_list {
  unsigned int count;          /* number of items appended so far */
  struct footnote_item *head;  /* first item, NULL while the list is empty */
  struct footnote_item *tail;  /* last item; add_footnote_ref appends here */
};
65
66
/* char_trigger: function pointer to render active chars */
67
/*   returns the number of chars taken care of */
68
/*   data is the pointer of the beginning of the span */
69
/*   offset is the number of valid chars before data */
70
typedef size_t
71
(*char_trigger)(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
72
73
static size_t char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
74
static size_t char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
75
static size_t char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
76
static size_t char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
77
static size_t char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
78
static size_t char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
79
static size_t char_langle_tag(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
80
static size_t char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
81
static size_t char_autolink_email(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
82
static size_t char_autolink_www(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
83
static size_t char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
84
static size_t char_image(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
85
static size_t char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
86
static size_t char_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size);
87
88
/* markdown_char_t: handler index stored per byte in
 * hoedown_document.active_char. The values double as indices into
 * markdown_char_ptrs, so the order of both must be kept in sync.
 * MD_CHAR_NONE (0) marks a byte that triggers nothing. */
enum markdown_char_t {
  MD_CHAR_NONE = 0,
  MD_CHAR_EMPHASIS,
  MD_CHAR_CODESPAN,
  MD_CHAR_LINEBREAK,
  MD_CHAR_LINK,
  MD_CHAR_IMAGE,
  MD_CHAR_LANGLE,
  MD_CHAR_ESCAPE,
  MD_CHAR_ENTITY,
  MD_CHAR_AUTOLINK_URL,
  MD_CHAR_AUTOLINK_EMAIL,
  MD_CHAR_AUTOLINK_WWW,
  MD_CHAR_SUPERSCRIPT,
  MD_CHAR_QUOTE,
  MD_CHAR_MATH
};
105
106
/* markdown_char_ptrs: handler table indexed by enum markdown_char_t;
 * entries must stay in the same order as the enumerators */
static char_trigger markdown_char_ptrs[] = {
  NULL,                /* MD_CHAR_NONE */
  char_emphasis,       /* MD_CHAR_EMPHASIS */
  char_codespan,       /* MD_CHAR_CODESPAN */
  char_linebreak,      /* MD_CHAR_LINEBREAK */
  char_link,           /* MD_CHAR_LINK */
  char_image,          /* MD_CHAR_IMAGE */
  char_langle_tag,     /* MD_CHAR_LANGLE */
  char_escape,         /* MD_CHAR_ESCAPE */
  char_entity,         /* MD_CHAR_ENTITY */
  char_autolink_url,   /* MD_CHAR_AUTOLINK_URL */
  char_autolink_email, /* MD_CHAR_AUTOLINK_EMAIL */
  char_autolink_www,   /* MD_CHAR_AUTOLINK_WWW */
  char_superscript,    /* MD_CHAR_SUPERSCRIPT */
  char_quote,          /* MD_CHAR_QUOTE */
  char_math            /* MD_CHAR_MATH */
};
123
124
struct hoedown_document {
125
  hoedown_renderer md;
126
  hoedown_renderer_data data;
127
128
  uint8_t attr_activation;
129
130
  struct link_ref *refs[REF_TABLE_SIZE];
131
  struct footnote_list footnotes_found;
132
  struct footnote_list footnotes_used;
133
  uint8_t active_char[256];
134
  hoedown_stack work_bufs[3];
135
  hoedown_extensions ext_flags;
136
  size_t max_nesting;
137
  int in_link_body;
138
139
  /* extra information provided to callbacks */
140
  const hoedown_buffer *link_id;
141
  const hoedown_buffer *link_inline_attr;
142
  const hoedown_buffer *link_ref_attr;
143
  int is_escape_char;
144
  hoedown_header_type header_type;
145
  hoedown_link_type link_type;
146
  const hoedown_buffer *footnote_id;
147
  int list_depth;
148
  int blockquote_depth;
149
  uint8_t ul_item_char;
150
  uint8_t hrule_char;
151
  uint8_t fencedcode_char;
152
  const hoedown_buffer *ol_numeral;
153
154
  hoedown_user_block user_block;
155
  hoedown_buffer *meta;
156
};
157
158
/***************************
159
 * HELPER FUNCTIONS *
160
 ***************************/
161
162
static hoedown_buffer *
163
newbuf(hoedown_document *doc, int type)
164
39.0M
{
165
39.0M
  static const size_t buf_size[3] = {256, 64, 64};
166
39.0M
  hoedown_buffer *work = NULL;
167
39.0M
  hoedown_stack *pool = &doc->work_bufs[type];
168
169
39.0M
  if (pool->size < pool->asize &&
170
39.0M
    pool->item[pool->size] != NULL) {
171
38.9M
    work = pool->item[pool->size++];
172
38.9M
    work->size = 0;
173
38.9M
  } else {
174
72.2k
    work = hoedown_buffer_new(buf_size[type]);
175
72.2k
    hoedown_stack_push(pool, work);
176
72.2k
  }
177
178
39.0M
  return work;
179
39.0M
}
180
181
static void
182
popbuf(hoedown_document *doc, int type)
183
37.8M
{
184
37.8M
  doc->work_bufs[type].size--;
185
37.8M
}
186
187
static void
188
unscape_text(hoedown_buffer *ob, hoedown_buffer *src)
189
276k
{
190
276k
  size_t i = 0, org;
191
291k
  while (i < src->size) {
192
290k
    org = i;
193
13.8M
    while (i < src->size && src->data[i] != '\\')
194
13.5M
      i++;
195
196
290k
    if (i > org)
197
238k
      hoedown_buffer_put(ob, src->data + org, i - org);
198
199
290k
    if (i + 1 >= src->size)
200
275k
      break;
201
202
15.0k
    hoedown_buffer_putc(ob, src->data[i + 1]);
203
15.0k
    i += 2;
204
15.0k
  }
205
276k
}
206
207
/* hash_link_ref: case-insensitive hash of a reference name. Every byte is
 * lowercased with tolower() and folded in with the sdbm recurrence
 * h = c + (h << 6) + (h << 16) - h. An empty name hashes to 0. */
static unsigned int
hash_link_ref(const uint8_t *link_ref, size_t length)
{
  unsigned int h = 0;
  size_t pos = 0;

  while (pos < length) {
    h = tolower(link_ref[pos]) + (h << 6) + (h << 16) - h;
    pos++;
  }

  return h;
}
218
219
static struct link_ref *
220
add_link_ref(
221
  struct link_ref **references,
222
  const uint8_t *name, size_t name_size)
223
101k
{
224
101k
  struct link_ref *ref = hoedown_calloc(1, sizeof(struct link_ref));
225
226
101k
  ref->id = hash_link_ref(name, name_size);
227
101k
  ref->next = references[ref->id % REF_TABLE_SIZE];
228
229
101k
  references[ref->id % REF_TABLE_SIZE] = ref;
230
101k
  return ref;
231
101k
}
232
233
static struct link_ref *
234
find_link_ref(struct link_ref **references, uint8_t *name, size_t length)
235
488k
{
236
488k
  unsigned int hash = hash_link_ref(name, length);
237
488k
  struct link_ref *ref = NULL;
238
239
488k
  ref = references[hash % REF_TABLE_SIZE];
240
241
22.7M
  while (ref != NULL) {
242
22.4M
    if (ref->id == hash)
243
264k
      return ref;
244
245
22.2M
    ref = ref->next;
246
22.2M
  }
247
248
224k
  return NULL;
249
488k
}
250
251
static void
252
free_link_refs(struct link_ref **references)
253
8.92k
{
254
8.92k
  size_t i;
255
256
80.3k
  for (i = 0; i < REF_TABLE_SIZE; ++i) {
257
71.4k
    struct link_ref *r = references[i];
258
71.4k
    struct link_ref *next;
259
260
172k
    while (r) {
261
101k
      next = r->next;
262
101k
      hoedown_buffer_free(r->link);
263
101k
      hoedown_buffer_free(r->title);
264
101k
      hoedown_buffer_free(r->attr);
265
101k
      free(r);
266
101k
      r = next;
267
101k
    }
268
71.4k
  }
269
8.92k
}
270
271
static struct footnote_ref *
272
create_footnote_ref(struct footnote_list *list, const uint8_t *name, size_t name_size)
273
192k
{
274
192k
  struct footnote_ref *ref = hoedown_calloc(1, sizeof(struct footnote_ref));
275
276
192k
  ref->id = hash_link_ref(name, name_size);
277
278
192k
  return ref;
279
192k
}
280
281
static int
282
add_footnote_ref(struct footnote_list *list, struct footnote_ref *ref)
283
193k
{
284
193k
  struct footnote_item *item = hoedown_calloc(1, sizeof(struct footnote_item));
285
193k
  if (!item)
286
0
    return 0;
287
193k
  item->ref = ref;
288
289
193k
  if (list->head == NULL) {
290
1.81k
    list->head = list->tail = item;
291
191k
  } else {
292
191k
    list->tail->next = item;
293
191k
    list->tail = item;
294
191k
  }
295
193k
  list->count++;
296
297
193k
  return 1;
298
193k
}
299
300
static struct footnote_ref *
301
find_footnote_ref(struct footnote_list *list, uint8_t *name, size_t length)
302
14.6k
{
303
14.6k
  unsigned int hash = hash_link_ref(name, length);
304
14.6k
  struct footnote_item *item = NULL;
305
306
14.6k
  item = list->head;
307
308
6.38M
  while (item != NULL) {
309
6.37M
    if (item->ref->id == hash)
310
6.25k
      return item->ref;
311
6.36M
    item = item->next;
312
6.36M
  }
313
314
8.35k
  return NULL;
315
14.6k
}
316
317
static void
318
free_footnote_ref(struct footnote_ref *ref)
319
192k
{
320
192k
  hoedown_buffer_free(ref->contents);
321
192k
  hoedown_buffer_free(ref->name);
322
192k
  free(ref);
323
192k
}
324
325
static void
326
free_footnote_list(struct footnote_list *list, int free_refs)
327
17.8k
{
328
17.8k
  struct footnote_item *item = list->head;
329
17.8k
  struct footnote_item *next;
330
331
211k
  while (item) {
332
193k
    next = item->next;
333
193k
    if (free_refs)
334
192k
      free_footnote_ref(item->ref);
335
193k
    free(item);
336
193k
    item = next;
337
193k
  }
338
17.8k
}
339
340
341
/*
 * Tell whether a char counts as Markdown spacing.
 *
 * Only the plain space and the newline qualify: tabs and carriage
 * returns never reach this point because the preprocessing phase
 * filters them out.
 *
 * Being genuinely UTF-8 compliant would require decoding a Unicode
 * codepoint here and testing its space property instead.
 */
static int
_isspace(int c)
{
  switch (c) {
  case ' ':
  case '\n':
    return 1;
  default:
    return 0;
  }
}
357
358
/* is_empty_all: returns 1 when every one of the `size` bytes is spacing
 * (as defined by _isspace), 0 otherwise; an empty range yields 1 */
static int
is_empty_all(const uint8_t *data, size_t size)
{
  size_t pos;

  for (pos = 0; pos < size; pos++) {
    if (!_isspace(data[pos]))
      return 0;
  }

  return 1;
}
366
367
/*
368
 * Replace all spacing characters in data with spaces. As a special
369
 * case, this collapses a newline with the previous space, if possible.
370
 */
371
static void
372
replace_spacing(hoedown_buffer *ob, const uint8_t *data, size_t size)
373
461k
{
374
461k
  size_t i = 0, mark;
375
461k
  hoedown_buffer_grow(ob, size);
376
1.12M
  while (1) {
377
1.12M
    mark = i;
378
119M
    while (i < size && data[i] != '\n') i++;
379
1.12M
    hoedown_buffer_put(ob, data + mark, i - mark);
380
381
1.12M
    if (i >= size) break;
382
383
664k
    if (!(i > 0 && data[i-1] == ' '))
384
301k
      hoedown_buffer_putc(ob, ' ');
385
664k
    i++;
386
664k
  }
387
461k
}
388
389
/****************************
390
 * INLINE PARSING FUNCTIONS *
391
 ****************************/
392
393
/* is_mail_autolink • scans the address part of a mail autolink up to '>' */
/* deliberately laxer than the original markdown e-mail address matching: */
/* the address is assumed to be [-@._a-zA-Z0-9]+ with exactly one '@'. */
/* Returns the length scanned including the closing '>', or 0 when the */
/* text is not such an address (bad character, wrong '@' count, no '>'). */
static size_t
is_mail_autolink(uint8_t *data, size_t size)
{
  size_t pos, at_count = 0;

  for (pos = 0; pos < size; pos++) {
    uint8_t ch = data[pos];

    if (isalnum(ch))
      continue;

    if (ch == '@') {
      at_count++;
      continue;
    }

    if (ch == '-' || ch == '.' || ch == '_')
      continue;

    if (ch == '>')
      return (at_count == 1) ? pos + 1 : 0;

    /* any other character cannot be part of the address */
    return 0;
  }

  return 0;
}
424
425
/* script_tag_length • length of a "<? ... ?>" tag starting at data */
/* Returns 0 unless data holds at least 3 bytes and begins with "<?". */
/* A "?>" found inside a '...' or "..." quoted run does not close the tag; */
/* a quote preceded by a backslash does not end the quoted run. */
/* If no closing "?>" is found the whole input is taken: size is returned. */
static size_t
script_tag_length(uint8_t *data, size_t size)
{
  size_t pos;
  char quote = 0; /* the quote character currently open, 0 when none */

  if (size < 3 || data[0] != '<' || data[1] != '?')
    return 0;

  for (pos = 2; pos < size; pos++) {
    if (quote == 0 && data[pos - 1] == '?' && data[pos] == '>')
      return pos + 1;

    if (data[pos] == '\'' || data[pos] == '"') {
      if (quote == 0)
        quote = data[pos];
      else if (data[pos] == quote && data[pos - 1] != '\\')
        quote = 0;
    }
  }

  /* unterminated: pos == size here */
  return pos;
}
459
460
/* tag_length • returns the length of the given tag, or 0 is it's not valid */
461
static size_t
462
tag_length(uint8_t *data, size_t size, hoedown_autolink_type *autolink, int script_tag)
463
199k
{
464
199k
  size_t i, j;
465
466
  /* a valid tag can't be shorter than 3 chars */
467
199k
  if (size < 3) return 0;
468
469
188k
  if (data[0] != '<') return 0;
470
471
  /* HTML comment, laxist form */
472
188k
  if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
473
41.7k
    i = 5;
474
475
13.2M
    while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
476
13.2M
      i++;
477
478
41.7k
    i++;
479
480
41.7k
    if (i <= size)
481
20.4k
      return i;
482
41.7k
  }
483
484
  /* begins with a '<' optionally followed by '/', followed by letter or number */
485
168k
  i = (data[1] == '/') ? 2 : 1;
486
487
168k
  if (!isalnum(data[i])) {
488
80.0k
    if (script_tag) {
489
80.0k
      return script_tag_length(data, size);
490
80.0k
    }
491
0
    return 0;
492
80.0k
  }
493
494
  /* scheme test */
495
88.4k
  *autolink = HOEDOWN_AUTOLINK_NONE;
496
497
  /* try to find the beginning of an URI */
498
409k
  while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-'))
499
321k
    i++;
500
501
88.4k
  if (i > 1 && i < size && data[i] == '@') {
502
5.32k
    if ((j = is_mail_autolink(data + i, size - i)) != 0) {
503
2.97k
      *autolink = HOEDOWN_AUTOLINK_EMAIL;
504
2.97k
      return i + j;
505
2.97k
    }
506
5.32k
  }
507
508
85.4k
  if (i > 2 && i < size && data[i] == ':') {
509
16.3k
    *autolink = HOEDOWN_AUTOLINK_NORMAL;
510
16.3k
    i++;
511
16.3k
  }
512
513
  /* completing autolink test: no spacing or ' or " */
514
85.4k
  if (i >= size)
515
3.86k
    *autolink = HOEDOWN_AUTOLINK_NONE;
516
517
81.5k
  else if (*autolink) {
518
16.3k
    j = i;
519
520
4.46M
    while (i < size) {
521
4.45M
      if (data[i] == '\\') i += 2;
522
4.44M
      else if (data[i] == '>' || data[i] == '\'' ||
523
4.44M
          data[i] == '"' || data[i] == ' ' || data[i] == '\n')
524
12.0k
          break;
525
4.43M
      else i++;
526
4.45M
    }
527
528
16.3k
    if (i >= size) return 0;
529
12.0k
    if (i > j && data[i] == '>') return i + 1;
530
    /* one of the forbidden chars has been found */
531
7.43k
    *autolink = HOEDOWN_AUTOLINK_NONE;
532
7.43k
  }
533
534
  /* looking for something looking like a tag end */
535
48.6M
  while (i < size && data[i] != '>') i++;
536
76.5k
  if (i >= size) return 0;
537
27.7k
  return i + 1;
538
76.5k
}
539
540
/* parse_inline • parses inline markdown elements */
541
static void
542
parse_inline(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
543
1.07M
{
544
1.07M
  size_t i = 0, end = 0, consumed = 0;
545
1.07M
  hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
546
1.07M
  uint8_t *active_char = doc->active_char;
547
548
1.07M
  if (doc->work_bufs[BUFFER_SPAN].size +
549
1.07M
    doc->work_bufs[BUFFER_BLOCK].size > doc->max_nesting)
550
5.08k
    return;
551
552
6.36M
  while (i < size) {
553
6.04M
    size_t user_block = 0;
554
128M
    while (end < size) {
555
127M
      if (doc->user_block) {
556
0
        user_block = doc->user_block(data+end, size - end, &doc->data);
557
0
        if (user_block) {
558
0
          break;
559
0
        }
560
0
      }
561
      /* copying inactive chars into the output */
562
127M
      if (active_char[data[end]] != 0) {
563
5.29M
        break;
564
5.29M
      }
565
122M
      end++;
566
122M
    }
567
568
6.04M
    if (doc->md.normal_text) {
569
6.04M
      work.data = data + i;
570
6.04M
      work.size = end - i;
571
6.04M
      doc->md.normal_text(ob, &work, &doc->data);
572
6.04M
    }
573
0
    else
574
0
      hoedown_buffer_put(ob, data + i, end - i);
575
576
6.04M
    if (end >= size) {
577
748k
      break;
578
748k
    }
579
5.29M
    i = end;
580
581
5.29M
    if (user_block) {
582
0
      work.data = data + i;
583
0
      work.size = user_block;
584
0
      end = user_block;
585
0
      if (doc->md.user_block) {
586
0
        doc->md.user_block(ob, &work, &doc->data);
587
0
      } else {
588
0
        hoedown_buffer_put(ob, data + i, size - i);
589
0
      }
590
0
      if (!end) {
591
0
        end = i + 1;
592
0
      } else {
593
0
        i += end;
594
0
        end = i;
595
0
        consumed = i;
596
0
      }
597
5.29M
    } else {
598
5.29M
      end = markdown_char_ptrs[ (int)active_char[data[end]] ](ob, doc, data + i, i - consumed, size - i);
599
5.29M
      if (!end) /* no action from the callback */
600
4.43M
        end = i + 1;
601
869k
      else {
602
869k
        i += end;
603
869k
        end = i;
604
869k
        consumed = i;
605
869k
      }
606
5.29M
    }
607
5.29M
  }
608
1.06M
}
609
610
/* parse_inline_attributes • parses a "{...}" attribute group located at the
 * very start of data (parse_attributes, by contrast, expects the group at the
 * end of data). With a non-zero attr_activation the group must begin with
 * '{' immediately followed by that character. Backslash-escaped characters
 * are skipped while looking for the closing '}'. On success the text between
 * the opener and '}' is appended to attr (when attr is not NULL) and the
 * position just past '}' is returned; otherwise 0 is returned. */
static size_t parse_inline_attributes(uint8_t *data, size_t size, struct hoedown_buffer *attr, uint8_t attr_activation)
{
  size_t body_start, pos;

  if (size < 1)
    return 0;

  if (data[0] != '{')
    return 0;

  if (attr_activation && !(1 < size && data[1] == attr_activation))
    return 0;

  /* skip '{', plus the activation character if any */
  body_start = attr_activation ? 2 : 1;

  pos = 0;
  while (pos < size) {
    /* ignore escaped characters */
    if (data[pos] == '\\') {
      pos += 2;
      continue;
    }

    if (data[pos] == '}') {
      if (attr != NULL)
        hoedown_buffer_put(attr, data + body_start, pos - body_start);
      return pos + 1;
    }

    pos++;
  }

  return 0;
}
643
644
645
/* parse_attributes • parses special attributes at the end of the data */
646
static size_t parse_attributes(uint8_t *data, size_t size, struct hoedown_buffer *attr, struct hoedown_buffer *block_attr, const char *block_id, int is_header, uint8_t attr_activation)
647
1.02M
{
648
1.02M
  size_t i, len, begin = 0, end = 0;
649
650
1.02M
  if (size < 1)
651
31.6k
    return 0;
652
653
990k
  i = size;
654
1.31M
  while (i && data[i-1] == '\n') {
655
320k
    i--;
656
320k
  }
657
990k
  len = i;
658
659
990k
  if (i && data[i-1] == '}') {
660
27.9M
    do {
661
27.9M
      i--;
662
27.9M
    } while (i && data[i] != '{');
663
664
120k
    begin = i + 1;
665
120k
    end = len - 1;
666
157k
    while (i && data[i-1] == ' ') {
667
36.7k
      i--;
668
36.7k
    }
669
120k
  }
670
671
990k
  if (is_header && i && data[i-1] == '#') {
672
22.5k
    while (i && data[i-1] == '#') {
673
11.5k
      i--;
674
11.5k
    }
675
60.5k
    while (i && data[i-1] == ' ') {
676
49.5k
      i--;
677
49.5k
    }
678
11.0k
  }
679
680
990k
  if (begin && end && data[begin-1] == '{' && data[end] == '}') {
681
102k
    if (begin >=2 && data[begin-2] == '\\' && data[end-1] == '\\') {
682
1.28k
      return len;
683
1.28k
    }
684
685
101k
    if (block_attr && data[begin] == '@') {
686
      /* skip the @ by incrementing past it */
687
8.64k
      begin++;
688
8.64k
      if (*block_id) {
689
        /* if a block_id was fed in, check to make sure the string until the
690
         * space is identical */
691
32.9k
        while (begin < end && *block_id) {
692
26.5k
          if (data[begin] != (uint8_t)(*block_id)) {
693
2.21k
            return len;
694
2.21k
          }
695
24.2k
          begin++;
696
24.2k
          block_id++;
697
24.2k
        }
698
        /* it might have matched only the first portion of block_id; make sure
699
         * there's no more to it here */
700
6.43k
        if (*block_id) {
701
862
          return len;
702
862
        }
703
6.43k
      }
704
5.56k
      if (begin < end && data[begin] != ' ') {
705
796
        return len;
706
796
      }
707
4.77k
      if (block_attr) {
708
4.77k
        if (block_attr->size) {
709
2.54k
          hoedown_buffer_reset(block_attr);
710
2.54k
        }
711
4.77k
        hoedown_buffer_put(block_attr, data + begin, end - begin);
712
4.77k
      }
713
4.77k
      len = i;
714
4.77k
      if (attr) {
715
4.77k
        len = parse_attributes(data, len, attr, NULL, "", is_header, attr_activation);
716
4.77k
      }
717
92.8k
    } else if (attr && (!attr_activation || attr_activation == data[begin])) {
718
80.0k
      if (attr->size) {
719
0
        hoedown_buffer_reset(attr);
720
0
      }
721
80.0k
      if (attr_activation) {
722
0
        begin++;
723
0
      }
724
80.0k
      hoedown_buffer_put(attr, data + begin, end - begin);
725
80.0k
      len = i;
726
80.0k
    }
727
101k
  }
728
729
985k
  return len;
730
990k
}
731
732
/* is_escaped • returns whether special char at data[loc] is escaped by '\\' */
/* i.e. whether the run of backslashes directly before it has odd length */
static int
is_escaped(uint8_t *data, size_t loc)
{
  size_t backslashes = 0;

  while (backslashes < loc && data[loc - 1 - backslashes] == '\\')
    backslashes++;

  /* an even run is made of escaped backslashes only */
  return (int)(backslashes % 2);
}
743
744
/* is_backslashed • returns whether special char at data[loc] is directly preceded by '\\'; stricter than is_escaped, which only counts an odd run of backslashes. */
static int
is_backslashed(uint8_t *data, size_t loc)
{
  if (loc == 0)
    return 0;

  return data[loc - 1] == '\\';
}
750
751
/* find_emph_char • looks for the next emph uint8_t, skipping other constructs */
/* Returns the index of the next unescaped `c`, jumping over code spans and */
/* links on the way. When such a construct turns out to be unterminated, */
/* the first `c` seen inside it is returned instead. 0 means "not found" */
/* (a `c` sitting at index 0 is reported as 0 as well). */
static size_t
find_emph_char(uint8_t *data, size_t size, uint8_t c)
{
  size_t pos = 0;

  while (pos < size) {
    /* first `c` met inside the construct being skipped, 0 if none */
    size_t fallback = 0;

    while (pos < size && data[pos] != c && data[pos] != '[' && data[pos] != '`')
      pos++;

    if (pos == size)
      return 0;

    /* not counting escaped chars */
    if (is_escaped(data, pos)) {
      pos++;
      continue;
    }

    if (data[pos] == c)
      return pos;

    /* skipping a codespan */
    if (data[pos] == '`') {
      size_t opening = 0, run = 0;

      /* counting the number of opening backticks */
      while (pos < size && data[pos] == '`') {
        pos++;
        opening++;
      }

      if (pos >= size)
        return 0;

      /* finding the matching closing sequence */
      while (pos < size && run < opening) {
        if (!fallback && data[pos] == c)
          fallback = pos;
        run = (data[pos] == '`') ? run + 1 : 0;
        pos++;
      }

      /* not a well-formed codespan; use found matching emph char */
      if (run < opening && pos >= size)
        return fallback;

      continue;
    }

    /* skipping a link */
    if (data[pos] == '[') {
      uint8_t closer;

      /* the link text, up to ']' */
      pos++;
      while (pos < size && data[pos] != ']') {
        if (!fallback && data[pos] == c)
          fallback = pos;
        pos++;
      }

      pos++;
      while (pos < size && _isspace(data[pos]))
        pos++;

      if (pos >= size)
        return fallback;

      /* a reference "[...]" or an inline target "(...)" may follow */
      if (data[pos] == '[')
        closer = ']';
      else if (data[pos] == '(')
        closer = ')';
      else if (fallback)
        return fallback;
      else
        continue;

      pos++;
      while (pos < size && data[pos] != closer) {
        if (!fallback && data[pos] == c)
          fallback = pos;
        pos++;
      }

      if (pos >= size)
        return fallback;

      pos++;
    }
  }

  return 0;
}
843
844
/* find_separator_char • looks for the next unbackslashed separator character c */
/* Returns its index, or 0 when there is none (a separator sitting at */
/* index 0 is reported as 0 as well). */
static size_t
find_separator_char(uint8_t *data, size_t size, uint8_t c)
{
  size_t pos = 0;

  while (pos < size) {
    while (pos < size && data[pos] != c)
      pos++;

    if (pos == size)
      return 0;

    /* data[pos] is a separator; backslashed ones do not count */
    if (!is_backslashed(data, pos))
      return pos;

    pos++;
  }

  return 0;
}
868
869
/* parse_emph1 • parsing single emphase */
870
/* closed by a symbol not preceded by spacing and not followed by symbol */
871
static size_t
872
parse_emph1(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c)
873
87.0k
{
874
87.0k
  size_t i = 0, len;
875
87.0k
  hoedown_buffer *work = 0;
876
87.0k
  int r;
877
878
  /* skipping one symbol if coming from emph3 */
879
87.0k
  if (size > 1 && data[0] == c && data[1] == c) i = 1;
880
881
106k
  while (i < size) {
882
106k
    len = find_emph_char(data + i, size - i, c);
883
106k
    if (!len) return 0;
884
60.8k
    i += len;
885
60.8k
    if (i >= size) return 0;
886
887
60.8k
    if (data[i] == c && !_isspace(data[i - 1])) {
888
889
42.6k
      if (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS ||
890
42.6k
        (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_UNDERLINE_EMPHASIS && c == '_')) {
891
42.6k
        if (i + 1 < size && isalnum(data[i + 1]))
892
1.10k
          continue;
893
42.6k
      }
894
895
41.5k
      work = newbuf(doc, BUFFER_SPAN);
896
41.5k
      parse_inline(work, doc, data, i);
897
898
41.5k
      if (doc->ext_flags & HOEDOWN_EXT_UNDERLINE && c == '_')
899
38.5k
        r = doc->md.underline(ob, work, &doc->data);
900
2.98k
      else
901
2.98k
        r = doc->md.emphasis(ob, work, &doc->data);
902
903
41.5k
      popbuf(doc, BUFFER_SPAN);
904
41.5k
      return r ? i + 1 : 0;
905
42.6k
    }
906
60.8k
  }
907
908
0
  return 0;
909
87.0k
}
910
911
/* parse_emph2 • parsing single emphase */
912
static size_t
913
parse_emph2(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c)
914
39.0k
{
915
39.0k
  size_t i = 0, len;
916
39.0k
  hoedown_buffer *work = 0;
917
39.0k
  int r;
918
919
59.7k
  while (i < size) {
920
59.2k
    len = find_emph_char(data + i, size - i, c);
921
59.2k
    if (!len) return 0;
922
34.1k
    i += len;
923
924
34.1k
    if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) {
925
13.4k
      work = newbuf(doc, BUFFER_SPAN);
926
13.4k
      parse_inline(work, doc, data, i);
927
928
13.4k
      if (c == '~')
929
2.97k
        r = doc->md.strikethrough(ob, work, &doc->data);
930
10.4k
      else if (c == '=')
931
741
        r = doc->md.highlight(ob, work, &doc->data);
932
9.73k
      else
933
9.73k
        r = doc->md.double_emphasis(ob, work, &doc->data);
934
935
13.4k
      popbuf(doc, BUFFER_SPAN);
936
13.4k
      return r ? i + 2 : 0;
937
13.4k
    }
938
20.7k
    i++;
939
20.7k
  }
940
528
  return 0;
941
39.0k
}
942
943
/* parse_emph3 • parsing single emphase */
944
/* finds the first closing tag, and delegates to the other emph */
945
static size_t
946
parse_emph3(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c)
947
52.1k
{
948
52.1k
  size_t i = 0, len;
949
52.1k
  int r;
950
951
81.6k
  while (i < size) {
952
81.6k
    len = find_emph_char(data + i, size - i, c);
953
81.6k
    if (!len) return 0;
954
45.5k
    i += len;
955
956
    /* skip spacing preceded symbols */
957
45.5k
    if (data[i] != c || _isspace(data[i - 1]))
958
29.5k
      continue;
959
960
15.9k
    if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && doc->md.triple_emphasis) {
961
      /* triple symbol found */
962
2.70k
      hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
963
964
2.70k
      parse_inline(work, doc, data, i);
965
2.70k
      r = doc->md.triple_emphasis(ob, work, &doc->data);
966
2.70k
      popbuf(doc, BUFFER_SPAN);
967
2.70k
      return r ? i + 3 : 0;
968
969
13.2k
    } else if (i + 1 < size && data[i + 1] == c) {
970
      /* double symbol found, handing over to emph1 */
971
6.48k
      len = parse_emph1(ob, doc, data - 2, size + 2, c);
972
6.48k
      if (!len) return 0;
973
0
      else return len - 2;
974
975
6.77k
    } else {
976
      /* single symbol found, handing over to emph2 */
977
6.77k
      len = parse_emph2(ob, doc, data - 1, size + 1, c);
978
6.77k
      if (!len) return 0;
979
0
      else return len - 1;
980
6.77k
    }
981
15.9k
  }
982
0
  return 0;
983
52.1k
}
984
985
/* parse_math • parses a math span until the given ending delimiter */
986
static size_t
987
parse_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size, const char *end, size_t delimsz, int displaymode)
988
34.6k
{
989
34.6k
  hoedown_buffer text = { NULL, 0, 0, 0, NULL, NULL, NULL };
990
34.6k
  size_t i = delimsz;
991
992
34.6k
  if (!doc->md.math)
993
0
    return 0;
994
995
  /* find ending delimiter */
996
93.1k
  while (1) {
997
52.7M
    while (i < size && data[i] != (uint8_t)end[0])
998
52.6M
      i++;
999
1000
93.1k
    if (i >= size)
1001
19.3k
      return 0;
1002
1003
73.7k
    if (!is_escaped(data, i) && !(i + delimsz > size)
1004
73.7k
      && memcmp(data + i, end, delimsz) == 0)
1005
15.2k
      break;
1006
1007
58.5k
    i++;
1008
58.5k
  }
1009
1010
  /* prepare buffers */
1011
15.2k
  text.data = data + delimsz;
1012
15.2k
  text.size = i - delimsz;
1013
1014
  /* if this is a $$ and MATH_EXPLICIT is not active,
1015
   * guess whether displaymode should be enabled from the context */
1016
15.2k
  i += delimsz;
1017
15.2k
  if (delimsz == 2 && !(doc->ext_flags & HOEDOWN_EXT_MATH_EXPLICIT))
1018
0
    displaymode = is_empty_all(data - offset, offset) && is_empty_all(data + i, size - i);
1019
1020
  /* call callback */
1021
15.2k
  if (doc->md.math(ob, &text, displaymode, &doc->data))
1022
15.2k
    return i;
1023
1024
0
  return 0;
1025
15.2k
}
1026
1027
/* char_emphasis • single and double emphasis parsing */
1028
static size_t
1029
char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1030
966k
{
1031
966k
  uint8_t c = data[0];
1032
966k
  size_t ret;
1033
1034
966k
  if (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS) {
1035
966k
    if (offset > 0 && !_isspace(data[-1]) && data[-1] != '>' && data[-1] != '(')
1036
680k
      return 0;
1037
966k
  }
1038
1039
286k
  if (size > 2 && data[1] != c) {
1040
    /* spacing cannot follow an opening emphasis;
1041
     * strikethrough and highlight only takes two characters '~~' */
1042
126k
    if (c == '~' || c == '=' || _isspace(data[1]) || (ret = parse_emph1(ob, doc, data + 1, size - 1, c)) == 0)
1043
87.1k
      return 0;
1044
1045
39.7k
    return ret + 1;
1046
126k
  }
1047
1048
159k
  if (size > 3 && data[1] == c && data[2] != c) {
1049
37.0k
    if (_isspace(data[2]) || (ret = parse_emph2(ob, doc, data + 2, size - 2, c)) == 0)
1050
27.8k
      return 0;
1051
1052
9.16k
    return ret + 2;
1053
37.0k
  }
1054
1055
122k
  if (size > 4 && data[1] == c && data[2] == c && data[3] != c) {
1056
62.2k
    if (c == '~' || c == '=' || _isspace(data[3]) || (ret = parse_emph3(ob, doc, data + 3, size - 3, c)) == 0)
1057
60.0k
      return 0;
1058
1059
2.16k
    return ret + 3;
1060
62.2k
  }
1061
1062
60.2k
  return 0;
1063
122k
}
1064
1065
1066
/* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */
1067
static size_t
1068
char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1069
465k
{
1070
465k
  if (offset < 2 || data[-1] != ' ' || data[-2] != ' ')
1071
439k
    return 0;
1072
1073
  /* removing the last space from ob and rendering */
1074
1.32M
  while (ob->size && ob->data[ob->size - 1] == ' ')
1075
1.30M
    ob->size--;
1076
1077
25.9k
  return doc->md.linebreak(ob, &doc->data) ? 1 : 0;
1078
465k
}
1079
1080
1081
/* char_codespan • '`' parsing a code span (assuming codespan != 0) */
1082
static size_t
1083
char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1084
107k
{
1085
107k
  hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
1086
107k
  size_t end, nb = 0, i, f_begin, f_end;
1087
1088
  /* counting the number of backticks in the delimiter */
1089
3.71M
  while (nb < size && data[nb] == '`')
1090
3.60M
    nb++;
1091
1092
  /* finding the next delimiter */
1093
107k
  i = 0;
1094
73.0M
  for (end = nb; end < size && i < nb; end++) {
1095
72.9M
    if (data[end] == '`') {
1096
421k
      if (end + 1 == size || !is_escaped(data, end)) {
1097
420k
        i++;
1098
420k
      } else {
1099
727
        i = 0;
1100
727
      }
1101
421k
    }
1102
72.5M
    else i = 0;
1103
72.9M
  }
1104
1105
107k
  if (i < nb && end >= size)
1106
84.2k
    return 0; /* no matching delimiter */
1107
1108
  /* trimming outside whitespace */
1109
23.6k
  f_begin = nb;
1110
256k
  while (f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\n'))
1111
233k
    f_begin++;
1112
1113
23.6k
  f_end = end - nb;
1114
160k
  while (f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\n'))
1115
137k
    f_end--;
1116
1117
  /* real code span */
1118
23.6k
  if (f_begin < f_end) {
1119
    /* needed for parse_attribute functions as buffer functions do not work with
1120
     * buffers made on the stack */
1121
20.5k
    hoedown_buffer *attr = newbuf(doc, BUFFER_ATTRIBUTE);
1122
1123
20.5k
    work.data = data + f_begin;
1124
20.5k
    work.size = f_end - f_begin;
1125
1126
20.5k
    if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
1127
20.5k
      end += parse_inline_attributes(data + end, size - end, attr, doc->attr_activation);
1128
20.5k
    }
1129
1130
20.5k
    if (!doc->md.codespan(ob, &work, attr, &doc->data))
1131
0
      end = 0;
1132
20.5k
    popbuf(doc, BUFFER_ATTRIBUTE);
1133
20.5k
  } else {
1134
3.06k
    if (!doc->md.codespan(ob, 0, 0, &doc->data))
1135
0
      end = 0;
1136
3.06k
  }
1137
1138
23.6k
  return end;
1139
107k
}
1140
1141
/* char_quote • '"' parsing a quote */
1142
static size_t
1143
char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1144
272k
{
1145
272k
  size_t end, nq = 0, i, f_begin, f_end;
1146
1147
  /* counting the number of quotes in the delimiter */
1148
168M
  while (nq < size && data[nq] == '"')
1149
168M
    nq++;
1150
1151
  /* finding the next delimiter */
1152
272k
  end = nq;
1153
670k
  while (1) {
1154
670k
    i = end;
1155
670k
    end += find_emph_char(data + end, size - end, '"');
1156
670k
    if (end == i) return 0;    /* no matching delimiter */
1157
496k
    i = end;
1158
84.4M
    while (end < size && data[end] == '"' && end - i < nq) end++;
1159
496k
    if (end - i >= nq) break;
1160
496k
  }
1161
1162
  /* trimming outside spaces */
1163
98.3k
  f_begin = nq;
1164
846k
  while (f_begin < end && data[f_begin] == ' ')
1165
747k
    f_begin++;
1166
1167
98.3k
  f_end = end - nq;
1168
711k
  while (f_end > nq && data[f_end-1] == ' ')
1169
613k
    f_end--;
1170
1171
  /* real quote */
1172
98.3k
  if (f_begin < f_end) {
1173
91.3k
    hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
1174
91.3k
    parse_inline(work, doc, data + f_begin, f_end - f_begin);
1175
1176
91.3k
    if (!doc->md.quote(ob, work, &doc->data))
1177
1.66k
      end = 0;
1178
91.3k
    popbuf(doc, BUFFER_SPAN);
1179
91.3k
  } else {
1180
6.96k
    if (!doc->md.quote(ob, 0, &doc->data))
1181
6.96k
      end = 0;
1182
6.96k
  }
1183
1184
98.3k
  return end;
1185
272k
}
1186
1187
1188
/* char_escape • '\\' backslash escape */
1189
static size_t
1190
char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1191
104k
{
1192
104k
  static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~=\"$";
1193
104k
  hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
1194
104k
  size_t w;
1195
1196
104k
  if (size > 1) {
1197
101k
    if (data[1] == '\\' && (doc->ext_flags & HOEDOWN_EXT_MATH) &&
1198
101k
      size > 2 && (data[2] == '(' || data[2] == '[')) {
1199
2.70k
      const char *end = (data[2] == '[') ? "\\\\]" : "\\\\)";
1200
2.70k
      w = parse_math(ob, doc, data, offset, size, end, 3, data[2] == '[');
1201
2.70k
      if (w) return w;
1202
2.70k
    }
1203
1204
101k
    if (strchr(escape_chars, data[1]) == NULL)
1205
47.7k
      return 0;
1206
1207
53.5k
    if (doc->md.normal_text) {
1208
53.5k
      work.data = data + 1;
1209
53.5k
      work.size = 1;
1210
53.5k
      doc->is_escape_char = 1;
1211
53.5k
      doc->md.normal_text(ob, &work, &doc->data);
1212
53.5k
      doc->is_escape_char = 0;
1213
53.5k
    }
1214
0
    else hoedown_buffer_putc(ob, data[1]);
1215
53.5k
  } else if (size == 1) {
1216
2.57k
    if (doc->md.normal_text) {
1217
2.57k
      work.data = data;
1218
2.57k
      work.size = 1;
1219
2.57k
      doc->md.normal_text(ob, &work, &doc->data);
1220
2.57k
    }
1221
0
    else hoedown_buffer_putc(ob, data[0]);
1222
2.57k
  }
1223
1224
56.1k
  return 2;
1225
104k
}
1226
1227
/* char_entity • '&' escaped when it doesn't belong to an entity */
1228
/* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */
1229
static size_t
1230
char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1231
62.3k
{
1232
62.3k
  size_t end = 1;
1233
62.3k
  hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
1234
1235
62.3k
  if (end < size && data[end] == '#')
1236
409
    end++;
1237
1238
357k
  while (end < size && isalnum(data[end]))
1239
294k
    end++;
1240
1241
62.3k
  if (end < size && data[end] == ';')
1242
965
    end++; /* real entity */
1243
61.3k
  else
1244
61.3k
    return 0; /* lone '&' */
1245
1246
965
  if (doc->md.entity) {
1247
0
    work.data = data;
1248
0
    work.size = end;
1249
0
    doc->md.entity(ob, &work, &doc->data);
1250
0
  }
1251
965
  else hoedown_buffer_put(ob, data, end);
1252
1253
965
  return end;
1254
62.3k
}
1255
1256
/* char_langle_tag • '<' when tags or autolinks are allowed */
1257
static size_t
1258
char_langle_tag(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1259
199k
{
1260
199k
  hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
1261
199k
  hoedown_autolink_type altype = HOEDOWN_AUTOLINK_NONE;
1262
199k
  size_t end = tag_length(data, size, &altype, doc->ext_flags & HOEDOWN_EXT_SCRIPT_TAGS);
1263
199k
  int ret = 0;
1264
1265
199k
  work.data = data;
1266
199k
  work.size = end;
1267
1268
199k
  if (end > 2) {
1269
60.0k
    if (doc->md.autolink && altype != HOEDOWN_AUTOLINK_NONE) {
1270
7.58k
      hoedown_buffer *u_link = newbuf(doc, BUFFER_SPAN);
1271
7.58k
      work.data = data + 1;
1272
7.58k
      work.size = end - 2;
1273
7.58k
      unscape_text(u_link, &work);
1274
7.58k
      ret = doc->md.autolink(ob, u_link, altype, &doc->data);
1275
7.58k
      popbuf(doc, BUFFER_SPAN);
1276
7.58k
    }
1277
52.4k
    else if (doc->md.raw_html)
1278
52.4k
      ret = doc->md.raw_html(ob, &work, &doc->data);
1279
60.0k
  }
1280
1281
199k
  if (!ret) return 0;
1282
60.0k
  else return end;
1283
199k
}
1284
1285
static size_t
1286
char_autolink_www(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1287
461k
{
1288
461k
  hoedown_buffer *link, *link_url, *link_text;
1289
461k
  size_t link_len, rewind;
1290
1291
461k
  if (!doc->md.link || doc->in_link_body)
1292
1.81k
    return 0;
1293
1294
459k
  link = newbuf(doc, BUFFER_SPAN);
1295
1296
459k
  if ((link_len = hoedown_autolink__www(&rewind, link, data, offset, size, HOEDOWN_AUTOLINK_SHORT_DOMAINS)) > 0) {
1297
23.0k
    link_url = newbuf(doc, BUFFER_SPAN);
1298
23.0k
    HOEDOWN_BUFPUTSL(link_url, "http://");
1299
23.0k
    hoedown_buffer_put(link_url, link->data, link->size);
1300
1301
23.0k
    if (ob->size > rewind)
1302
17.2k
      ob->size -= rewind;
1303
5.79k
    else
1304
5.79k
      ob->size = 0;
1305
1306
23.0k
    if (doc->md.normal_text) {
1307
23.0k
      link_text = newbuf(doc, BUFFER_SPAN);
1308
23.0k
      doc->md.normal_text(link_text, link, &doc->data);
1309
23.0k
      doc->md.link(ob, link_text, link_url, NULL, NULL, &doc->data);
1310
23.0k
      popbuf(doc, BUFFER_SPAN);
1311
23.0k
    } else {
1312
0
      doc->md.link(ob, link, link_url, NULL, NULL, &doc->data);
1313
0
    }
1314
23.0k
    popbuf(doc, BUFFER_SPAN);
1315
23.0k
  }
1316
1317
459k
  popbuf(doc, BUFFER_SPAN);
1318
459k
  return link_len;
1319
461k
}
1320
1321
static size_t
1322
char_autolink_email(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1323
87.9k
{
1324
87.9k
  hoedown_buffer *link;
1325
87.9k
  size_t link_len, rewind;
1326
1327
87.9k
  if (!doc->md.autolink || doc->in_link_body)
1328
591
    return 0;
1329
1330
87.3k
  link = newbuf(doc, BUFFER_SPAN);
1331
1332
87.3k
  if ((link_len = hoedown_autolink__email(&rewind, link, data, offset, size, 0)) > 0) {
1333
1.50k
    if (ob->size > rewind)
1334
838
      ob->size -= rewind;
1335
665
    else
1336
665
      ob->size = 0;
1337
1338
1.50k
    doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_EMAIL, &doc->data);
1339
1.50k
  }
1340
1341
87.3k
  popbuf(doc, BUFFER_SPAN);
1342
87.3k
  return link_len;
1343
87.9k
}
1344
1345
static size_t
1346
char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1347
1.41M
{
1348
1.41M
  hoedown_buffer *link;
1349
1.41M
  size_t link_len, rewind;
1350
1351
1.41M
  if (!doc->md.autolink || doc->in_link_body)
1352
7.69k
    return 0;
1353
1354
1.40M
  link = newbuf(doc, BUFFER_SPAN);
1355
1356
1.40M
  if ((link_len = hoedown_autolink__url(&rewind, link, data, offset, size, 0)) > 0) {
1357
4.77k
    if (ob->size > rewind)
1358
4.52k
      ob->size -= rewind;
1359
245
    else
1360
245
      ob->size = 0;
1361
1362
4.77k
    doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_NORMAL, &doc->data);
1363
4.77k
  }
1364
1365
1.40M
  popbuf(doc, BUFFER_SPAN);
1366
1.40M
  return link_len;
1367
1.41M
}
1368
1369
static size_t
1370
142k
char_image(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) {
1371
142k
  size_t ret;
1372
1373
142k
  if (size < 2 || data[1] != '[') return 0;
1374
1375
57.7k
  ret = char_link(ob, doc, data + 1, offset + 1, size - 1);
1376
57.7k
  if (!ret) return 0;
1377
8.21k
  return ret + 1;
1378
57.7k
}
1379
1380
/* char_link • '[': parsing a link, a footnote or an image */
1381
static size_t
1382
char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1383
772k
{
1384
772k
  int is_img = (offset && data[-1] == '!' && !is_escaped(data - offset, offset - 1));
1385
772k
  int is_footnote = (doc->ext_flags & HOEDOWN_EXT_FOOTNOTES && size > 1 && data[1] == '^');
1386
772k
  size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0;
1387
772k
  hoedown_buffer *content = NULL;
1388
772k
  hoedown_buffer *link = NULL;
1389
772k
  hoedown_buffer *title = NULL;
1390
772k
  hoedown_buffer *u_link = NULL;
1391
772k
  hoedown_buffer *inline_attr = NULL;
1392
772k
  hoedown_buffer *ref_attr = NULL;
1393
772k
  hoedown_buffer *attr = NULL;
1394
772k
  hoedown_buffer *id = NULL;
1395
772k
  size_t org_work_size = doc->work_bufs[BUFFER_SPAN].size;
1396
772k
  int ret = 0, in_title = 0, qtype = 0;
1397
772k
  hoedown_link_type link_type = HOEDOWN_LINK_NONE;
1398
772k
  int ref_attr_exists = 0, inline_attr_exists = 0;
1399
1400
  /* checking whether the correct renderer exists */
1401
772k
  if ((is_footnote && !doc->md.footnote_ref) || (is_img && !doc->md.image)
1402
772k
    || (!is_img && !is_footnote && !doc->md.link))
1403
0
    goto cleanup;
1404
1405
  /* looking for the matching closing bracket */
1406
772k
  i += find_emph_char(data + i, size - i, ']');
1407
772k
  txt_e = i;
1408
1409
772k
  if (i < size && data[i] == ']') i++;
1410
201k
  else goto cleanup;
1411
1412
  /* footnote link */
1413
571k
  if (is_footnote) {
1414
34.4k
    hoedown_buffer id = { NULL, 0, 0, 0, NULL, NULL, NULL };
1415
34.4k
    struct footnote_ref *fr;
1416
1417
34.4k
    if (txt_e < 3)
1418
19.8k
      goto cleanup;
1419
1420
14.6k
    id.data = data + 2;
1421
14.6k
    id.size = txt_e - 2;
1422
1423
14.6k
    fr = find_footnote_ref(&doc->footnotes_found, id.data, id.size);
1424
1425
    /* mark footnote used */
1426
14.6k
    if (fr && !fr->is_used) {
1427
862
      if(!add_footnote_ref(&doc->footnotes_used, fr))
1428
0
        goto cleanup;
1429
862
      fr->is_used = 1;
1430
862
      fr->num = doc->footnotes_used.count;
1431
1432
      /* render */
1433
862
      if (doc->md.footnote_ref) {
1434
862
        doc->link_id = &id;
1435
862
        ret = doc->md.footnote_ref(ob, fr->num, &doc->data);
1436
862
        doc->link_id = NULL;
1437
862
      }
1438
862
    }
1439
1440
14.6k
    goto cleanup;
1441
14.6k
  }
1442
1443
  /* skip any amount of spacing */
1444
  /* (this is much more laxist than original markdown syntax) */
1445
2.00M
  while (i < size && _isspace(data[i]))
1446
1.47M
    i++;
1447
1448
  /* inline style link */
1449
537k
  if (i < size && data[i] == '(') {
1450
43.7k
    size_t nb_p;
1451
1452
43.7k
    link_type = HOEDOWN_LINK_INLINE;
1453
1454
    /* skipping initial spacing */
1455
43.7k
    i++;
1456
1457
934k
    while (i < size && _isspace(data[i]))
1458
890k
      i++;
1459
1460
43.7k
    link_b = i;
1461
1462
    /* looking for link end: ' " ) */
1463
    /* Count the number of open parenthesis */
1464
43.7k
    nb_p = 0;
1465
1466
26.7M
    while (i < size) {
1467
26.7M
      if (data[i] == '\\') i += 2;
1468
26.6M
      else if (data[i] == '(' && i != 0) {
1469
133k
        nb_p++; i++;
1470
133k
      }
1471
26.5M
      else if (data[i] == ')') {
1472
12.8k
        if (nb_p == 0) break;
1473
4.85k
        nb_p--; i++;
1474
26.5M
      } else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break;
1475
26.4M
      else i++;
1476
26.7M
    }
1477
1478
43.7k
    if (i >= size) goto cleanup;
1479
30.9k
    link_e = i;
1480
1481
    /* looking for title end if present */
1482
30.9k
    if (data[i] == '\'' || data[i] == '"') {
1483
22.8k
      qtype = data[i];
1484
22.8k
      in_title = 1;
1485
22.8k
      i++;
1486
22.8k
      title_b = i;
1487
1488
514M
      while (i < size) {
1489
514M
        if (data[i] == '\\') i += 2;
1490
514M
        else if (data[i] == qtype) {in_title = 0; i++;}
1491
514M
        else if ((data[i] == ')') && !in_title) break;
1492
514M
        else i++;
1493
514M
      }
1494
1495
22.8k
      if (i >= size) goto cleanup;
1496
1497
      /* skipping spacing after title */
1498
11.7k
      title_e = i - 1;
1499
341k
      while (title_e > title_b && _isspace(data[title_e]))
1500
329k
        title_e--;
1501
1502
      /* checking for closing quote presence */
1503
11.7k
      if (data[title_e] != '\'' &&  data[title_e] != '"') {
1504
3.15k
        title_b = title_e = 0;
1505
3.15k
        link_e = i;
1506
3.15k
      }
1507
11.7k
    }
1508
1509
    /* remove spacing at the end of the link */
1510
279k
    while (link_e > link_b && _isspace(data[link_e - 1]))
1511
259k
      link_e--;
1512
1513
    /* remove optional angle brackets around the link */
1514
19.7k
    if (data[link_b] == '<' && data[link_e - 1] == '>') {
1515
580
      link_b++;
1516
580
      link_e--;
1517
580
    }
1518
1519
    /* building escaped link and title */
1520
19.7k
    if (link_e > link_b) {
1521
4.94k
      link = newbuf(doc, BUFFER_SPAN);
1522
4.94k
      hoedown_buffer_put(link, data + link_b, link_e - link_b);
1523
4.94k
    }
1524
1525
19.7k
    if (title_e > title_b) {
1526
2.23k
      title = newbuf(doc, BUFFER_SPAN);
1527
2.23k
      hoedown_buffer_put(title, data + title_b, title_e - title_b);
1528
2.23k
    }
1529
1530
19.7k
    i++;
1531
19.7k
  }
1532
1533
  /* reference style link */
1534
493k
  else if (i < size && data[i] == '[') {
1535
59.6k
    struct link_ref *lr;
1536
1537
59.6k
    id = newbuf(doc, BUFFER_SPAN);
1538
1539
    /* looking for the id */
1540
59.6k
    i++;
1541
59.6k
    link_b = i;
1542
10.8M
    while (i < size && data[i] != ']') i++;
1543
59.6k
    if (i >= size) goto cleanup;
1544
55.1k
    link_e = i;
1545
1546
    /* finding the link_ref */
1547
55.1k
    if (link_b == link_e) {
1548
27.9k
      link_type = HOEDOWN_LINK_EMPTY_REFERENCE;
1549
27.9k
      replace_spacing(id, data + 1, txt_e - 1);
1550
27.9k
    } else {
1551
27.1k
      link_type = HOEDOWN_LINK_REFERENCE;
1552
27.1k
      hoedown_buffer_put(id, data + link_b, link_e - link_b);
1553
27.1k
    }
1554
1555
55.1k
    lr = find_link_ref(doc->refs, id->data, id->size);
1556
55.1k
    if (!lr)
1557
50.3k
      goto cleanup;
1558
1559
    /* keeping link and title from link_ref */
1560
4.86k
    link = lr->link;
1561
4.86k
    title = lr->title;
1562
4.86k
    ref_attr = lr->attr;
1563
4.86k
    i++;
1564
4.86k
  }
1565
1566
  /* shortcut reference style link */
1567
433k
  else {
1568
433k
    struct link_ref *lr;
1569
1570
433k
    id = newbuf(doc, BUFFER_SPAN);
1571
1572
433k
    link_type = HOEDOWN_LINK_SHORTCUT;
1573
1574
    /* crafting the id */
1575
433k
    replace_spacing(id, data + 1, txt_e - 1);
1576
1577
    /* finding the link_ref */
1578
433k
    lr = find_link_ref(doc->refs, id->data, id->size);
1579
433k
    if (!lr)
1580
174k
      goto cleanup;
1581
1582
    /* keeping link and title from link_ref */
1583
259k
    link = lr->link;
1584
259k
    title = lr->title;
1585
259k
    ref_attr = lr->attr;
1586
1587
    /* rewinding the spacing */
1588
259k
    i = txt_e + 1;
1589
259k
  }
1590
1591
  /* building content: img alt is kept, only link content is parsed */
1592
283k
  if (txt_e > 1) {
1593
24.0k
    content = newbuf(doc, BUFFER_SPAN);
1594
24.0k
    if (is_img) {
1595
13.8k
      hoedown_buffer_put(content, data + 1, txt_e - 1);
1596
13.8k
    } else {
1597
      /* disable autolinking when parsing inline the
1598
       * content of a link */
1599
10.1k
      doc->in_link_body = 1;
1600
10.1k
      parse_inline(content, doc, data + 1, txt_e - 1);
1601
10.1k
      doc->in_link_body = 0;
1602
10.1k
    }
1603
24.0k
  }
1604
1605
283k
  if (link) {
1606
269k
    u_link = newbuf(doc, BUFFER_SPAN);
1607
269k
    unscape_text(u_link, link);
1608
269k
  }
1609
1610
  /* if special attributes are enabled, attempt to parse an inline one from
1611
   * the link */
1612
283k
  if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
1613
    /* attr is a span because cleanup code depends on it being span */
1614
283k
    inline_attr = newbuf(doc, BUFFER_SPAN);
1615
283k
    i += parse_inline_attributes(data + i, size - i, inline_attr, doc->attr_activation);
1616
283k
  }
1617
1618
  /* remove optional < and > around inline and ref special attributes */
1619
283k
  if (ref_attr && ref_attr->size > 0) {
1620
29.6k
    if (ref_attr->size > 1) {
1621
6.11k
      if (ref_attr->data[0] == '<') {
1622
244
        hoedown_buffer_slurp(ref_attr, 1);
1623
244
      }
1624
6.11k
      if (ref_attr->data[ref_attr->size - 1] == '>') {
1625
81
        ref_attr->size--;
1626
81
      }
1627
6.11k
    }
1628
29.6k
  }
1629
283k
  if (inline_attr && inline_attr->size > 0) {
1630
4.43k
    if (inline_attr->size > 1) {
1631
3.51k
      if (inline_attr->data[0] == '<') {
1632
585
        hoedown_buffer_slurp(inline_attr, 1);
1633
585
      }
1634
3.51k
      if (inline_attr->data[inline_attr->size - 1] == '>') {
1635
221
        inline_attr->size--;
1636
221
      }
1637
3.51k
    }
1638
4.43k
  }
1639
1640
  /* construct the final attr that is actually applied to the link */
1641
283k
  ref_attr_exists = ref_attr && ref_attr->size > 0;
1642
283k
  inline_attr_exists = inline_attr && inline_attr->size > 0;
1643
283k
  if (ref_attr_exists || inline_attr_exists) {
1644
33.4k
    attr = newbuf(doc, BUFFER_SPAN);
1645
33.4k
    if (ref_attr_exists) {
1646
29.6k
      hoedown_buffer_put(attr, ref_attr->data, ref_attr->size);
1647
29.6k
    }
1648
    /* if both inline and ref attrs exist, join them with a space to prevent
1649
     * conflicts */
1650
33.4k
    if (ref_attr_exists && inline_attr_exists) {
1651
669
      hoedown_buffer_putc(attr, ' ');
1652
669
    }
1653
33.4k
    if (inline_attr_exists) {
1654
4.42k
      hoedown_buffer_put(attr, inline_attr->data, inline_attr->size);
1655
4.42k
    }
1656
33.4k
  }
1657
1658
  /* calling the relevant rendering function */
1659
283k
  doc->link_id = id;
1660
283k
  doc->link_type = link_type;
1661
283k
  doc->link_ref_attr = ref_attr;
1662
283k
  doc->link_inline_attr = inline_attr;
1663
283k
  if (is_img) {
1664
24.9k
    ret = doc->md.image(ob, u_link, title, content, attr, &doc->data);
1665
259k
  } else {
1666
259k
    ret = doc->md.link(ob, content, u_link, title, attr, &doc->data);
1667
259k
  }
1668
283k
  doc->link_inline_attr = NULL;
1669
283k
  doc->link_ref_attr = NULL;
1670
283k
  doc->link_type = HOEDOWN_LINK_NONE;
1671
283k
  doc->link_id = NULL;
1672
1673
  /* cleanup */
1674
772k
cleanup:
1675
772k
  doc->work_bufs[BUFFER_SPAN].size = (int)org_work_size;
1676
772k
  return ret ? i : 0;
1677
283k
}
1678
1679
static size_t
1680
char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1681
266k
{
1682
266k
  size_t sup_start, sup_len;
1683
266k
  hoedown_buffer *sup;
1684
1685
266k
  if (!doc->md.superscript)
1686
0
    return 0;
1687
1688
266k
  if (size < 2)
1689
8.33k
    return 0;
1690
1691
257k
  if (data[1] == '(') {
1692
27.0k
    sup_start = 2;
1693
27.0k
    sup_len = find_emph_char(data + 2, size - 2, ')') + 2;
1694
1695
27.0k
    if (sup_len == size)
1696
4.11k
      return 0;
1697
230k
  } else {
1698
230k
    sup_start = sup_len = 1;
1699
1700
6.54M
    while (sup_len < size && !_isspace(data[sup_len]))
1701
6.31M
      sup_len++;
1702
230k
  }
1703
1704
253k
  if (sup_len - sup_start == 0)
1705
23.2k
    return (sup_start == 2) ? 3 : 0;
1706
1707
230k
  sup = newbuf(doc, BUFFER_SPAN);
1708
230k
  parse_inline(sup, doc, data + sup_start, sup_len - sup_start);
1709
230k
  doc->md.superscript(ob, sup, &doc->data);
1710
230k
  popbuf(doc, BUFFER_SPAN);
1711
1712
230k
  return (sup_start == 2) ? sup_len + 1 : sup_len;
1713
253k
}
1714
1715
static size_t
1716
char_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size)
1717
31.9k
{
1718
  /* double dollar */
1719
31.9k
  if (size > 1 && data[1] == '$')
1720
6.45k
    return parse_math(ob, doc, data, offset, size, "$$", 2, 1);
1721
1722
  /* single dollar allowed only with MATH_EXPLICIT flag */
1723
25.4k
  if (doc->ext_flags & HOEDOWN_EXT_MATH_EXPLICIT)
1724
25.4k
    return parse_math(ob, doc, data, offset, size, "$", 1, 0);
1725
1726
0
  return 0;
1727
25.4k
}
1728
1729
/*********************************
1730
 * BLOCK-LEVEL PARSING FUNCTIONS *
1731
 *********************************/
1732
1733
/* is_empty • returns the line length when it is empty, 0 otherwise */
/*   the length counts one byte for the line terminator, even when the data
 *   ends without a '\n' */
static size_t
is_empty(const uint8_t *data, size_t size)
{
  size_t pos = 0;

  while (pos < size) {
    uint8_t ch = data[pos];

    if (ch == '\n')
      break;
    if (ch != ' ')
      return 0;

    pos++;
  }

  return pos + 1;
}
1745
1746
/* is_hrule • returns whether a line is a horizontal rule */
/*   a rule is up to three leading spaces followed by at least three '*',
 *   '-' or '_' of the same kind, with nothing but spaces in between */
static int
is_hrule(uint8_t *data, size_t size)
{
  size_t pos = 0, marks = 0;
  uint8_t mark;

  if (size < 3)
    return 0;

  /* skipping up to three initial spaces */
  while (pos < 3 && data[pos] == ' ')
    pos++;

  /* looking at the hrule char */
  if (pos + 2 >= size)
    return 0;

  mark = data[pos];
  if (mark != '*' && mark != '-' && mark != '_')
    return 0;

  /* the whole line must be the char or space */
  for (; pos < size && data[pos] != '\n'; pos++) {
    if (data[pos] == mark)
      marks++;
    else if (data[pos] != ' ')
      return 0;
  }

  return marks >= 3;
}
1776
1777
/* is_codefence • checks whether a line opens or closes a code fence */
/*   returns the offset just past the run of fence characters, or 0 when the
 *   line is not a fence; when non-NULL, *width receives the run length and
 *   *chr the fence character ('~' or '`') */
static size_t
is_codefence(uint8_t *data, size_t size, size_t *width, uint8_t *chr)
{
  size_t pos = 0, run = 1, rest;
  uint8_t fence;

  if (size < 3)
    return 0;

  /* skipping up to three initial spaces */
  while (pos < 3 && data[pos] == ' ')
    pos++;

  /* looking at the fence char */
  if (pos + 2 >= size)
    return 0;

  fence = data[pos];
  if (fence != '~' && fence != '`')
    return 0;

  /* the fence must be a run of that same character */
  for (pos++; pos < size && data[pos] == fence; pos++)
    run++;

  if (run < 3)
    return 0;

  /* the fence char later on the same line means this is a codespan */
  for (rest = pos; rest < size && data[rest] != '\n'; rest++) {
    if (data[rest] == fence)
      return 0;
  }

  if (width) *width = run;
  if (chr) *chr = fence;
  return pos;
}
1819
1820
/* expects single line, checks if it's a codefence and extracts language */
1821
static int
1822
parse_codefence(hoedown_document *doc, uint8_t *data, size_t size, hoedown_buffer *lang, size_t *width, uint8_t *chr, unsigned int flags, hoedown_buffer *attr)
1823
5.58M
{
1824
5.58M
  size_t i, w, lang_start, attr_start = 0;
1825
1826
5.58M
  i = w = is_codefence(data, size, width, chr);
1827
5.58M
  if (i == 0)
1828
5.57M
    return 0;
1829
1830
123k
  while (i < size && _isspace(data[i]))
1831
109k
    i++;
1832
1833
13.5k
  lang_start = i;
1834
1835
13.5k
  if (flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
1836
13.5k
    attr_start = i + parse_attributes(data + i, size - i, attr, NULL, "", 0, doc->attr_activation);
1837
69.0k
    while (i < attr_start) {
1838
56.8k
      if (_isspace(data[i])) {
1839
1.38k
        break;
1840
1.38k
      }
1841
55.5k
      i++;
1842
55.5k
    }
1843
13.5k
  } else {
1844
0
    while (i < size && !_isspace(data[i]))
1845
0
      i++;
1846
0
  }
1847
1848
13.5k
  lang->data = data + lang_start;
1849
13.5k
  lang->size = i - lang_start;
1850
1851
13.5k
  return w;
1852
5.58M
}
1853
1854
/* is_atxheader • returns whether the line is a hash-prefixed header */
1855
static int
1856
is_atxheader(hoedown_document *doc, uint8_t *data, size_t size)
1857
6.37M
{
1858
6.37M
  size_t level = 0, begin = 0, len;
1859
6.37M
  uint8_t *p;
1860
1861
6.37M
  if (data[0] != '#')
1862
6.26M
    return 0;
1863
1864
231k
  while (level < size && level < 6 && data[level] == '#')
1865
127k
    level++;
1866
1867
104k
  if (level >= size || data[level] == '\n') {
1868
33.2k
      return 0;
1869
33.2k
  }
1870
1871
70.9k
  len = size - level;
1872
70.9k
  p = memchr(data + level, '\n', len);
1873
70.9k
  if (p) {
1874
68.6k
    len = p - (data + level) + 1;
1875
68.6k
  }
1876
1877
  /* if the header is only whitespace, it is not a header */
1878
70.9k
  if (len && is_empty_all(data + level, len)) {
1879
1.34k
    return 0;
1880
1.34k
  }
1881
1882
69.5k
  if ((doc->ext_flags & HOEDOWN_EXT_SPACE_HEADERS) && level < size && data[level] != ' ') {
1883
13.2k
    return 0;
1884
13.2k
  }
1885
1886
  /* if the header is only special attribute, it is not a header */
1887
56.3k
  if (len && (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE)) {
1888
56.3k
    p = memchr(data + level, '{', len);
1889
56.3k
    if (p) {
1890
      /* get number of characters from # to { */
1891
16.4k
      begin = p - (data + level);
1892
16.4k
      if (begin > 0 && !is_empty_all(data + level, begin)) {
1893
3.07k
        return 1;
1894
3.07k
      }
1895
      /* check for special attributes after the # */
1896
13.3k
      return !parse_inline_attributes(data + level + begin, len - begin, NULL, doc->attr_activation);
1897
16.4k
    }
1898
56.3k
  }
1899
1900
39.8k
  return 1;
1901
56.3k
}
1902
1903
/* is_headerline • returns whether the line is a setext-style hdr underline */
/*   returns 1 for a run of '=', 2 for a run of '-', each optionally followed
 *   by spaces up to the end of the line, and 0 for anything else (including
 *   empty input) */
static int
is_headerline(uint8_t *data, size_t size)
{
  size_t i = 0;

  /* empty input has no first byte to look at */
  if (size == 0)
    return 0;

  /* test of level 1 header */
  if (data[i] == '=') {
    for (i = 1; i < size && data[i] == '='; i++);
    while (i < size && data[i] == ' ') i++;
    return (i >= size || data[i] == '\n') ? 1 : 0; }

  /* test of level 2 header */
  if (data[i] == '-') {
    for (i = 1; i < size && data[i] == '-'; i++);
    while (i < size && data[i] == ' ') i++;
    return (i >= size || data[i] == '\n') ? 2 : 0; }

  return 0;
}
1923
1924
/* is_next_headerline • returns the is_headerline() result for the line that
 * follows the first one in data, 0 when there is no such line */
static int
is_next_headerline(uint8_t *data, size_t size)
{
  uint8_t *newline = memchr(data, '\n', size);
  size_t next;

  /* no newline: there is no second line */
  if (!newline)
    return 0;

  /* the second line starts right after the newline and must not be empty */
  next = (size_t)(newline - data) + 1;
  if (next >= size)
    return 0;

  return is_headerline(data + next, size - next);
}
1937
1938
/* prefix_quote • returns blockquote prefix length */
/*   the prefix is up to three spaces, a '>' and one optional space;
 *   0 means the line is not a blockquote */
static size_t
prefix_quote(uint8_t *data, size_t size)
{
  size_t pos = 0;

  /* up to three leading spaces */
  while (pos < 3 && pos < size && data[pos] == ' ')
    pos++;

  if (pos >= size || data[pos] != '>')
    return 0;

  /* a single space after the '>' belongs to the prefix */
  return (pos + 1 < size && data[pos + 1] == ' ') ? pos + 2 : pos + 1;
}
1956
1957
/* prefix_code • returns prefix length for block code */
/*   4 when the line starts with four spaces, 0 otherwise */
static size_t
prefix_code(uint8_t *data, size_t size)
{
  size_t k;

  if (size <= 3)
    return 0;

  for (k = 0; k < 4; k++) {
    if (data[k] != ' ')
      return 0;
  }

  return 4;
}
1966
1967
/* prefix_oli • returns ordered list item prefix
 *
 * The prefix is up to three spaces, one or more digits, then ". ".
 * Returns the prefix length, or 0 when the line is not an ordered item or
 * when the following line is a setext header underline. */
static size_t
prefix_oli(uint8_t *data, size_t size)
{
  size_t pos = 0;

  /* optional indentation, three spaces at most */
  while (pos < 3 && pos < size && data[pos] == ' ')
    pos++;

  /* at least one digit is mandatory */
  if (pos >= size || !(data[pos] >= '0' && data[pos] <= '9'))
    return 0;

  do {
    pos++;
  } while (pos < size && data[pos] >= '0' && data[pos] <= '9');

  /* the number must be followed by a dot and a space */
  if (pos + 1 >= size)
    return 0;
  if (data[pos] != '.' || data[pos + 1] != ' ')
    return 0;

  /* an underline on the next line makes this line a header instead */
  if (is_next_headerline(data + pos, size - pos))
    return 0;

  return pos + 2;
}
1991
1992
/* prefix_uli • returns unordered list item prefix
 *
 * The prefix is up to three spaces, one of '*', '+' or '-', then a space.
 * Returns the prefix length, or 0 when the line is not an unordered item or
 * when the following line is a setext header underline. */
static size_t
prefix_uli(uint8_t *data, size_t size)
{
  size_t pos = 0;
  uint8_t bullet;

  /* optional indentation, three spaces at most */
  while (pos < 3 && pos < size && data[pos] == ' ')
    pos++;

  /* need room for the bullet and the space after it */
  if (pos + 1 >= size)
    return 0;

  bullet = data[pos];
  if (bullet != '*' && bullet != '+' && bullet != '-')
    return 0;
  if (data[pos + 1] != ' ')
    return 0;

  /* an underline on the next line makes this line a header instead */
  if (is_next_headerline(data + pos, size - pos))
    return 0;

  return pos + 2;
}
2012
2013
/* prefix_dt • returns dictionary definition prefix
 *
 * The prefix is up to three spaces followed by ": " (four spaces would make
 * it an indented code block). Returns the prefix length, or 0 when the line
 * does not match or the following line is a setext header underline. */
static size_t
prefix_dt(uint8_t *data, size_t size)
{
  size_t pos = 0;

  /* optional indentation, three spaces at most */
  while (pos < 3 && pos < size && data[pos] == ' ')
    pos++;

  /* the first thing after the indentation must be a colon and a space */
  if (pos + 1 >= size)
    return 0;
  if (data[pos] != ':' || data[pos + 1] != ' ')
    return 0;

  /* an underline on the next line makes this line a header instead */
  if (is_next_headerline(data + pos, size - pos))
    return 0;

  return pos + 2;
}
2036
2037
/* is_paragraph • returns if the next block is a paragraph (doesn't follow any
2038
 * other special rules for other types of blocks) */
2039
static int
2040
is_paragraph(hoedown_document *doc, uint8_t *txt_data, size_t end);
2041
2042
/* prefix_dli • returns dictionary definition prefix
2043
 * a dli looks like a block of text, followed by optional whitespace, followed
2044
 * by another block with : as the first non-whitespace character */
2045
static size_t
2046
prefix_dli(hoedown_document *doc, uint8_t *data, size_t size)
2047
341k
{
2048
  /* end is to keep track of the final return value */
2049
341k
  size_t i = 0, j = 0, end = 0;
2050
341k
  int empty = 0;
2051
2052
  /* if the first line has a : in front of it, it can't be a definition list
2053
   * that starts at this point */
2054
341k
  if (prefix_dt(data, size)) {
2055
3.04k
    return 0;
2056
3.04k
  }
2057
2058
  /* temporarily toggle definition lists off to prevent infinite loops */
2059
338k
  doc->ext_flags &= ~HOEDOWN_EXT_DEFINITION_LISTS;
2060
2061
  /* check if it is a block of text with no double newlines inside, followed by
2062
   *  another block of text starting with : */
2063
5.36M
  while (i < size) {
2064
    /* if the line we are on is empty, flip the empty flag to indicate that
2065
     * the next block of text we see has to start with : to be considered
2066
     * a definition list; then skip to the next line */
2067
5.29M
    j = is_empty(data + i, size - i);
2068
5.29M
    if(j != 0) {
2069
201k
      empty = 1;
2070
201k
      i += j;
2071
201k
      continue;
2072
201k
    }
2073
2074
    /* if anything special is found while parsing the definition term part,
2075
     * then return so that the main loop can deal with it */
2076
5.09M
    if (!is_paragraph(doc, data + i, size - i)) {
2077
119k
      break;
2078
119k
    }
2079
2080
    /* check if the current line starts with :, returning the position of the
2081
     * beginning of the line if it does */
2082
4.97M
    j = prefix_dt(data + i, size - i);
2083
4.97M
    if (j > 0) {
2084
49.0k
      end = i;
2085
49.0k
      break;
2086
4.92M
    } else if(empty) {
2087
      /* if an empty newline has been found, then since : was not the first
2088
       * character after whitespaces, it can't be a definition list */
2089
101k
      break;
2090
101k
    }
2091
    /* scan characters until the next newline */
2092
194M
    for (i = i + 1; i < size && data[i - 1] != '\n'; i++);
2093
4.82M
  }
2094
2095
338k
  doc->ext_flags |= HOEDOWN_EXT_DEFINITION_LISTS;
2096
338k
  return end;
2097
341k
}
2098
2099
/* parse_block • parsing of one block, returning next uint8_t to parse */
2100
static void parse_block(hoedown_buffer *ob, hoedown_document *doc,
2101
      uint8_t *data, size_t size);
2102
2103
2104
/* parse_blockquote • handles parsing of a blockquote fragment */
2105
static size_t
2106
parse_blockquote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
2107
27.5k
{
2108
27.5k
  size_t beg, end = 0, pre, work_size = 0;
2109
27.5k
  uint8_t *work_data = 0;
2110
27.5k
  hoedown_buffer *out = 0;
2111
2112
27.5k
  doc->blockquote_depth++;
2113
2114
27.5k
  out = newbuf(doc, BUFFER_BLOCK);
2115
27.5k
  beg = 0;
2116
264k
  while (beg < size) {
2117
21.9M
    for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);
2118
2119
255k
    pre = prefix_quote(data + beg, end - beg);
2120
2121
255k
    if (pre)
2122
31.4k
      beg += pre; /* skipping prefix */
2123
2124
    /* empty line finished */
2125
223k
    else if ((doc->ext_flags & HOEDOWN_EXT_BLOCKQUOTE_EMPTY_LINE) &&
2126
223k
        (is_empty(data + beg, end - beg)))
2127
17.7k
      break;
2128
2129
    /* empty line followed by non-quote line */
2130
205k
    else if (is_empty(data + beg, end - beg) &&
2131
205k
        (end >= size || (prefix_quote(data + end, size - end) == 0 &&
2132
0
        !is_empty(data + end, size - end))))
2133
0
      break;
2134
2135
237k
    if (beg < end) { /* copy into the in-place working buffer */
2136
      /* hoedown_buffer_put(work, data + beg, end - beg); */
2137
234k
      if (!work_data)
2138
24.7k
        work_data = data + beg;
2139
209k
      else if (data + beg != work_data + work_size)
2140
46.0k
        memmove(work_data + work_size, data + beg, end - beg);
2141
234k
      work_size += end - beg;
2142
234k
    }
2143
237k
    beg = end;
2144
237k
  }
2145
2146
27.5k
  parse_block(out, doc, work_data, work_size);
2147
27.5k
  if (doc->md.blockquote)
2148
27.5k
    doc->md.blockquote(ob, out, &doc->data);
2149
27.5k
  popbuf(doc, BUFFER_BLOCK);
2150
2151
27.5k
  doc->blockquote_depth--;
2152
2153
27.5k
  return end;
2154
27.5k
}
2155
2156
static size_t
2157
parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int do_render);
2158
2159
/* parse_paragraph • handles parsing of a regular paragraph */
2160
static size_t
2161
parse_paragraph(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
2162
282k
{
2163
282k
  hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
2164
282k
  size_t i = 0, end = 0;
2165
282k
  int level = 0;
2166
2167
282k
  work.data = data;
2168
2169
865k
  while (i < size) {
2170
151M
    for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;
2171
2172
826k
    if (is_empty(data + i, size - i))
2173
66.9k
      break;
2174
2175
759k
    if ((level = is_headerline(data + i, size - i)) != 0) {
2176
159k
      if (i == 0) {
2177
90.1k
        level = 0;
2178
90.1k
        i = end;
2179
90.1k
      }
2180
159k
      break;
2181
159k
    }
2182
2183
600k
    if (is_atxheader(doc, data + i, size - i) ||
2184
600k
      is_hrule(data + i, size - i) ||
2185
600k
      prefix_quote(data + i, size - i)) {
2186
16.6k
      end = i;
2187
16.6k
      break;
2188
16.6k
    }
2189
2190
583k
    i = end;
2191
583k
  }
2192
2193
282k
  work.size = i;
2194
553k
  while (work.size && data[work.size - 1] == '\n')
2195
271k
    work.size--;
2196
2197
282k
  if (!level) {
2198
212k
    hoedown_buffer *attr = newbuf(doc, BUFFER_ATTRIBUTE);
2199
212k
    if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
2200
212k
      parse_attributes(work.data, work.size, NULL, attr, "paragraph", 1, doc->attr_activation);
2201
212k
      if (attr->size > 0) {
2202
        /* remove the length of the attribute from the work size - the 12 comes
2203
        * from the leading space (1), the paragraph (9), the @ symbol (1), and
2204
        * the {} (2) (any extra spaces in the attribute are included inside
2205
        * the attribute) */
2206
0
        work.size -= attr->size + 12;
2207
0
      }
2208
212k
    }
2209
2210
212k
    hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK);
2211
212k
    parse_inline(tmp, doc, work.data, work.size);
2212
212k
    if (doc->md.paragraph)
2213
212k
      doc->md.paragraph(ob, tmp, attr, &doc->data);
2214
212k
    popbuf(doc, BUFFER_BLOCK);
2215
212k
    popbuf(doc, BUFFER_ATTRIBUTE);
2216
212k
  } else {
2217
69.3k
    hoedown_buffer *header_work;
2218
69.3k
    hoedown_buffer *attr_work;
2219
69.3k
    size_t len;
2220
2221
69.3k
    if (work.size) {
2222
69.3k
      size_t beg;
2223
69.3k
      i = work.size;
2224
69.3k
      work.size -= 1;
2225
2226
9.59M
      while (work.size && data[work.size] != '\n')
2227
9.52M
        work.size -= 1;
2228
2229
69.3k
      beg = work.size + 1;
2230
69.3k
      while (work.size && data[work.size - 1] == '\n')
2231
0
        work.size -= 1;
2232
2233
69.3k
      if (work.size > 0) {
2234
27.4k
        hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK);
2235
27.4k
        parse_inline(tmp, doc, work.data, work.size);
2236
2237
27.4k
        if (doc->md.paragraph)
2238
27.4k
          doc->md.paragraph(ob, tmp, NULL, &doc->data);
2239
2240
27.4k
        popbuf(doc, BUFFER_BLOCK);
2241
27.4k
        work.data += beg;
2242
27.4k
        work.size = i - beg;
2243
27.4k
      }
2244
41.8k
      else work.size = i;
2245
69.3k
    }
2246
2247
69.3k
    header_work = newbuf(doc, BUFFER_SPAN);
2248
69.3k
    attr_work = newbuf(doc, BUFFER_ATTRIBUTE);
2249
2250
69.3k
    len = work.size;
2251
69.3k
    if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
2252
69.3k
      len = parse_attributes(work.data, work.size, attr_work, NULL, "", 1, doc->attr_activation);
2253
69.3k
    }
2254
2255
69.3k
    parse_inline(header_work, doc, work.data, len);
2256
2257
69.3k
    if (doc->md.header) {
2258
69.3k
      doc->header_type = HOEDOWN_HEADER_SETEXT;
2259
69.3k
      doc->md.header(ob, header_work, attr_work, (int)level, &doc->data);
2260
69.3k
      doc->header_type = HOEDOWN_HEADER_NONE;
2261
69.3k
    }
2262
2263
69.3k
    popbuf(doc, BUFFER_SPAN);
2264
69.3k
    popbuf(doc, BUFFER_ATTRIBUTE);
2265
69.3k
  }
2266
2267
282k
  return end;
2268
282k
}
2269
2270
/* parse_fencedcode • handles parsing of a block-level code fragment */
2271
static size_t
2272
parse_fencedcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, unsigned int flags)
2273
5.58M
{
2274
5.58M
  hoedown_buffer text = { 0, 0, 0, 0, NULL, NULL, NULL };
2275
5.58M
  hoedown_buffer lang = { 0, 0, 0, 0, NULL, NULL, NULL };
2276
5.58M
  size_t i = 0, text_start, line_start;
2277
5.58M
  size_t w, w2;
2278
5.58M
  size_t width, width2;
2279
5.58M
  uint8_t chr, chr2;
2280
  /* needed for parse_attribute functions as buffer functions do not work with
2281
   * buffers on the stack */
2282
5.58M
  hoedown_buffer *attr = newbuf(doc, BUFFER_ATTRIBUTE);
2283
2284
2285
  /* parse codefence line */
2286
270M
  while (i < size && data[i] != '\n')
2287
264M
    i++;
2288
2289
5.58M
  w = parse_codefence(doc, data, i, &lang, &width, &chr, flags, attr);
2290
5.58M
  if (!w) {
2291
5.57M
    popbuf(doc, BUFFER_ATTRIBUTE);
2292
5.57M
    return 0;
2293
5.57M
  }
2294
2295
  /* search for end */
2296
13.5k
  i++;
2297
13.5k
  text_start = i;
2298
3.28M
  while ((line_start = i) < size) {
2299
88.0M
    while (i < size && data[i] != '\n')
2300
84.7M
      i++;
2301
2302
3.27M
    w2 = is_codefence(data + line_start, i - line_start, &width2, &chr2);
2303
3.27M
    if (w == w2 && width == width2 && chr == chr2 &&
2304
3.27M
      is_empty(data + (line_start+w), i - (line_start+w)))
2305
3.23k
      break;
2306
2307
3.26M
    if (i < size) i++;
2308
3.26M
  }
2309
2310
13.5k
  text.data = data + text_start;
2311
13.5k
  text.size = line_start - text_start;
2312
2313
13.5k
  if (doc->md.blockcode) {
2314
6.61k
    doc->fencedcode_char = chr;
2315
6.61k
    doc->md.blockcode(ob, text.size ? &text : NULL, lang.size ? &lang : NULL, attr->size ? attr : NULL, &doc->data);
2316
6.61k
    doc->fencedcode_char = 0;
2317
6.61k
  }
2318
2319
13.5k
  popbuf(doc, BUFFER_ATTRIBUTE);
2320
2321
13.5k
  return i;
2322
5.58M
}
2323
2324
static size_t
2325
parse_blockcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
2326
0
{
2327
0
  size_t beg, end, pre;
2328
0
  hoedown_buffer *work = 0;
2329
0
  hoedown_buffer *attr = 0;
2330
2331
0
  work = newbuf(doc, BUFFER_BLOCK);
2332
0
  attr = newbuf(doc, BUFFER_ATTRIBUTE);
2333
2334
0
  beg = 0;
2335
0
  while (beg < size) {
2336
0
    for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {};
2337
0
    pre = prefix_code(data + beg, end - beg);
2338
2339
0
    if (pre)
2340
0
      beg += pre; /* skipping prefix */
2341
0
    else if (!is_empty(data + beg, end - beg))
2342
      /* non-empty non-prefixed line breaks the pre */
2343
0
      break;
2344
2345
0
    if (beg < end) {
2346
      /* verbatim copy to the working buffer,
2347
        escaping entities */
2348
0
      if (is_empty(data + beg, end - beg))
2349
0
        hoedown_buffer_putc(work, '\n');
2350
0
      else hoedown_buffer_put(work, data + beg, end - beg);
2351
0
    }
2352
0
    beg = end;
2353
0
  }
2354
2355
0
  while (work->size && work->data[work->size - 1] == '\n')
2356
0
    work->size -= 1;
2357
2358
0
  if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
2359
0
    work->size = parse_attributes(work->data, work->size, NULL, attr, "", 0, doc->attr_activation);
2360
0
  }
2361
2362
0
  hoedown_buffer_putc(work, '\n');
2363
2364
0
  if (doc->md.blockcode)
2365
0
    doc->md.blockcode(ob, work, NULL, attr, &doc->data);
2366
2367
0
  popbuf(doc, BUFFER_BLOCK);
2368
0
  popbuf(doc, BUFFER_ATTRIBUTE);
2369
0
  return beg;
2370
0
}
2371
2372
/* parse_listitem • parsing of a single list item */
2373
/*  assuming initial prefix is already removed */
2374
static size_t
2375
parse_listitem(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags *flags, hoedown_buffer *attribute)
2376
328k
{
2377
328k
  hoedown_buffer *work = 0, *inter = 0;
2378
328k
  hoedown_buffer *attr = 0;
2379
328k
  size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i, len, fence_pre = 0;
2380
328k
  int in_empty = 0, has_inside_empty = 0, in_fence = 0;
2381
328k
  uint8_t ul_item_char = '*';
2382
328k
  hoedown_buffer *ol_numeral = NULL;
2383
2384
  /* keeping track of the first indentation prefix */
2385
335k
  while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
2386
6.79k
    orgpre++;
2387
2388
328k
  beg = prefix_uli(data, size);
2389
328k
  if (beg) ul_item_char = data[beg - 2];
2390
328k
  if (!beg) {
2391
178k
    beg = prefix_oli(data, size);
2392
178k
    if (beg) {
2393
90.2k
      ol_numeral = hoedown_buffer_new(1024);
2394
      /* -2 to eliminate the trailing ". " */
2395
90.2k
      hoedown_buffer_put(ol_numeral, data, beg - 2);
2396
90.2k
    }
2397
178k
    if (*flags & HOEDOWN_LIST_DEFINITION) {
2398
89.1k
      beg = prefix_dt(data, size);
2399
89.1k
      if (beg) ul_item_char = data[beg - 2];
2400
89.1k
    }
2401
178k
  }
2402
2403
328k
  if (!beg) {
2404
24.6k
    if (ol_numeral) hoedown_buffer_free(ol_numeral);
2405
24.6k
    return 0;
2406
24.6k
  }
2407
2408
  /* skipping to the beginning of the following line */
2409
304k
  end = beg;
2410
16.9M
  while (end < size && data[end - 1] != '\n')
2411
16.6M
    end++;
2412
2413
304k
  if (doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) {
2414
304k
    fence_pre = is_codefence(data + beg, end - beg, &len, NULL);
2415
304k
    if (fence_pre) {
2416
4.24k
      in_fence = 1;
2417
4.24k
      fence_pre = fence_pre + beg - len;
2418
4.24k
    }
2419
304k
  }
2420
2421
  /* getting working buffers */
2422
304k
  work = newbuf(doc, BUFFER_SPAN);
2423
304k
  inter = newbuf(doc, BUFFER_SPAN);
2424
2425
  /* calculating the indentation */
2426
304k
  i = 0;
2427
453k
  while (i < 4 && beg + i < end && data[beg + i] == ' ')
2428
149k
    i++;
2429
2430
304k
  beg += i;
2431
2432
  /* putting the first line into the working buffer */
2433
304k
  hoedown_buffer_put(work, data + beg, end - beg);
2434
304k
  beg = end;
2435
2436
304k
  attr = newbuf(doc, BUFFER_ATTRIBUTE);
2437
2438
  /* process the following lines */
2439
872k
  while (beg < size) {
2440
852k
    size_t has_next_uli = 0, has_next_oli = 0, has_next_dli = 0;
2441
2442
852k
    end++;
2443
2444
55.7M
    while (end < size && data[end - 1] != '\n')
2445
54.8M
      end++;
2446
2447
    /* process an empty line */
2448
852k
    if (is_empty(data + beg, end - beg)) {
2449
219k
      in_empty = 1;
2450
219k
      beg = end;
2451
219k
      continue;
2452
219k
    }
2453
2454
    /* calculating the indentation */
2455
632k
    i = 0;
2456
780k
    while (i < 4 && beg + i < end && data[beg + i] == ' ')
2457
148k
      i++;
2458
2459
632k
    if (in_fence && i > fence_pre) {
2460
3.86k
      i = fence_pre;
2461
3.86k
    }
2462
2463
632k
    pre = i;
2464
2465
632k
    if (doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) {
2466
632k
      if (is_codefence(data + beg + i, end - beg - i, NULL, NULL))
2467
3.68k
        in_fence = !in_fence;
2468
632k
      if (in_fence && fence_pre == 0) {
2469
19.0k
        fence_pre = pre;
2470
19.0k
      }
2471
632k
    }
2472
2473
    /* Only check for new list items if we are **not** inside
2474
     * a fenced code block */
2475
632k
    if (!in_fence) {
2476
599k
      has_next_uli = prefix_uli(data + beg + i, end - beg - i);
2477
599k
      has_next_oli = prefix_oli(data + beg + i, end - beg - i);
2478
2479
      /* only check for the next definition if it is same indentation or less
2480
       * since embedded definition lists need terms, so finding just a
2481
       * colon by itself does not mean anything */
2482
599k
      if (pre <= orgpre)
2483
533k
        has_next_dli = prefix_dt(data + beg + i, end - beg - i);
2484
599k
    }
2485
2486
    /* checking for a new item */
2487
632k
    if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) || 
2488
632k
      has_next_oli || (*flags & HOEDOWN_LI_DD && has_next_dli)) {
2489
260k
      if (in_empty)
2490
160k
        has_inside_empty = 1;
2491
2492
      /* the following item must have the same (or less) indentation */
2493
260k
      if (pre <= orgpre) {
2494
        /* if the following item has different list type, we end this list */
2495
249k
        if (in_empty && (
2496
158k
          ((*flags & HOEDOWN_LIST_ORDERED) && has_next_uli) ||
2497
158k
          (!(*flags & HOEDOWN_LIST_ORDERED) && has_next_oli))) {
2498
146k
          *flags |= HOEDOWN_LI_END;
2499
146k
          has_inside_empty = 0;
2500
146k
        }
2501
249k
        break;
2502
249k
      }
2503
2504
10.8k
      if (!sublist)
2505
5.18k
        sublist = work->size;
2506
10.8k
    }
2507
    /* joining only indented stuff after empty lines;
2508
     * note that now we only require 1 space of indentation
2509
     * to continue a list */
2510
372k
    else if (in_empty && pre == 0) {
2511
33.3k
      *flags |= HOEDOWN_LI_END;
2512
33.3k
      break;
2513
33.3k
    }
2514
2515
349k
    if (in_empty) {
2516
9.24k
      hoedown_buffer_putc(work, '\n');
2517
9.24k
      has_inside_empty = 1;
2518
9.24k
      in_empty = 0;
2519
9.24k
    }
2520
2521
    /* adding the line without prefix into the working buffer */
2522
349k
    hoedown_buffer_put(work, data + beg + i, end - beg - i);
2523
349k
    beg = end;
2524
349k
  }
2525
2526
  /* render of li contents */
2527
304k
  if (has_inside_empty)
2528
17.0k
    *flags |= HOEDOWN_LI_BLOCK;
2529
2530
304k
  if (*flags & HOEDOWN_LI_BLOCK) {
2531
    /* intermediate render of block li */
2532
67.6k
    pre = 0;
2533
67.6k
    if (sublist && sublist < work->size) {
2534
2.64k
      end = sublist;
2535
64.9k
    } else {
2536
64.9k
      end = work->size;
2537
64.9k
    }
2538
2539
67.6k
    do {
2540
67.6k
      if (!(doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE)) {
2541
0
        break;
2542
0
      }
2543
2544
67.6k
      i = 0;
2545
9.39M
      while (i < end && work->data[i] != '\n') {
2546
9.32M
        i++;
2547
9.32M
      }
2548
2549
67.6k
      len = parse_attributes(work->data, i, attr, attribute, "list", 0, doc->attr_activation);
2550
67.6k
      if (i == len) {
2551
43.8k
        break;
2552
43.8k
      }
2553
2554
23.7k
      pre = i;
2555
23.7k
      parse_block(inter, doc, work->data, len);
2556
23.7k
    } while (0);
2557
2558
0
    parse_block(inter, doc, work->data + pre, end - pre);
2559
67.6k
    if (end == sublist) {
2560
2.64k
      parse_block(inter, doc, work->data + sublist, work->size - sublist);
2561
2.64k
    }
2562
236k
  } else {
2563
    /* intermediate render of inline li */
2564
236k
    if (sublist && sublist < work->size) {
2565
2.53k
      if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
2566
2.53k
        len = parse_attributes(work->data, sublist, attr, attribute, "list", 0, doc->attr_activation);
2567
2.53k
      } else {
2568
0
        len = sublist;
2569
0
      }
2570
2.53k
      parse_inline(inter, doc, work->data, len);
2571
2.53k
      parse_block(inter, doc, work->data + sublist, work->size - sublist);
2572
233k
    } else {
2573
233k
      if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
2574
233k
        len = parse_attributes(work->data, work->size, attr, attribute, "list", 0, doc->attr_activation);
2575
233k
      } else {
2576
0
        len = work->size;
2577
0
      }
2578
233k
      parse_inline(inter, doc, work->data, len);
2579
233k
    }
2580
236k
  }
2581
2582
  /* render of li itself */
2583
304k
  if (doc->md.listitem) {
2584
304k
    doc->ul_item_char = ul_item_char;
2585
304k
    doc->ol_numeral = ol_numeral;
2586
304k
    doc->md.listitem(ob, inter, attr, flags, &doc->data);
2587
304k
    doc->ol_numeral = NULL;
2588
304k
    doc->ul_item_char = 0;
2589
304k
  }
2590
2591
304k
  if (ol_numeral) hoedown_buffer_free(ol_numeral);
2592
2593
304k
  popbuf(doc, BUFFER_SPAN);
2594
304k
  popbuf(doc, BUFFER_SPAN);
2595
304k
  popbuf(doc, BUFFER_ATTRIBUTE);
2596
304k
  return beg;
2597
328k
}
2598
2599
/* parse_definition • parsing of a term/definition pair, assuming starting
2600
 * at start of line */
2601
static size_t
2602
parse_definition(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags *flags, hoedown_buffer *attribute)
2603
41.5k
{
2604
  /* end represents the position of the first line where definitions start */
2605
41.5k
  size_t j = 0, k = 0, len = 0, end = prefix_dli(doc, data, size);
2606
41.5k
  if (end <= 0) {
2607
10.2k
    return 0;
2608
10.2k
  }
2609
31.2k
  hoedown_buffer *work = 0, *attr_work;
2610
2611
2612
  /* scan all the definition terms, rendering them to the output buffer
2613
   * the +1 is to account for the trailing newline on each term
2614
   * j is a counter keeping track of the beginning of each new term */
2615
31.2k
  *flags |= HOEDOWN_LI_DT;
2616
120k
  while (j + 1 < end) {
2617
    /* find the end of the term (where the newline is) */
2618
6.33M
    for(k = j + 1; k - 1 < end && data[k - 1] != '\n'; k++);
2619
2620
89.7k
    len = k - j;
2621
2622
89.7k
    if (is_empty(data + j, len)) {
2623
1.74k
      j = k;
2624
1.74k
      continue;
2625
1.74k
    }
2626
2627
87.9k
    work = newbuf(doc, BUFFER_BLOCK);
2628
87.9k
    attr_work = newbuf(doc, BUFFER_ATTRIBUTE);
2629
2630
87.9k
    if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
2631
87.9k
      len = parse_attributes(data + j, len, attr_work, NULL, "", 1, doc->attr_activation);
2632
87.9k
    }
2633
2634
87.9k
    parse_inline(work, doc, data + j, len);
2635
2636
87.9k
    if (doc->md.listitem) {
2637
87.9k
      doc->md.listitem(ob, work, attr_work, flags, &doc->data);
2638
87.9k
    }
2639
2640
87.9k
    j = k;
2641
2642
87.9k
    popbuf(doc, BUFFER_BLOCK);
2643
87.9k
    popbuf(doc, BUFFER_ATTRIBUTE);
2644
87.9k
  }
2645
31.2k
  *flags &= ~HOEDOWN_LI_DT;
2646
2647
  /* scan all the definitions, rendering it to the output buffer */
2648
31.2k
  *flags |= HOEDOWN_LI_DD;
2649
118k
  while (end < size) {
2650
111k
    j = parse_listitem(ob, doc, data + end, size - end, flags, attribute);
2651
111k
    if (j <= 0) {
2652
23.6k
      break;
2653
23.6k
    }
2654
87.5k
    end += j;
2655
87.5k
  }
2656
2657
31.2k
  *flags &= ~HOEDOWN_LI_DD;
2658
31.2k
  *flags &= ~HOEDOWN_LI_END;
2659
2660
31.2k
  return end;
2661
41.5k
}
2662
2663
/* parse_list • parsing ordered or unordered list block */
2664
static size_t
2665
parse_list(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags flags)
2666
197k
{
2667
197k
  hoedown_buffer *work = 0;
2668
197k
  hoedown_buffer *attr = 0;
2669
197k
  size_t i = 0, j;
2670
2671
197k
  doc->list_depth++;
2672
2673
197k
  work = newbuf(doc, BUFFER_BLOCK);
2674
197k
  attr = newbuf(doc, BUFFER_ATTRIBUTE);
2675
2676
279k
  while (i < size) {
2677
259k
    if (flags & HOEDOWN_LIST_DEFINITION) {
2678
41.5k
      j = parse_definition(work, doc, data + i, size - i, &flags, attr);
2679
217k
    } else {
2680
217k
      j = parse_listitem(work, doc, data + i, size - i, &flags, attr);
2681
217k
    }
2682
259k
    i += j;
2683
2684
259k
    if (!j || (flags & HOEDOWN_LI_END))
2685
177k
      break;
2686
259k
  }
2687
2688
197k
  if (doc->md.list)
2689
197k
    doc->md.list(ob, work, attr, flags, &doc->data);
2690
197k
  popbuf(doc, BUFFER_BLOCK);
2691
197k
  popbuf(doc, BUFFER_ATTRIBUTE);
2692
2693
197k
  doc->list_depth--;
2694
2695
197k
  return i;
2696
197k
}
2697
2698
/* parse_atxheader • parsing of atx-style headers */
2699
static size_t
2700
parse_atxheader(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
2701
16.8k
{
2702
16.8k
  size_t level = 0;
2703
16.8k
  size_t i, end, skip;
2704
2705
42.3k
  while (level < size && level < 6 && data[level] == '#')
2706
25.4k
    level++;
2707
2708
599k
  for (i = level; i < size && data[i] == ' '; i++);
2709
2710
6.81M
  for (end = i; end < size && data[end] != '\n'; end++);
2711
16.8k
  skip = end;
2712
2713
18.7k
  while (end && data[end - 1] == '#')
2714
1.92k
    end--;
2715
2716
54.9k
  while (end && data[end - 1] == ' ')
2717
38.0k
    end--;
2718
2719
16.8k
  if (end > i) {
2720
15.2k
    hoedown_buffer *work = newbuf(doc, BUFFER_SPAN);
2721
15.2k
    hoedown_buffer *attr = newbuf(doc, BUFFER_ATTRIBUTE);
2722
15.2k
    size_t len;
2723
2724
15.2k
    len = end - i;
2725
15.2k
    if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
2726
15.2k
      len = parse_attributes(data + i, end - i, attr, NULL, "", 1, doc->attr_activation);
2727
15.2k
    }
2728
2729
15.2k
    parse_inline(work, doc, data + i, len);
2730
2731
15.2k
    if (doc->md.header) {
2732
15.2k
      doc->header_type = HOEDOWN_HEADER_ATX;
2733
15.2k
      doc->md.header(ob, work, attr, (int)level, &doc->data);
2734
15.2k
      doc->header_type = HOEDOWN_HEADER_NONE;
2735
15.2k
    }
2736
2737
15.2k
    popbuf(doc, BUFFER_SPAN);
2738
15.2k
    popbuf(doc, BUFFER_ATTRIBUTE);
2739
15.2k
  } else {
2740
1.60k
    doc->md.header(ob, NULL, NULL, (int)level, &doc->data);
2741
1.60k
  }
2742
2743
16.8k
  return skip;
2744
16.8k
}
2745
2746
/* parse_footnote_def • parse a single footnote definition */
2747
static void
2748
parse_footnote_def(hoedown_buffer *ob, hoedown_document *doc, unsigned int num, const hoedown_buffer *name, uint8_t *data, size_t size)
2749
862
{
2750
862
  hoedown_buffer *work = 0;
2751
862
  work = newbuf(doc, BUFFER_SPAN);
2752
862
  doc->footnote_id = name;
2753
2754
862
  parse_block(work, doc, data, size);
2755
2756
862
  if (doc->md.footnote_def)
2757
862
  doc->md.footnote_def(ob, work, num, &doc->data);
2758
2759
862
  doc->footnote_id = NULL;
2760
862
  popbuf(doc, BUFFER_SPAN);
2761
862
}
2762
2763
/* parse_footnote_list • render the contents of the footnotes */
2764
static void
2765
parse_footnote_list(hoedown_buffer *ob, hoedown_document *doc, struct footnote_list *footnotes)
2766
8.92k
{
2767
8.92k
  hoedown_buffer *work = 0;
2768
8.92k
  struct footnote_item *item;
2769
8.92k
  struct footnote_ref *ref;
2770
2771
8.92k
  if (footnotes->count == 0)
2772
8.25k
    return;
2773
2774
669
  work = newbuf(doc, BUFFER_BLOCK);
2775
2776
669
  item = footnotes->head;
2777
1.53k
  while (item) {
2778
862
    ref = item->ref;
2779
862
    parse_footnote_def(work, doc, ref->num, ref->name, ref->contents->data, ref->contents->size);
2780
862
    item = item->next;
2781
862
  }
2782
2783
669
  if (doc->md.footnotes)
2784
669
    doc->md.footnotes(ob, work, &doc->data);
2785
669
  popbuf(doc, BUFFER_BLOCK);
2786
669
}
2787
2788
/* htmlblock_is_end • check for end of HTML block : </tag>( *)\n */
2789
/*  returns tag length on match, 0 otherwise */
2790
/*  assumes data starts with "<" */
2791
static size_t
2792
htmlblock_is_end(
2793
  const char *tag,
2794
  size_t tag_len,
2795
  hoedown_document *doc,
2796
  uint8_t *data,
2797
  size_t size)
2798
7.90M
{
2799
7.90M
  size_t i = tag_len + 3, w;
2800
2801
  /* try to match the end tag */
2802
  /* note: we're not considering tags like "</tag >" which are still valid */
2803
7.90M
  if (i > size ||
2804
7.90M
    data[1] != '/' ||
2805
7.90M
    strncasecmp((char *)data + 2, tag, tag_len) != 0 ||
2806
7.90M
    data[tag_len + 2] != '>')
2807
7.80M
    return 0;
2808
2809
  /* rest of the line must be empty */
2810
105k
  if ((w = is_empty(data + i, size - i)) == 0 && i < size)
2811
86.4k
    return 0;
2812
2813
18.7k
  return i + w;
2814
105k
}
2815
2816
/* htmlblock_find_end • try to find HTML block ending tag */
2817
/*  returns the length on match, 0 otherwise */
2818
static size_t
2819
htmlblock_find_end(
2820
  const char *tag,
2821
  size_t tag_len,
2822
  hoedown_document *doc,
2823
  uint8_t *data,
2824
  size_t size)
2825
16.5M
{
2826
16.5M
  size_t i = 0, w;
2827
2828
24.4M
  while (1) {
2829
915M
    while (i < size && data[i] != '<') i++;
2830
24.4M
    if (i >= size) return 0;
2831
2832
7.90M
    w = htmlblock_is_end(tag, tag_len, doc, data + i, size - i);
2833
7.90M
    if (w) return i + w;
2834
7.89M
    i++;
2835
7.89M
  }
2836
16.5M
}
2837
2838
/* htmlblock_find_end_strict • try to find end of HTML block in strict mode */
2839
/*  (it must have a blank line or a new HTML tag afterwards) */
2840
/*  returns the length on match, 0 otherwise */
2841
static size_t
2842
htmlblock_find_end_strict(
2843
  const char *tag,
2844
  size_t tag_len,
2845
  hoedown_document *doc,
2846
  uint8_t *data,
2847
  size_t size)
2848
15.9k
{
2849
15.9k
  size_t i = 0, mark;
2850
2851
16.5M
  while (1) {
2852
16.5M
    mark = i;
2853
523M
    while (i < size && data[i] != '\n') i++;
2854
16.5M
    if (i < size) i++;
2855
16.5M
    if (i == mark) return 0;
2856
2857
16.5M
    mark += htmlblock_find_end(tag, tag_len, doc, data + mark, i - mark);
2858
16.5M
    if (mark == i && (is_empty(data + i, size - i) || (i + 1 < size && data[i] == '<' && data[i + 1] != '/') || i >= size)) break;
2859
16.5M
  }
2860
2861
1.81k
  return i;
2862
15.9k
}
2863
2864
/* parse_htmlblock • parsing of inline HTML block */
2865
static size_t
2866
parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int do_render)
2867
336k
{
2868
336k
  hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
2869
336k
  size_t i, j = 0, tag_len, tag_end;
2870
336k
  const char *curtag = NULL;
2871
336k
  int meta = 0;
2872
2873
336k
  work.data = data;
2874
2875
  /* identification of the opening tag */
2876
336k
  if (size < 2 || data[0] != '<')
2877
250
    return 0;
2878
2879
335k
  i = 1;
2880
10.6M
  while (i < size && data[i] != '>' && data[i] != ' ')
2881
10.2M
    i++;
2882
2883
335k
  if (i < size) {
2884
324k
    if (doc->ext_flags & HOEDOWN_EXT_HTML5_BLOCKS)
2885
324k
      curtag = hoedown_find_html5_block_tag((char *)data + 1, (int)i - 1);
2886
0
    else
2887
0
      curtag = hoedown_find_block_tag((char *)data + 1, (int)i - 1);
2888
324k
  }  
2889
2890
  /* handling of special cases */
2891
335k
  if (!curtag) {
2892
2893
    /* HTML comment, laxist form */
2894
319k
    if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') {
2895
24.1k
      i = 5;
2896
2897
24.1k
      if (data[4] == '*') {
2898
953
        meta++;
2899
953
      }
2900
2901
210M
      while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>'))
2902
210M
        i++;
2903
2904
24.1k
      if (data[i - 3] == '*') {
2905
255
        meta++;
2906
255
      }
2907
2908
24.1k
      i++;
2909
2910
24.1k
      if (i < size)
2911
19.1k
        j = is_empty(data + i, size - i);
2912
2913
24.1k
      if (j) {
2914
1.37k
        work.size = i + j;
2915
2916
1.37k
        if (do_render && doc->ext_flags & HOEDOWN_EXT_META_BLOCK &&
2917
1.37k
          meta == 2 && doc->meta) {
2918
0
          size_t org, sz;
2919
2920
0
          sz = work.size - 5;
2921
0
          while (sz > 0 && work.data[sz - 1] == '\n') {
2922
0
            sz--;
2923
0
          }
2924
2925
0
          org = 5;
2926
0
          while (org < sz && work.data[org] == '\n') {
2927
0
            org++;
2928
0
          }
2929
2930
0
          if (org < sz) {
2931
0
            hoedown_buffer_put(doc->meta, work.data + org, sz - org);
2932
0
            hoedown_buffer_putc(doc->meta, '\n');
2933
0
          }
2934
1.37k
        } else if (do_render && doc->md.blockhtml) {
2935
0
          doc->md.blockhtml(ob, &work, &doc->data);
2936
0
        }
2937
1.37k
        return work.size;
2938
1.37k
      }
2939
24.1k
    }
2940
2941
    /* HR, which is the only self-closing block tag considered */
2942
318k
    if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) {
2943
5.36k
      i = 3;
2944
22.0M
      while (i < size && data[i] != '>')
2945
22.0M
        i++;
2946
2947
5.36k
      if (i + 1 < size) {
2948
4.74k
        i++;
2949
4.74k
        j = is_empty(data + i, size - i);
2950
4.74k
        if (j) {
2951
476
          work.size = i + j;
2952
476
          if (do_render && doc->md.blockhtml)
2953
0
            doc->md.blockhtml(ob, &work, &doc->data);
2954
476
          return work.size;
2955
476
        }
2956
4.74k
      }
2957
5.36k
    }
2958
2959
    /* Extension script tags */
2960
317k
    if (doc->ext_flags & HOEDOWN_EXT_SCRIPT_TAGS) {
2961
317k
      i = script_tag_length(data, size);
2962
317k
      if (i) {
2963
7.60k
        if (i < size) {
2964
3.01k
          j = is_empty(data + i, size - i);
2965
3.01k
        }
2966
2967
7.60k
        if (j) {
2968
1.50k
          work.size = i + j;
2969
1.50k
          if (do_render && doc->md.blockhtml) {
2970
0
            doc->md.blockhtml(ob, &work, &doc->data);
2971
0
          }
2972
1.50k
          return work.size;
2973
1.50k
        }
2974
7.60k
      }
2975
2976
317k
    }
2977
2978
    /* no special case recognised */
2979
316k
    return 0;
2980
317k
  }
2981
2982
  /* looking for a matching closing tag in strict mode */
2983
15.9k
  tag_len = strlen(curtag);
2984
15.9k
  tag_end = htmlblock_find_end_strict(curtag, tag_len, doc, data, size);
2985
2986
  /* if not found, trying a second pass looking for indented match */
2987
  /* but not if tag is "ins" or "del" (following original Markdown.pl) */
2988
15.9k
  if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0)
2989
12.9k
    tag_end = htmlblock_find_end(curtag, tag_len, doc, data, size);
2990
2991
15.9k
  if (!tag_end)
2992
11.6k
    return 0;
2993
2994
  /* the end of the block has been found */
2995
4.29k
  work.size = tag_end;
2996
4.29k
  if (do_render && doc->md.blockhtml)
2997
0
    doc->md.blockhtml(ob, &work, &doc->data);
2998
2999
4.29k
  return tag_end;
3000
15.9k
}
3001
3002
/* Common function to parse table main rows and continued rows. */
3003
static size_t
3004
parse_table_cell_line(
3005
    hoedown_buffer *ob,
3006
    uint8_t *data,
3007
    size_t size,
3008
    size_t offset,
3009
    char separator,
3010
64.1k
    int is_continuation) {
3011
64.1k
  size_t pos, line_end, cell_start, cell_end, len, copy_start, copy_end;
3012
3013
64.1k
  pos = offset;
3014
3015
138k
  while (pos < size && _isspace(data[pos])) pos++;
3016
3017
64.1k
  cell_start = pos;
3018
3019
64.1k
  line_end = pos;
3020
15.0M
  while (line_end < size && data[line_end] != '\n') line_end++;
3021
64.1k
  len = find_separator_char(data + pos, line_end - pos, separator);
3022
3023
  /* Two possibilities for len == 0:
3024
     1) No more separator char found in the current line.
3025
     2) The next separator is right after the current one, i.e. empty cell.
3026
     For case 1, we skip to the end of line; for case 2 we just continue.
3027
  */
3028
64.1k
  if (len == 0 && pos < size && data[pos] != separator) {
3029
3.10M
    while (pos + len < size && data[pos + len] != '\n') len++;
3030
24.4k
  }
3031
64.1k
  pos += len;
3032
3033
64.1k
  cell_end = pos - 1;
3034
3035
514k
  while (cell_end > cell_start && _isspace(data[cell_end]))
3036
450k
    cell_end--;
3037
3038
  /* If this isn't the first line of the cell, add a new line before the
3039
     extra cell contents, to separate them (and make backslash linebreaks
3040
     work).
3041
  */
3042
64.1k
  if (is_continuation) hoedown_buffer_putc(ob, '\n');
3043
3044
  /* Remove escaping from pipes */
3045
64.1k
  copy_start = copy_end = cell_start;
3046
12.2M
  while (copy_end < cell_end + 1) {
3047
12.2M
    if (data[copy_end] == separator && copy_end > copy_start && data[copy_end - 1] == '\\') {
3048
2.81k
      hoedown_buffer_put(ob, data + copy_start, copy_end - copy_start - 1);
3049
2.81k
      copy_start = copy_end;
3050
2.81k
    }
3051
12.2M
    copy_end++;
3052
12.2M
  }
3053
64.1k
  hoedown_buffer_put(ob, data + copy_start, copy_end - copy_start);
3054
3055
64.1k
  return pos - offset;
3056
64.1k
}
3057
3058
static void
3059
parse_table_row(
3060
  hoedown_buffer *ob,
3061
  hoedown_document *doc,
3062
  uint8_t *data,
3063
  size_t size,
3064
  size_t columns,
3065
  size_t rows,
3066
  hoedown_table_flags *col_data,
3067
  hoedown_table_flags header_flag)
3068
394k
{
3069
394k
  size_t i = 0, col;
3070
394k
  hoedown_buffer *row_work = 0;
3071
3072
394k
  if (!doc->md.table_cell || !doc->md.table_row)
3073
370k
    return;
3074
3075
23.9k
  row_work = newbuf(doc, BUFFER_SPAN);
3076
3077
  /* skip optional first pipe */
3078
23.9k
  if (i < size && data[i] == '|')
3079
11.8k
    i++;
3080
3081
57.4k
  for (col = 0; col < columns && i < size; ++col) {
3082
33.4k
    size_t pos, extra_rows_in_cell;
3083
33.4k
    hoedown_buffer *cell_content;
3084
33.4k
    hoedown_buffer *cell_work;
3085
3086
    /* cell_content is the text that is inline parsed into cell_work. It
3087
       consists of the values of this cell from each row, concatenated and
3088
       separated by new lines.
3089
    */
3090
33.4k
    cell_content = newbuf(doc, BUFFER_SPAN);
3091
33.4k
    cell_work = newbuf(doc, BUFFER_SPAN);
3092
3093
33.4k
    i += parse_table_cell_line(cell_content, data, size, i, '|', 0 /* is_contination */);
3094
3095
    /* Add extra rows of the cell. This only occurs if rows is greater than 0,
3096
       which only happens when multiline tables are enabled.
3097
3098
       Each extra row is a colon, followed by cell contents for the continued
3099
       row, separated by colons.
3100
    */
3101
33.4k
    extra_rows_in_cell = rows - 1;
3102
33.4k
    pos = i;
3103
64.1k
    while (extra_rows_in_cell > 0 && pos < size) {
3104
30.7k
      size_t c;
3105
3106
      /* seek to the end of the current row */
3107
2.50M
      while (pos < size && data[pos] != '\n') {
3108
2.47M
        pos++;
3109
2.47M
      }
3110
3111
      /* skip new line and leading colon */
3112
30.7k
      if (pos < size) pos++;
3113
30.7k
      if (pos < size) pos++;
3114
3115
      /* Seek to the beginning of the correct column on the continuation line.
3116
       * The continuation line should have the expected number of columns, and
3117
       * so we never expect pos >= size or data[pos] == '\n'. These checks serve
3118
       * as defense in depth against wrong preconditions. */
3119
57.0k
      for (c = 0; c < col; c++) {
3120
4.63M
        while (pos < size && data[pos] != '\n' && (is_backslashed(data, pos) || data[pos] != ':'))
3121
4.60M
          pos++;
3122
26.3k
        if (pos < size && data[pos] == ':') pos++;  /* skip colon */
3123
26.3k
      }
3124
3125
30.7k
      parse_table_cell_line(cell_content, data, size, pos, ':', 1 /* is_contination */);
3126
3127
30.7k
      extra_rows_in_cell--;
3128
30.7k
    }
3129
3130
33.4k
    parse_inline(cell_work, doc, cell_content->data, cell_content->size);
3131
3132
33.4k
    doc->md.table_cell(row_work, cell_work, col_data[col] | header_flag, &doc->data);
3133
3134
33.4k
    popbuf(doc, BUFFER_SPAN);
3135
33.4k
    popbuf(doc, BUFFER_SPAN);
3136
33.4k
    i++;
3137
33.4k
  }
3138
3139
27.2k
  for (; col < columns; ++col) {
3140
3.27k
    hoedown_buffer empty_cell = { 0, 0, 0, 0, NULL, NULL, NULL };
3141
3.27k
    doc->md.table_cell(row_work, &empty_cell, col_data[col] | header_flag, &doc->data);
3142
3.27k
  }
3143
3144
23.9k
  doc->md.table_row(ob, row_work, &doc->data);
3145
3146
23.9k
  popbuf(doc, BUFFER_SPAN);
3147
23.9k
}
3148
3149
static size_t
3150
parse_table_header(
3151
  hoedown_buffer *ob,
3152
  hoedown_buffer *attr,
3153
  hoedown_document *doc,
3154
  uint8_t *data,
3155
  size_t size,
3156
  size_t *columns,
3157
  hoedown_table_flags **column_data)
3158
5.57M
{
3159
5.57M
  int pipes, rows;
3160
5.57M
  size_t i = 0, col, header_end, under_end;
3161
5.57M
  hoedown_buffer *header_contents = 0;
3162
3163
5.57M
  pipes = 0;
3164
267M
  while (i < size && data[i] != '\n') {
3165
261M
    if (!is_backslashed(data, i) && data[i] == '|') {
3166
9.76M
      pipes++;
3167
9.76M
    }
3168
261M
    i++;
3169
261M
  }
3170
3171
5.57M
  if (i == size || pipes == 0)
3172
5.25M
    return 0;
3173
3174
314k
  header_end = i;
3175
3176
714k
  while (header_end > 0 && _isspace(data[header_end - 1]))
3177
400k
    header_end--;
3178
3179
314k
  if (data[0] == '|')
3180
115k
    pipes--;
3181
3182
314k
  if (header_end && data[header_end - 1] == '|' && !is_backslashed(data, header_end - 1))
3183
97.5k
    pipes--;
3184
3185
314k
  if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
3186
314k
    size_t n = parse_attributes(data, header_end, attr, NULL, "", 1, doc->attr_activation);
3187
    /* n == header_end when no attribute is found */
3188
314k
    if (n != header_end) {
3189
40.1k
      while (n > 0 && _isspace(data[n - 1]))
3190
0
        n--;
3191
40.1k
      if (attr->size && n && data[n - 1] == '|' && !is_backslashed(data, n - 1))
3192
1.96k
        pipes--;
3193
3194
40.1k
      header_end = n + 1;
3195
40.1k
    }
3196
314k
  }
3197
3198
314k
  if (pipes < 0)
3199
29.2k
    return 0;
3200
3201
  /* header_contents will have the lines of the header copied into it, and then
3202
     is passed to parse_table_row. We need a separate buffer to avoid passing
3203
     the attribute to parse_table_row.
3204
  */
3205
285k
  header_contents = newbuf(doc, BUFFER_SPAN);
3206
285k
  hoedown_buffer_put(header_contents, data, header_end);
3207
3208
285k
  *columns = pipes + 1;
3209
285k
  *column_data = hoedown_calloc(*columns, sizeof(hoedown_table_flags));
3210
3211
  /* If the multiline table extension is enabled, check the next lines for
3212
     continuation markers, to find the number of text rows that make up this
3213
     logical row, and copy the contents of each row to header_contents,
3214
     separated by new lines.
3215
  */
3216
285k
  rows = 1;
3217
285k
  if ((doc->ext_flags & HOEDOWN_EXT_MULTILINE_TABLES) != 0) {
3218
295k
    while (i < size) {
3219
295k
      size_t j = i + 1;
3220
295k
      int colons = 0;
3221
3222
      /* Require that the continuation line starts with a colon */
3223
295k
      if (j >= size || data[j] != ':') break;
3224
      /* Skip the leading colon to match the pipe counting behavior above */
3225
59.8k
      j++;
3226
3227
      /* Require that the continuation line start with ": ", to
3228
         distinguish from ":-" which could start a left-aligned header
3229
         bar.
3230
      */
3231
59.8k
      if (j >= size || data[j] != ' ') break;
3232
3233
7.39M
      while (j < size && data[j] != '\n') {
3234
7.37M
        j++;
3235
7.37M
        if (j < size && !is_backslashed(data, j) && data[j] == ':')
3236
2.74M
          colons++;
3237
7.37M
      }
3238
3239
      /* Allow a trailing colon to match the pipe counting behavior above */
3240
14.1k
      if (!is_backslashed(data, j - 1) && data[j - 1] == ':')
3241
1.06k
        colons--;
3242
3243
14.1k
      if (colons != pipes) break;
3244
3245
10.5k
      hoedown_buffer_putc(header_contents, '\n');
3246
      /* data[i] is the previous new line, and data[j] is the next new
3247
         line. This copies all the text between the new lines.
3248
       */
3249
10.5k
      hoedown_buffer_put(header_contents, data + i + 1, j - i - 1);
3250
3251
10.5k
      rows++;
3252
10.5k
      i = j;
3253
10.5k
      header_end = j;
3254
10.5k
    }
3255
285k
  }
3256
3257
  /* Parse the header underline */
3258
285k
  i++;
3259
285k
  if (i < size && data[i] == '|')
3260
23.1k
    i++;
3261
3262
285k
  under_end = i;
3263
40.4M
  while (under_end < size && data[under_end] != '\n')
3264
40.2M
    under_end++;
3265
3266
337k
  for (col = 0; col < *columns && i < under_end; ++col) {
3267
286k
    size_t dashes = 0;
3268
3269
593k
    while (i < under_end && data[i] == ' ')
3270
306k
      i++;
3271
3272
286k
    if (i < under_end && data[i] == ':') {
3273
64.6k
      i++; (*column_data)[col] |= HOEDOWN_TABLE_ALIGN_LEFT;
3274
64.6k
      dashes++;
3275
64.6k
    }
3276
3277
501k
    while (i < under_end && data[i] == '-') {
3278
214k
      i++; dashes++;
3279
214k
    }
3280
3281
286k
    if (i < under_end && data[i] == ':') {
3282
40.3k
      i++; (*column_data)[col] |= HOEDOWN_TABLE_ALIGN_RIGHT;
3283
40.3k
      dashes++;
3284
40.3k
    }
3285
3286
578k
    while (i < under_end && data[i] == ' ')
3287
292k
      i++;
3288
3289
286k
    if (i < under_end && data[i] != '|' && data[i] != '+')
3290
153k
      break;
3291
3292
133k
    if (dashes < 3)
3293
80.9k
      break;
3294
3295
52.4k
    i++;
3296
52.4k
  }
3297
3298
285k
  if (col < *columns) {
3299
    /* clean up header_contents */
3300
254k
    popbuf(doc, BUFFER_SPAN);
3301
254k
    return 0;
3302
254k
  }
3303
3304
30.8k
  parse_table_row(
3305
30.8k
    ob, doc, header_contents->data,
3306
30.8k
    header_contents->size,
3307
30.8k
    *columns,
3308
30.8k
    rows,
3309
30.8k
    *column_data,
3310
30.8k
    HOEDOWN_TABLE_HEADER
3311
30.8k
  );
3312
3313
  /* clean up header_contents */
3314
30.8k
  popbuf(doc, BUFFER_SPAN);
3315
3316
30.8k
  return under_end + 1;
3317
285k
}
3318
3319
static size_t
3320
parse_table(
3321
  hoedown_buffer *ob,
3322
  hoedown_document *doc,
3323
  uint8_t *data,
3324
  size_t size)
3325
5.57M
{
3326
5.57M
  size_t i;
3327
3328
5.57M
  hoedown_buffer *work = 0;
3329
5.57M
  hoedown_buffer *header_work = 0;
3330
5.57M
  hoedown_buffer *body_work = 0;
3331
5.57M
  hoedown_buffer *attr_work = 0;
3332
3333
5.57M
  size_t columns;
3334
5.57M
  hoedown_table_flags *col_data = NULL;
3335
3336
5.57M
  work = newbuf(doc, BUFFER_BLOCK);
3337
5.57M
  header_work = newbuf(doc, BUFFER_SPAN);
3338
5.57M
  body_work = newbuf(doc, BUFFER_BLOCK);
3339
5.57M
  attr_work = newbuf(doc, BUFFER_ATTRIBUTE);
3340
5.57M
  i = parse_table_header(header_work, attr_work, doc, data, size, &columns, &col_data);
3341
5.57M
  if (i > 0) {
3342
3343
394k
    while (i < size) {
3344
385k
      size_t row_start;
3345
385k
      size_t pipes = 0;
3346
385k
      size_t rows = 1;
3347
3348
385k
      row_start = i;
3349
3350
113M
      while (i < size && data[i] != '\n') {
3351
112M
        if (data[i] == '|' && !is_backslashed(data, i)) pipes++;
3352
112M
        i++;
3353
112M
      }
3354
3355
385k
      if (pipes == 0 || i == size) {
3356
22.3k
        i = row_start;
3357
22.3k
        break;
3358
22.3k
      }
3359
3360
      /* Don't count a leading pipe. */
3361
363k
      if (data[row_start] == '|')
3362
330k
        pipes--;
3363
3364
      /* Don't count a trailing pipe. */
3365
363k
      if (data[i - 1] == '|' && !is_backslashed(data, i - 1))
3366
104k
        pipes--;
3367
3368
      /* If the multiline table extension is enabled, check the next
3369
         lines for continuation markers, to find the number of text rows
3370
         that make up this logical row.
3371
      */
3372
363k
      if ((doc->ext_flags & HOEDOWN_EXT_MULTILINE_TABLES) != 0) {
3373
391k
        while (i < size) {
3374
391k
          size_t j = i + 1;
3375
391k
          size_t colons = 0;
3376
3377
          /* Require that a continued row starts with a colon. */
3378
391k
          if (j >= size || data[j] != ':') break;
3379
3380
          /* Don't count leading colon for comparison to pipes. */
3381
36.8k
          j++;
3382
3383
27.5M
          while (j < size && data[j] != '\n') {
3384
27.5M
            if (!is_backslashed(data, j) && data[j] == ':')
3385
3.45M
              colons++;
3386
27.5M
            j++;
3387
27.5M
          }
3388
3389
          /* Don't count a trailing colon for comparison to pipes. */
3390
36.8k
          if (!is_backslashed(data, j - 1) && data[j - 1] == ':')
3391
3.56k
            colons--;
3392
3393
          /* Hoedown allows table rows where the number of cells is different
3394
           * from `columns`. In this case, `parse_table_row` will add empty
3395
           * cells. However, the code does not work in the multi-line case, so
3396
           * we require the right number of columns. */
3397
36.8k
          if (colons != pipes || colons + 1 != columns) break;
3398
3399
28.2k
          rows++;
3400
28.2k
          i = j;
3401
28.2k
        }
3402
363k
      }
3403
3404
363k
      parse_table_row(
3405
363k
        body_work,
3406
363k
        doc,
3407
363k
        data + row_start,
3408
363k
        i - row_start,
3409
363k
        columns,
3410
363k
        rows,
3411
363k
        col_data, 0
3412
363k
      );
3413
3414
363k
      i++;
3415
3416
      /* Skip an optional row separator, if it's there. */
3417
363k
      if ((doc->ext_flags & HOEDOWN_EXT_MULTILINE_TABLES) != 0) {
3418
        /* Use j instead of i, and set i to j only if this is actually a row separator. */
3419
363k
        size_t j = i, next_line_end = i, col;
3420
3421
        /* Seek next_line_end to the position of the terminating new line. */
3422
107M
        while (next_line_end < size && data[next_line_end] != '\n')
3423
106M
          next_line_end++;
3424
3425
        /* Skip leading pipe, if any. */
3426
363k
        if (j < next_line_end && data[j] == '|')
3427
233k
          j++;
3428
3429
        /* Ensure that there are at least columns pipe/plus separated
3430
           runs of dashes, each at least 3 long. The pipes may be
3431
           padded with spaces, and the line may end in a pipe.
3432
        */
3433
452k
        for (col = 0; col < columns && j < next_line_end; col++) {
3434
352k
          size_t dashes = 0;
3435
3436
2.13M
          while (j < next_line_end && data[j] == ' ')
3437
1.78M
            j++;
3438
3439
650k
          while (j < next_line_end && data[j] == '-') {
3440
298k
            j++;
3441
298k
            dashes++;
3442
298k
          }
3443
3444
720k
          while (j < next_line_end && data[j] == ' ')
3445
368k
            j++;
3446
3447
352k
          if (j < next_line_end && data[j] != '|' && data[j] != '+')
3448
145k
            break;
3449
3450
206k
          if (dashes < 3)
3451
117k
            break;
3452
3453
89.0k
          j++;
3454
89.0k
        }
3455
3456
        /* Skip i past the row separator, if it was valid. */
3457
363k
        if (col == columns)
3458
88.6k
          i = next_line_end + 1;
3459
363k
      }
3460
363k
    }
3461
3462
30.8k
    if (doc->md.table_header)
3463
9.12k
      doc->md.table_header(work, header_work, &doc->data);
3464
3465
30.8k
    if (doc->md.table_body)
3466
9.12k
      doc->md.table_body(work, body_work, &doc->data);
3467
3468
30.8k
    if (doc->md.table)
3469
9.12k
      doc->md.table(ob, work, attr_work, &doc->data);
3470
30.8k
  }
3471
3472
5.57M
  free(col_data);
3473
5.57M
  popbuf(doc, BUFFER_SPAN);
3474
5.57M
  popbuf(doc, BUFFER_BLOCK);
3475
5.57M
  popbuf(doc, BUFFER_BLOCK);
3476
5.57M
  popbuf(doc, BUFFER_ATTRIBUTE);
3477
5.57M
  return i;
3478
5.57M
}
3479
3480
/* parse_userblock • parsing of user block */
3481
static size_t
3482
parse_userblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
3483
0
{
3484
0
  hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL };
3485
0
  size_t len = doc->user_block(data, size, &doc->data);
3486
3487
0
  if (!len) {
3488
0
    return 0;
3489
0
  }
3490
3491
0
  work.data = data;
3492
0
  work.size = len;
3493
3494
0
  if (doc->md.user_block) {
3495
0
    doc->md.user_block(ob, &work, &doc->data);
3496
0
  } else {
3497
0
    hoedown_buffer_put(ob, work.data, work.size);
3498
0
  }
3499
0
  return len;
3500
0
}
3501
3502
/* is_paragraph • returns if the next block is a paragraph (doesn't follow any
3503
 * other special rules for other types of blocks) */
3504
static int
3505
is_paragraph(hoedown_document *doc, uint8_t *txt_data, size_t end)
3506
5.09M
{
3507
  /* temporary buffer for results of checking special blocks */
3508
5.09M
  hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK);
3509
  /* temporary renderer that has no rendering function */
3510
5.09M
  hoedown_renderer temp_renderer;
3511
  /* ensure all callbacks are NULL */
3512
5.09M
  memset(&temp_renderer, 0, sizeof(hoedown_renderer));
3513
  /* store the old renderer */
3514
5.09M
  hoedown_renderer old_renderer;
3515
5.09M
  memcpy(&old_renderer, &doc->md, sizeof(hoedown_renderer));
3516
  /* copy the new renderer over to the document */
3517
5.09M
  memcpy(&doc->md, &temp_renderer, sizeof(hoedown_renderer));
3518
  /* these are all the if branches inside parse_block, wrapped into one bool,
3519
   * with minimal parsing, and completely idempotent */
3520
5.09M
  int result = !(is_atxheader(doc, txt_data, end) ||
3521
5.09M
          (doc->user_block && parse_userblock(tmp, doc, txt_data, end)) ||
3522
5.09M
          (txt_data[0] == '<' &&
3523
5.07M
            parse_htmlblock(tmp, doc, txt_data, end, 0)) ||
3524
5.09M
          is_hrule(txt_data, end) ||
3525
5.09M
          ((doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) &&
3526
5.06M
            parse_fencedcode(tmp, doc, txt_data, end, doc->ext_flags)) ||
3527
5.09M
          ((doc->ext_flags & HOEDOWN_EXT_TABLES) &&
3528
5.05M
            parse_table(tmp, doc, txt_data, end)) ||
3529
5.09M
          prefix_quote(txt_data, end) ||
3530
5.09M
          (!(doc->ext_flags & HOEDOWN_EXT_DISABLE_INDENTED_CODE) &&
3531
5.02M
            prefix_code(txt_data, end)) ||
3532
5.09M
          prefix_uli(txt_data, end) ||
3533
5.09M
          prefix_oli(txt_data, end) ||
3534
5.09M
          ((doc->ext_flags & HOEDOWN_EXT_DEFINITION_LISTS) &&
3535
4.97M
            prefix_dli(doc, txt_data, end)));
3536
5.09M
  popbuf(doc, BUFFER_BLOCK);
3537
5.09M
  memcpy(&doc->md, &old_renderer, sizeof(hoedown_renderer));
3538
5.09M
  return result;
3539
5.09M
}
3540
3541
/* parse_block • parsing of one block, returning next uint8_t to parse */
3542
static void
3543
parse_block(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
3544
133k
{
3545
133k
  size_t beg, end, i;
3546
133k
  uint8_t *txt_data;
3547
133k
  beg = 0;
3548
3549
133k
  if (doc->work_bufs[BUFFER_SPAN].size +
3550
133k
    doc->work_bufs[BUFFER_BLOCK].size > doc->max_nesting)
3551
333
    return;
3552
3553
814k
  while (beg < size) {
3554
680k
    txt_data = data + beg;
3555
680k
    end = size - beg;
3556
3557
680k
    if (is_atxheader(doc, txt_data, end))
3558
16.8k
      beg += parse_atxheader(ob, doc, txt_data, end);
3559
3560
663k
    else if (doc->user_block &&
3561
663k
        (i = parse_userblock(ob, doc, txt_data, end)) != 0)
3562
0
      beg += i;
3563
3564
663k
    else if (data[beg] == '<' && doc->md.blockhtml &&
3565
663k
        (i = parse_htmlblock(ob, doc, txt_data, end, 1)) != 0)
3566
0
      beg += i;
3567
3568
663k
    else if ((i = is_empty(txt_data, end)) != 0)
3569
134k
      beg += i;
3570
3571
528k
    else if (is_hrule(txt_data, end)) {
3572
85.3k
      while (beg < size && data[beg] != '\n')
3573
79.6k
        beg++;
3574
3575
5.66k
      if (doc->md.hrule) {
3576
5.66k
        doc->hrule_char = data[beg - 1];
3577
5.66k
        doc->md.hrule(ob, &doc->data);
3578
5.66k
        doc->hrule_char = 0;
3579
5.66k
      }
3580
3581
5.66k
      beg++;
3582
5.66k
    }
3583
3584
523k
    else if ((doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) != 0 &&
3585
523k
      (i = parse_fencedcode(ob, doc, txt_data, end, doc->ext_flags)) != 0)
3586
6.61k
      beg += i;
3587
3588
516k
    else if ((doc->ext_flags & HOEDOWN_EXT_TABLES) != 0 &&
3589
516k
      (i = parse_table(ob, doc, txt_data, end)) != 0)
3590
9.12k
      beg += i;
3591
3592
507k
    else if (prefix_quote(txt_data, end))
3593
27.5k
      beg += parse_blockquote(ob, doc, txt_data, end);
3594
3595
479k
    else if (!(doc->ext_flags & HOEDOWN_EXT_DISABLE_INDENTED_CODE) && prefix_code(txt_data, end))
3596
0
      beg += parse_blockcode(ob, doc, txt_data, end);
3597
3598
479k
    else if (prefix_uli(txt_data, end))
3599
95.6k
      beg += parse_list(ob, doc, txt_data, end, 0);
3600
3601
384k
    else if (prefix_oli(txt_data, end))
3602
84.4k
      beg += parse_list(ob, doc, txt_data, end, HOEDOWN_LIST_ORDERED);
3603
3604
299k
    else if ((doc->ext_flags & HOEDOWN_EXT_DEFINITION_LISTS) && prefix_dli(doc, txt_data, end))
3605
17.8k
      beg += parse_list(ob, doc, txt_data, end, HOEDOWN_LIST_DEFINITION);
3606
3607
282k
    else
3608
282k
      beg += parse_paragraph(ob, doc, txt_data, end);
3609
680k
  }
3610
133k
}
3611
3612
3613
3614
/*********************
3615
 * REFERENCE PARSING *
3616
 *********************/
3617
3618
/* is_footnote • returns whether a line is a footnote definition or not */
3619
static int
3620
is_footnote(const uint8_t *data, size_t beg, size_t end, size_t *last, struct footnote_list *list)
3621
1.56M
{
3622
1.56M
  size_t i = 0;
3623
1.56M
  hoedown_buffer *contents = NULL;
3624
1.56M
  hoedown_buffer *name = NULL;
3625
1.56M
  size_t ind = 0;
3626
1.56M
  int in_empty = 0;
3627
1.56M
  size_t start = 0;
3628
3629
1.56M
  size_t id_offset, id_end;
3630
1.56M
  size_t id_indent = 0, content_line = 0, content_indent = 0;
3631
3632
  /* up to 3 optional leading spaces */
3633
1.56M
  if (beg + 3 >= end) return 0;
3634
1.56M
  if (data[beg] == ' ') { i = 1;
3635
131k
  if (data[beg + 1] == ' ') { i = 2;
3636
23.4k
  if (data[beg + 2] == ' ') { i = 3;
3637
14.7k
  if (data[beg + 3] == ' ') return 0; } } }
3638
1.55M
  i += beg;
3639
3640
  /* id part: caret followed by anything between brackets */
3641
1.55M
  if (data[i] != '[') return 0;
3642
347k
  i++;
3643
347k
  if (i >= end || data[i] != '^') return 0;
3644
205k
  i++;
3645
205k
  id_offset = i;
3646
714k
  while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
3647
508k
    i++;
3648
205k
  if (i >= end || data[i] != ']') return 0;
3649
199k
  id_end = i;
3650
3651
  /* spacer: colon (space | tab)* newline? (space | tab)* */
3652
199k
  i++;
3653
199k
  if (i >= end || data[i] != ':') return 0;
3654
192k
  i++;
3655
192k
  if (i >= end) return 0;
3656
3657
  /* getting content and name buffers */
3658
192k
  contents = hoedown_buffer_new(64);
3659
192k
  name = hoedown_buffer_new(64);
3660
3661
192k
  start = i;
3662
3663
  /* getting item indent size */
3664
754k
  while (id_indent != start && data[start - id_indent] != '\n' && data[start - id_indent] != '\r') {
3665
561k
    id_indent++;
3666
561k
  }
3667
3668
  /* process lines similar to a list item */
3669
472k
  while (i < end) {
3670
9.69M
    while (i < end && data[i] != '\n' && data[i] != '\r') i++;
3671
3672
    /* process an empty line */
3673
471k
    if (is_empty(data + start, i - start)) {
3674
148k
      in_empty = 1;
3675
148k
      if (i < end && (data[i] == '\n' || data[i] == '\r')) {
3676
148k
        i++;
3677
148k
        if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++;
3678
148k
      }
3679
148k
      start = i;
3680
148k
      continue;
3681
148k
    }
3682
3683
    /* calculating the indentation */
3684
323k
    ind = 0;
3685
543k
    while (ind < 4 && start + ind < end && data[start + ind] == ' ')
3686
220k
      ind++;
3687
3688
323k
    content_line++;
3689
3690
    /* joining only indented stuff after empty lines;
3691
     * note that now we only require 1 space of indentation
3692
     * to continue, just like lists */
3693
323k
    if (ind == 0) {
3694
230k
      if (start == id_end + 2 && data[start] == '\t') {}
3695
192k
      else break;
3696
230k
    }
3697
93.4k
    else if (in_empty) {
3698
2.91k
      hoedown_buffer_putc(contents, '\n');
3699
2.91k
    }
3700
3701
130k
    in_empty = 0;
3702
3703
    /* re-calculating the indentation */
3704
130k
    if (content_line == 2 && data[start + ind] == ' ') {
3705
53.8k
      while (ind < id_indent && data[start + ind] == ' ') {
3706
27.0k
        ind++;
3707
27.0k
      }
3708
26.7k
      content_indent = ind;
3709
26.7k
    }
3710
130k
    if (content_indent > ind) {
3711
28.5k
      while (ind < content_indent && data[start + ind] == ' ') {
3712
9.28k
        ind++;
3713
9.28k
      }
3714
19.2k
    }
3715
3716
    /* adding the line into the content buffer */
3717
130k
    hoedown_buffer_put(contents, data + start + ind, i - start - ind);
3718
    /* add carriage return */
3719
130k
    if (i < end) {
3720
130k
      hoedown_buffer_putc(contents, '\n');
3721
130k
      if (i < end && (data[i] == '\n' || data[i] == '\r')) {
3722
130k
        i++;
3723
130k
        if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++;
3724
130k
      }
3725
130k
    }
3726
130k
    start = i;
3727
130k
  }
3728
3729
192k
  if (last)
3730
192k
    *last = start;
3731
3732
192k
  if (list) {
3733
192k
    struct footnote_ref *ref;
3734
192k
    ref = create_footnote_ref(list, data + id_offset, id_end - id_offset);
3735
192k
    if (!ref)
3736
0
      return 0;
3737
192k
    if (!add_footnote_ref(list, ref)) {
3738
0
      free_footnote_ref(ref);
3739
0
      return 0;
3740
0
    }
3741
192k
    ref->contents = contents;
3742
192k
    hoedown_buffer_put(name, data + id_offset, id_end - id_offset);
3743
192k
    ref->name = name;
3744
192k
  }
3745
3746
192k
  return 1;
3747
192k
}
3748
3749
/* is_html_comment • returns whether a html comment or not */
/*  data[beg..end) must start with "<!--" and contain a closing "-->" */
/*  on success *last (when last is not NULL) receives the offset just past the */
/*  comment and past the single line ending that directly follows it, if any; */
/*  a two-byte "\r\n" (or "\n\r") sequence is consumed as one line ending */
/*  returns 1 for a comment, 0 otherwise */
static int
is_html_comment(const uint8_t *data, size_t beg, size_t end, size_t *last)
{
  size_t i = 0;

  if (beg + 5 >= end) return 0;
  if (!(data[beg] == '<'  && data[beg + 1] == '!' && data[beg + 2] == '-' && data[beg + 3] == '-')) return 0;

  /* look for the '>' that completes the first "-->" */
  i = 5;
  while (beg + i < end && !(data[beg + i - 2] == '-' && data[beg + i - 1] == '-' && data[beg + i] == '>')) i++;
  /* i can only ever be beyond the end if the ending --> is not found */
  if (beg + i >= end) return 0;
  i++;

  if (beg + i < end && (data[beg + i] == '\n' || data[beg + i] == '\r')) {
    const uint8_t first = data[beg + i];

    i++;
    /* take the second byte of a two-byte line ending; two equal bytes are two
     * separate line endings and only the first one is consumed */
    if (beg + i < end && (data[beg + i] == '\n' || data[beg + i] == '\r') && data[beg + i] != first) i++;
  }

  if (last)
    *last = beg + i;

  return 1;
}
3774
3775
/* is_ref • returns whether a line is a reference or not */
3776
static int
3777
is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
3778
1.35M
{
3779
/*  int n; */
3780
1.35M
  size_t i = 0;
3781
1.35M
  size_t id_offset, id_end;
3782
1.35M
  size_t link_offset, link_end;
3783
1.35M
  size_t title_offset, title_end;
3784
1.35M
  size_t line_end;
3785
1.35M
  size_t attr_offset = 0, attr_end = 0;
3786
3787
  /* up to 3 optional leading spaces */
3788
1.35M
  if (beg + 3 >= end) return 0;
3789
1.34M
  if (data[beg] == ' ') { i = 1;
3790
130k
  if (data[beg + 1] == ' ') { i = 2;
3791
23.4k
  if (data[beg + 2] == ' ') { i = 3;
3792
14.7k
  if (data[beg + 3] == ' ') return 0; } } }
3793
1.33M
  i += beg;
3794
3795
  /* id part: anything but a newline between brackets */
3796
1.33M
  if (data[i] != '[') return 0;
3797
154k
  i++;
3798
154k
  id_offset = i;
3799
2.63M
  while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
3800
2.47M
    i++;
3801
154k
  if (i >= end || data[i] != ']') return 0;
3802
138k
  id_end = i;
3803
3804
  /* spacer: colon (space | tab)* newline? (space | tab)* */
3805
138k
  i++;
3806
138k
  if (i >= end || data[i] != ':') return 0;
3807
119k
  i++;
3808
121k
  while (i < end && data[i] == ' ') i++;
3809
119k
  if (i < end && (data[i] == '\n' || data[i] == '\r')) {
3810
16.3k
    i++;
3811
16.3k
    if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
3812
120k
  while (i < end && data[i] == ' ') i++;
3813
119k
  if (i >= end) return 0;
3814
3815
  /* link: spacing-free sequence, optionally between angle brackets */
3816
119k
  if (data[i] == '<')
3817
1.97k
    i++;
3818
3819
119k
  link_offset = i;
3820
3821
1.59M
  while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
3822
1.47M
    i++;
3823
3824
119k
  if (data[i - 1] == '>') link_end = i - 1;
3825
118k
  else link_end = i;
3826
3827
  /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */
3828
176k
  while (i < end && data[i] == ' ') i++;
3829
119k
  if (i < end && data[i] != '\n' && data[i] != '\r'
3830
119k
      && data[i] != '\'' && data[i] != '"' && data[i] != '(')
3831
5.20k
    return 0;
3832
113k
  line_end = 0;
3833
  /* computing end-of-line */
3834
113k
  if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
3835
113k
  if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
3836
667
    line_end = i + 1;
3837
3838
  /* optional (space|tab)* spacer after a newline */
3839
113k
  if (line_end) {
3840
64.3k
    i = line_end + 1;
3841
77.9k
    while (i < end && data[i] == ' ') i++; }
3842
3843
  /* optional title: any non-newline sequence enclosed in '"()
3844
          alone on its line */
3845
113k
  title_offset = title_end = 0;
3846
113k
  if (i + 1 < end
3847
113k
  && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
3848
77.4k
    char d = data[i++];
3849
77.4k
    title_offset = i;
3850
3851
    /* looking for end of tile */
3852
1.99M
    while (i < end && data[i] != d && data[i] != '\n' && data[i] != '\r') {
3853
1.91M
      ++i;
3854
1.91M
    }
3855
3856
77.4k
    if (i + 1 < end && data[i] == d) {
3857
44.9k
      title_end = i++;
3858
44.9k
      attr_offset = i;
3859
3860
      /* looking for EOL */
3861
2.88M
      while (i < end && data[i] != '\n' && data[i] != '\r') {
3862
2.83M
        i++;
3863
2.83M
      }
3864
3865
      /* looking for attribute */
3866
44.9k
      if (data[i-1] == '}' &&
3867
44.9k
        memchr(&data[attr_offset], '{', i - attr_offset)) {
3868
127k
        while (attr_offset < i && data[attr_offset] != '{') {
3869
92.8k
          ++attr_offset;
3870
92.8k
        }
3871
34.2k
        ++attr_offset;
3872
34.2k
        attr_end = i - 1;
3873
34.2k
      } else {
3874
10.6k
        if (data[i-1] == d) {
3875
5.24k
          title_end = i - 1;
3876
5.44k
        } else {
3877
5.44k
          title_end = i;
3878
5.44k
        }
3879
10.6k
        attr_offset = 0;
3880
10.6k
        attr_end = 0;
3881
10.6k
      }
3882
44.9k
      if (i + 1 < end && data[i] == '\r' && data[i + 1] == '\n') {
3883
2.66k
        ++i;
3884
2.66k
      }
3885
3886
44.9k
      line_end = i;
3887
44.9k
    } else {
3888
      /* looking for EOL */
3889
32.4k
      while (i < end && data[i] != '\n' && data[i] != '\r') {
3890
6
        i++;
3891
6
      }
3892
32.4k
      if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') {
3893
476
        title_end = i + 1;
3894
31.9k
      } else {
3895
31.9k
        title_end = i;
3896
31.9k
      }
3897
      /* stepping back */
3898
32.4k
      i -= 1;
3899
33.7k
      while (i > title_offset && data[i] == ' ') {
3900
1.33k
        i -= 1;
3901
1.33k
      }
3902
32.4k
      if (i > title_offset &&
3903
32.4k
        (data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
3904
24.9k
        line_end = title_end;
3905
24.9k
        title_end = i;
3906
24.9k
      }
3907
32.4k
    }
3908
77.4k
  }
3909
3910
113k
  if (!line_end || link_end == link_offset)
3911
12.5k
    return 0; /* garbage after the link empty link */
3912
3913
  /* a valid ref has been found, filling-in return structures */
3914
101k
  if (last)
3915
101k
    *last = line_end;
3916
3917
101k
  if (refs) {
3918
101k
    struct link_ref *ref;
3919
3920
101k
    ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
3921
101k
    if (!ref)
3922
0
      return 0;
3923
3924
101k
    ref->link = hoedown_buffer_new(link_end - link_offset);
3925
101k
    hoedown_buffer_put(ref->link, data + link_offset, link_end - link_offset);
3926
3927
101k
    if (title_end > title_offset) {
3928
34.9k
      ref->title = hoedown_buffer_new(title_end - title_offset);
3929
34.9k
      hoedown_buffer_put(ref->title, data + title_offset, title_end - title_offset);
3930
34.9k
    }
3931
101k
    if (attr_end > attr_offset) {
3932
34.2k
      ref->attr = hoedown_buffer_new(attr_end - attr_offset);
3933
34.2k
      hoedown_buffer_put(ref->attr, data + attr_offset, attr_end - attr_offset);
3934
34.2k
    }
3935
101k
  }
3936
3937
101k
  return 1;
3938
101k
}
3939
3940
/* expand_tabs • appends `size` bytes of `line` to `ob`, turning every tab
 * into 1 to 4 spaces so that the running column count reaches a multiple
 * of 4 */
static void expand_tabs(hoedown_buffer *ob, const uint8_t *line, size_t size)
{
  /* This code makes two assumptions:
   * - Input is valid UTF-8.  (Any byte with top two bits 10 is skipped,
   *   whether or not it is a valid UTF-8 continuation byte.)
   * - Input contains no combining characters.  (Combining characters
   *   should be skipped but are not.)
   */
  size_t pos = 0;
  size_t column = 0;

  for (;;) {
    size_t run_start = pos;

    /* walk a run of non-tab bytes; UTF-8 continuation bytes do not
     * advance the column */
    for (; pos < size && line[pos] != '\t'; pos++) {
      if ((line[pos] & 0xc0) != 0x80)
        column++;
    }

    /* copy the run verbatim */
    if (pos != run_start)
      hoedown_buffer_put(ob, line + run_start, pos - run_start);

    /* no tab left: done */
    if (pos >= size)
      return;

    /* pad with at least one space up to the next multiple of 4 */
    do {
      hoedown_buffer_putc(ob, ' ');
      column++;
    } while (column % 4 != 0);

    /* step over the tab itself */
    pos++;
  }
}
3973
3974
/**********************
3975
 * EXPORTED FUNCTIONS *
3976
 **********************/
3977
3978
hoedown_document *
3979
hoedown_document_new(
3980
  const hoedown_renderer *renderer,
3981
  hoedown_extensions extensions,
3982
  size_t max_nesting,
3983
  uint8_t attr_activation,
3984
  hoedown_user_block user_block,
3985
  hoedown_buffer *meta)
3986
8.92k
{
3987
8.92k
  hoedown_document *doc = NULL;
3988
3989
8.92k
  assert(max_nesting > 0 && renderer);
3990
3991
8.92k
  doc = hoedown_malloc(sizeof(hoedown_document));
3992
8.92k
  memcpy(&doc->md, renderer, sizeof(hoedown_renderer));
3993
3994
8.92k
  doc->data.opaque = renderer->opaque;
3995
3996
8.92k
  hoedown_stack_init(&doc->work_bufs[BUFFER_BLOCK], 4);
3997
8.92k
  hoedown_stack_init(&doc->work_bufs[BUFFER_SPAN], 8);
3998
8.92k
  hoedown_stack_init(&doc->work_bufs[BUFFER_ATTRIBUTE], 8);
3999
4000
8.92k
  memset(doc->active_char, 0x0, 256);
4001
4002
8.92k
  if (extensions & HOEDOWN_EXT_UNDERLINE && doc->md.underline) {
4003
8.92k
    doc->active_char['_'] = MD_CHAR_EMPHASIS;
4004
8.92k
  }
4005
4006
8.92k
  if (doc->md.emphasis || doc->md.double_emphasis || doc->md.triple_emphasis) {
4007
8.92k
    doc->active_char['*'] = MD_CHAR_EMPHASIS;
4008
8.92k
    doc->active_char['_'] = MD_CHAR_EMPHASIS;
4009
8.92k
    if (extensions & HOEDOWN_EXT_STRIKETHROUGH)
4010
8.92k
      doc->active_char['~'] = MD_CHAR_EMPHASIS;
4011
8.92k
    if (extensions & HOEDOWN_EXT_HIGHLIGHT)
4012
8.92k
      doc->active_char['='] = MD_CHAR_EMPHASIS;
4013
8.92k
  }
4014
4015
8.92k
  if (doc->md.codespan)
4016
8.92k
    doc->active_char['`'] = MD_CHAR_CODESPAN;
4017
4018
8.92k
  if (doc->md.linebreak)
4019
8.92k
    doc->active_char['\n'] = MD_CHAR_LINEBREAK;
4020
4021
8.92k
  if (doc->md.image || doc->md.link || doc->md.footnotes || doc->md.footnote_ref) {
4022
8.92k
    doc->active_char['['] = MD_CHAR_LINK;
4023
8.92k
    doc->active_char['!'] = MD_CHAR_IMAGE;
4024
8.92k
  }
4025
4026
8.92k
  doc->active_char['<'] = MD_CHAR_LANGLE;
4027
8.92k
  doc->active_char['\\'] = MD_CHAR_ESCAPE;
4028
8.92k
  doc->active_char['&'] = MD_CHAR_ENTITY;
4029
4030
8.92k
  if (extensions & HOEDOWN_EXT_AUTOLINK) {
4031
8.92k
    doc->active_char[':'] = MD_CHAR_AUTOLINK_URL;
4032
8.92k
    doc->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL;
4033
8.92k
    doc->active_char['w'] = MD_CHAR_AUTOLINK_WWW;
4034
8.92k
  }
4035
4036
8.92k
  if (extensions & HOEDOWN_EXT_SUPERSCRIPT)
4037
8.92k
    doc->active_char['^'] = MD_CHAR_SUPERSCRIPT;
4038
4039
8.92k
  if (extensions & HOEDOWN_EXT_QUOTE)
4040
8.92k
    doc->active_char['"'] = MD_CHAR_QUOTE;
4041
4042
8.92k
  if (extensions & HOEDOWN_EXT_MATH)
4043
8.92k
    doc->active_char['$'] = MD_CHAR_MATH;
4044
4045
  /* Extension data */
4046
8.92k
  doc->ext_flags = extensions;
4047
8.92k
  doc->max_nesting = max_nesting;
4048
8.92k
  doc->attr_activation = attr_activation;
4049
8.92k
  doc->in_link_body = 0;
4050
8.92k
  doc->link_id = NULL;
4051
8.92k
  doc->link_ref_attr = NULL;
4052
8.92k
  doc->link_inline_attr = NULL;
4053
8.92k
  doc->is_escape_char = 0;
4054
8.92k
  doc->header_type = HOEDOWN_HEADER_NONE;
4055
8.92k
  doc->link_type = HOEDOWN_LINK_NONE;
4056
8.92k
  doc->footnote_id = NULL;
4057
8.92k
  doc->list_depth = 0;
4058
8.92k
  doc->blockquote_depth = 0;
4059
8.92k
  doc->ul_item_char = 0;
4060
8.92k
  doc->hrule_char = 0;
4061
8.92k
  doc->fencedcode_char = 0;
4062
8.92k
  doc->ol_numeral = NULL;
4063
8.92k
  doc->user_block = user_block;
4064
8.92k
  doc->meta = meta;
4065
4066
8.92k
  return doc;
4067
8.92k
}
4068
4069
void
4070
hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size)
4071
8.92k
{
4072
8.92k
  static const uint8_t UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
4073
4074
8.92k
  hoedown_buffer *text;
4075
8.92k
  size_t beg, end;
4076
4077
8.92k
  int footnotes_enabled;
4078
4079
8.92k
  text = hoedown_buffer_new(64);
4080
4081
  /* Preallocate enough space for our buffer to avoid expanding while copying */
4082
8.92k
  hoedown_buffer_grow(text, size);
4083
4084
  /* reset the references table */
4085
8.92k
  memset(&doc->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
4086
4087
8.92k
  footnotes_enabled = doc->ext_flags & HOEDOWN_EXT_FOOTNOTES;
4088
4089
  /* reset the footnotes lists */
4090
8.92k
  if (footnotes_enabled) {
4091
8.92k
    memset(&doc->footnotes_found, 0x0, sizeof(doc->footnotes_found));
4092
8.92k
    memset(&doc->footnotes_used, 0x0, sizeof(doc->footnotes_used));
4093
8.92k
  }
4094
4095
  /* first pass: looking for references, copying everything else */
4096
8.92k
  beg = 0;
4097
4098
  /* Skip a possible UTF-8 BOM, even though the Unicode standard
4099
   * discourages having these in UTF-8 documents */
4100
8.92k
  if (size >= 3 && memcmp(data, UTF8_BOM, 3) == 0)
4101
62
    beg += 3;
4102
4103
1.57M
  while (beg < size) /* iterating over lines */
4104
1.56M
    if (footnotes_enabled && is_footnote(data, beg, size, &end, &doc->footnotes_found)) {
4105
192k
      if (doc->md.footnote_ref_def) {
4106
0
        hoedown_buffer original = { NULL, 0, 0, 0, NULL, NULL, NULL };
4107
0
        original.data = (uint8_t*) (data + beg);
4108
0
        original.size = end - beg;
4109
0
        doc->md.footnote_ref_def(&original, &doc->data);
4110
0
      }
4111
192k
      beg = end;
4112
1.37M
    } else if (is_html_comment(data, beg, size, &end)) {
4113
26.2k
      size_t  i = 0;
4114
9.72M
      while (i < (end - beg) && beg + i < size) {
4115
9.69M
        if (data[beg + i] == '\t' && (data[beg + i] & 0xc0) != 0x80) {
4116
3.24M
          hoedown_buffer_put(text, (uint8_t*)"    ", 4);
4117
6.45M
        } else {
4118
6.45M
          hoedown_buffer_putc(text, data[beg + i]);
4119
6.45M
        }
4120
9.69M
        i++;
4121
9.69M
      }
4122
26.2k
      beg = end;
4123
1.35M
    } else if (is_ref(data, beg, size, &end, doc->refs)) {
4124
101k
      if (doc->md.ref) {
4125
0
        hoedown_buffer original = { NULL, 0, 0, 0, NULL, NULL, NULL };
4126
0
        original.data = (uint8_t*) (data + beg);
4127
0
        original.size = end - beg;
4128
0
        doc->md.ref(&original, &doc->data);
4129
0
      }
4130
101k
      beg = end;
4131
1.24M
    } else { /* skipping to the next line */
4132
1.24M
      end = beg;
4133
82.7M
      while (end < size && data[end] != '\n' && data[end] != '\r')
4134
81.4M
        end++;
4135
4136
      /* adding the line body if present */
4137
1.24M
      if (end > beg)
4138
1.14M
        expand_tabs(text, data + beg, end - beg);
4139
4140
2.74M
      while (end < size && (data[end] == '\n' || data[end] == '\r')) {
4141
        /* add one \n per newline */
4142
1.49M
        if (data[end] == '\n' || (end + 1 < size && data[end + 1] != '\n'))
4143
1.49M
          hoedown_buffer_putc(text, '\n');
4144
1.49M
        end++;
4145
1.49M
      }
4146
4147
1.24M
      beg = end;
4148
1.24M
    }
4149
4150
  /* pre-grow the output buffer to minimize allocations */
4151
8.92k
  hoedown_buffer_grow(ob, text->size + (text->size >> 1));
4152
4153
  /* second pass: actual rendering */
4154
8.92k
  if (doc->md.doc_header)
4155
0
    doc->md.doc_header(ob, 0, &doc->data);
4156
4157
8.92k
  if (text->size) {
4158
    /* adding a final newline if not already present */
4159
8.82k
    if (text->data[text->size - 1] != '\n')
4160
7.73k
      hoedown_buffer_putc(text, '\n');
4161
4162
8.82k
    parse_block(ob, doc, text->data, text->size);
4163
8.82k
  }
4164
4165
  /* footnotes */
4166
8.92k
  if (footnotes_enabled)
4167
8.92k
    parse_footnote_list(ob, doc, &doc->footnotes_used);
4168
4169
8.92k
  if (doc->md.doc_footer)
4170
0
    doc->md.doc_footer(ob, 0, &doc->data);
4171
4172
  /* clean-up */
4173
8.92k
  hoedown_buffer_free(text);
4174
8.92k
  free_link_refs(doc->refs);
4175
8.92k
  if (footnotes_enabled) {
4176
8.92k
    free_footnote_list(&doc->footnotes_found, 1);
4177
8.92k
    free_footnote_list(&doc->footnotes_used, 0);
4178
8.92k
  }
4179
4180
8.92k
  assert(doc->work_bufs[BUFFER_SPAN].size == 0);
4181
8.92k
  assert(doc->work_bufs[BUFFER_BLOCK].size == 0);
4182
8.92k
  assert(doc->work_bufs[BUFFER_ATTRIBUTE].size == 0);
4183
8.92k
}
4184
4185
void
4186
hoedown_document_render_inline(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size)
4187
0
{
4188
0
  size_t i = 0, mark;
4189
0
  hoedown_buffer *text = hoedown_buffer_new(64);
4190
4191
  /* reset the references table */
4192
0
  memset(&doc->refs, 0x0, REF_TABLE_SIZE * sizeof(void *));
4193
4194
  /* first pass: expand tabs and process newlines */
4195
0
  hoedown_buffer_grow(text, size);
4196
0
  while (1) {
4197
0
    mark = i;
4198
0
    while (i < size && data[i] != '\n' && data[i] != '\r')
4199
0
      i++;
4200
4201
0
    expand_tabs(text, data + mark, i - mark);
4202
4203
0
    if (i >= size)
4204
0
      break;
4205
4206
0
    while (i < size && (data[i] == '\n' || data[i] == '\r')) {
4207
      /* add one \n per newline */
4208
0
      if (data[i] == '\n' || (i + 1 < size && data[i + 1] != '\n'))
4209
0
        hoedown_buffer_putc(text, '\n');
4210
0
      i++;
4211
0
    }
4212
0
  }
4213
4214
  /* second pass: actual rendering */
4215
0
  hoedown_buffer_grow(ob, text->size + (text->size >> 1));
4216
4217
0
  if (doc->md.doc_header)
4218
0
    doc->md.doc_header(ob, 1, &doc->data);
4219
4220
0
  parse_inline(ob, doc, text->data, text->size);
4221
4222
0
  if (doc->md.doc_footer)
4223
0
    doc->md.doc_footer(ob, 1, &doc->data);
4224
4225
  /* clean-up */
4226
0
  hoedown_buffer_free(text);
4227
4228
0
  assert(doc->work_bufs[BUFFER_SPAN].size == 0);
4229
0
  assert(doc->work_bufs[BUFFER_BLOCK].size == 0);
4230
0
}
4231
4232
void
4233
hoedown_document_free(hoedown_document *doc)
4234
8.92k
{
4235
8.92k
  size_t i;
4236
4237
86.7k
  for (i = 0; i < (size_t)doc->work_bufs[BUFFER_SPAN].asize; ++i)
4238
77.8k
    hoedown_buffer_free(doc->work_bufs[BUFFER_SPAN].item[i]);
4239
4240
49.7k
  for (i = 0; i < (size_t)doc->work_bufs[BUFFER_BLOCK].asize; ++i)
4241
40.8k
    hoedown_buffer_free(doc->work_bufs[BUFFER_BLOCK].item[i]);
4242
4243
80.8k
  for (i = 0; i < (size_t)doc->work_bufs[BUFFER_ATTRIBUTE].asize; ++i)
4244
71.8k
    hoedown_buffer_free(doc->work_bufs[BUFFER_ATTRIBUTE].item[i]);
4245
4246
8.92k
  hoedown_stack_uninit(&doc->work_bufs[BUFFER_SPAN]);
4247
8.92k
  hoedown_stack_uninit(&doc->work_bufs[BUFFER_BLOCK]);
4248
8.92k
  hoedown_stack_uninit(&doc->work_bufs[BUFFER_ATTRIBUTE]);
4249
4250
8.92k
  free(doc);
4251
8.92k
}
4252
4253
const hoedown_buffer*
4254
hoedown_document_link_id(hoedown_document* document)
4255
0
{
4256
0
  return document->link_id;
4257
0
}
4258
4259
const hoedown_buffer*
4260
hoedown_document_link_ref_attr(hoedown_document* document)
4261
0
{
4262
0
  return document->link_ref_attr;
4263
0
}
4264
4265
const hoedown_buffer*
4266
hoedown_document_link_inline_attr(hoedown_document* document)
4267
0
{
4268
0
  return document->link_inline_attr;
4269
0
}
4270
4271
int
4272
hoedown_document_is_escaped(hoedown_document* document)
4273
0
{
4274
0
  return document->is_escape_char;
4275
0
}
4276
4277
hoedown_header_type
4278
hoedown_document_header_type(hoedown_document* document)
4279
0
{
4280
0
  return document->header_type;
4281
0
}
4282
4283
hoedown_link_type
4284
hoedown_document_link_type(hoedown_document* document)
4285
0
{
4286
0
  return document->link_type;
4287
0
}
4288
4289
const hoedown_buffer*
4290
hoedown_document_footnote_id(hoedown_document* document)
4291
0
{
4292
0
  return document->footnote_id;
4293
0
}
4294
4295
int
4296
hoedown_document_list_depth(hoedown_document* document)
4297
0
{
4298
0
  return document->list_depth;
4299
0
}
4300
4301
int
4302
hoedown_document_blockquote_depth(hoedown_document* document)
4303
0
{
4304
0
  return document->blockquote_depth;
4305
0
}
4306
4307
uint8_t
4308
hoedown_document_ul_item_char(hoedown_document* document)
4309
0
{
4310
0
  return document->ul_item_char;
4311
0
}
4312
4313
uint8_t
4314
hoedown_document_hrule_char(hoedown_document* document)
4315
0
{
4316
0
  return document->hrule_char;
4317
0
}
4318
4319
uint8_t
4320
hoedown_document_fencedcode_char(hoedown_document* document)
4321
0
{
4322
0
  return document->fencedcode_char;
4323
0
}
4324
4325
const hoedown_buffer*
4326
hoedown_document_ol_numeral(hoedown_document* document)
4327
0
{
4328
0
    return document->ol_numeral;
4329
0
}