/src/hoextdown/src/document.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "document.h" |
2 | | |
3 | | #include <assert.h> |
4 | | #include <string.h> |
5 | | #include <ctype.h> |
6 | | #include <stdio.h> |
7 | | |
8 | | #include "stack.h" |
9 | | |
10 | | #ifndef _MSC_VER |
11 | | #include <strings.h> |
12 | | #else |
13 | | #define strncasecmp _strnicmp |
14 | | #endif |
15 | | |
16 | 780k | #define REF_TABLE_SIZE 8 /* number of hash buckets in hoedown_document.refs; a link_ref lives in bucket (id % REF_TABLE_SIZE) */ |
17 | | /* indices into hoedown_document.work_bufs; passed as the `type` argument of newbuf()/popbuf() */ |
18 | 34.9M | #define BUFFER_BLOCK 0 |
19 | 22.1M | #define BUFFER_SPAN 1 |
20 | 24.3M | #define BUFFER_ATTRIBUTE 2 |
21 | | |
22 | | const char *hoedown_find_block_tag(const char *str, unsigned int len); /* declared here, defined outside this chunk; NOTE(review): presumably looks up a block-level HTML tag name - confirm against its definition */ |
23 | | const char *hoedown_find_html5_block_tag(const char *str, unsigned int len); /* same signature; NOTE(review): presumably the HTML5 variant of the lookup - confirm */ |
24 | | |
25 | | /*************** |
26 | | * LOCAL TYPES * |
27 | | ***************/ |
28 | | |
29 | | /* link_ref: one reference-style link definition, stored in a hash bucket of the reference table */ |
30 | | struct link_ref { |
31 | | unsigned int id; /* hash_link_ref() of the reference name; the only key find_link_ref() compares */ |
32 | | /* the three buffers below are released with hoedown_buffer_free() in free_link_refs() */ |
33 | | hoedown_buffer *link; |
34 | | hoedown_buffer *title; |
35 | | hoedown_buffer *attr; |
36 | | |
37 | | struct link_ref *next; /* next entry in the same bucket; add_link_ref() inserts at the bucket head */ |
38 | | }; |
39 | | |
40 | | /* footnote_ref: one footnote definition; created zero-filled by create_footnote_ref() */ |
41 | | struct footnote_ref { |
42 | | unsigned int id; /* hash_link_ref() of the footnote name; the only key find_footnote_ref() compares */ |
43 | | |
44 | | int is_used; /* NOTE(review): not written in this chunk; presumably set once the footnote is referenced - confirm */ |
45 | | unsigned int num; /* NOTE(review): not written in this chunk; presumably the footnote's display number - confirm */ |
46 | | |
47 | | hoedown_buffer *contents; /* released in free_footnote_ref() */ |
48 | | |
49 | | /* the original string id of the footnote, before conversion to an int */ |
50 | | hoedown_buffer *name; /* released in free_footnote_ref() */ |
51 | | }; |
52 | | |
53 | | /* footnote_item: a singly linked node of a footnote_list */ |
54 | | struct footnote_item { |
55 | | struct footnote_ref *ref; /* the footnote this node refers to; only freed with the node when free_footnote_list() is called with free_refs != 0 */ |
56 | | struct footnote_item *next; /* NULL on the last node (nodes are zero-allocated in add_footnote_ref()) */ |
57 | | }; |
58 | | |
59 | | /* footnote_list: singly linked list of footnote_item, appended at the tail by add_footnote_ref() */ |
60 | | struct footnote_list { |
61 | | unsigned int count; /* incremented once per successful add_footnote_ref() */ |
62 | | struct footnote_item *head; /* NULL while the list is empty */ |
63 | | struct footnote_item *tail; /* last node; only meaningful when head != NULL */ |
64 | | }; |
65 | | |
66 | | /* char_trigger: handler invoked by parse_inline() when it meets an active character */ |
67 | | /* returns the number of chars it consumed starting at data; 0 means "no action", and the char is then treated as plain text */ |
68 | | /* data points at the active character, i.e. the beginning of the span; size is the number of bytes available from data */ |
69 | | /* offset is the number of chars before data that no earlier handler has consumed yet */ |
70 | | typedef size_t |
71 | | (*char_trigger)(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
72 | | /* forward declarations of the handlers so that markdown_char_ptrs can reference them; their definitions are outside this chunk */ |
73 | | static size_t char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
74 | | static size_t char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
75 | | static size_t char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
76 | | static size_t char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
77 | | static size_t char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
78 | | static size_t char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
79 | | static size_t char_langle_tag(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
80 | | static size_t char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
81 | | static size_t char_autolink_email(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
82 | | static size_t char_autolink_www(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
83 | | static size_t char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
84 | | static size_t char_image(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
85 | | static size_t char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
86 | | static size_t char_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
87 | | |
88 | | enum markdown_char_t { /* values are used as indices into markdown_char_ptrs, so the order here must match that table */ |
89 | | MD_CHAR_NONE = 0, /* 0 marks an inactive byte in hoedown_document.active_char */ |
90 | | MD_CHAR_EMPHASIS, |
91 | | MD_CHAR_CODESPAN, |
92 | | MD_CHAR_LINEBREAK, |
93 | | MD_CHAR_LINK, |
94 | | MD_CHAR_IMAGE, |
95 | | MD_CHAR_LANGLE, |
96 | | MD_CHAR_ESCAPE, |
97 | | MD_CHAR_ENTITY, |
98 | | MD_CHAR_AUTOLINK_URL, |
99 | | MD_CHAR_AUTOLINK_EMAIL, |
100 | | MD_CHAR_AUTOLINK_WWW, |
101 | | MD_CHAR_SUPERSCRIPT, |
102 | | MD_CHAR_QUOTE, |
103 | | MD_CHAR_MATH |
104 | | }; |
105 | | |
106 | | static char_trigger markdown_char_ptrs[] = { /* handler table indexed by the active_char value of a byte; entries are listed in enum markdown_char_t order */ |
107 | | NULL, /* MD_CHAR_NONE: never called, parse_inline() only dispatches on non-zero active_char values */ |
108 | | &char_emphasis, |
109 | | &char_codespan, |
110 | | &char_linebreak, |
111 | | &char_link, |
112 | | &char_image, |
113 | | &char_langle_tag, |
114 | | &char_escape, |
115 | | &char_entity, |
116 | | &char_autolink_url, |
117 | | &char_autolink_email, |
118 | | &char_autolink_www, |
119 | | &char_superscript, |
120 | | &char_quote, |
121 | | &char_math |
122 | | }; |
123 | | |
124 | | struct hoedown_document { /* parser state shared by every parsing function in this file */ |
125 | | hoedown_renderer md; /* renderer callbacks, invoked as doc->md.<callback>(...) */ |
126 | | hoedown_renderer_data data; /* passed as &doc->data to every renderer callback and to user_block */ |
127 | | |
128 | | uint8_t attr_activation; /* NOTE(review): presumably the value handed to the attr_activation parameter of parse_attributes()/parse_inline_attributes() - confirm at the call sites */ |
129 | | |
130 | | struct link_ref *refs[REF_TABLE_SIZE]; /* hash buckets of link references, see add_link_ref()/find_link_ref() */ |
131 | | struct footnote_list footnotes_found; |
132 | | struct footnote_list footnotes_used; |
133 | | uint8_t active_char[256]; /* per input byte: 0 = plain text, otherwise an index into markdown_char_ptrs */ |
134 | | hoedown_stack work_bufs[3]; /* scratch-buffer pools indexed by BUFFER_BLOCK / BUFFER_SPAN / BUFFER_ATTRIBUTE */ |
135 | | hoedown_extensions ext_flags; /* tested against HOEDOWN_EXT_* bits */ |
136 | | size_t max_nesting; /* parse_inline() does nothing once the span + block pool depth exceeds this */ |
137 | | int in_link_body; |
138 | | |
139 | | /* extra information provided to callbacks */ |
140 | | const hoedown_buffer *link_id; |
141 | | const hoedown_buffer *link_inline_attr; |
142 | | const hoedown_buffer *link_ref_attr; |
143 | | int is_escape_char; |
144 | | hoedown_header_type header_type; |
145 | | hoedown_link_type link_type; |
146 | | const hoedown_buffer *footnote_id; |
147 | | int list_depth; |
148 | | int blockquote_depth; |
149 | | uint8_t ul_item_char; |
150 | | uint8_t hrule_char; |
151 | | uint8_t fencedcode_char; |
152 | | const hoedown_buffer *ol_numeral; |
153 | | |
154 | | hoedown_user_block user_block; /* optional detector: called with (data, remaining size, &doc->data); a non-zero result is the length of a user block */ |
155 | | hoedown_buffer *meta; |
156 | | }; |
157 | | |
158 | | /*************************** |
159 | | * HELPER FUNCTIONS * |
160 | | ***************************/ |
161 | | |
162 | | static hoedown_buffer * |
163 | | newbuf(hoedown_document *doc, int type) |
164 | 39.0M | { |
165 | 39.0M | static const size_t buf_size[3] = {256, 64, 64}; |
166 | 39.0M | hoedown_buffer *work = NULL; |
167 | 39.0M | hoedown_stack *pool = &doc->work_bufs[type]; |
168 | | |
169 | 39.0M | if (pool->size < pool->asize && |
170 | 39.0M | pool->item[pool->size] != NULL) { |
171 | 38.9M | work = pool->item[pool->size++]; |
172 | 38.9M | work->size = 0; |
173 | 38.9M | } else { |
174 | 72.2k | work = hoedown_buffer_new(buf_size[type]); |
175 | 72.2k | hoedown_stack_push(pool, work); |
176 | 72.2k | } |
177 | | |
178 | 39.0M | return work; |
179 | 39.0M | } |
180 | | |
181 | | static void |
182 | | popbuf(hoedown_document *doc, int type) |
183 | 37.8M | { |
184 | 37.8M | doc->work_bufs[type].size--; |
185 | 37.8M | } |
186 | | |
187 | | static void |
188 | | unscape_text(hoedown_buffer *ob, hoedown_buffer *src) |
189 | 276k | { |
190 | 276k | size_t i = 0, org; |
191 | 291k | while (i < src->size) { |
192 | 290k | org = i; |
193 | 13.8M | while (i < src->size && src->data[i] != '\\') |
194 | 13.5M | i++; |
195 | | |
196 | 290k | if (i > org) |
197 | 238k | hoedown_buffer_put(ob, src->data + org, i - org); |
198 | | |
199 | 290k | if (i + 1 >= src->size) |
200 | 275k | break; |
201 | | |
202 | 15.0k | hoedown_buffer_putc(ob, src->data[i + 1]); |
203 | 15.0k | i += 2; |
204 | 15.0k | } |
205 | 276k | } |
206 | | |
/* hash_link_ref: case-insensitive hash of a reference name
 *
 * Each byte is lower-cased with tolower() before being mixed in, so names
 * that differ only in the case tolower() recognises hash identically.
 * Unsigned arithmetic wraps; an empty name hashes to 0. */
static unsigned int
hash_link_ref(const uint8_t *link_ref, size_t length)
{
	unsigned int acc = 0;
	size_t remaining = length;

	while (remaining--) {
		unsigned int folded = (unsigned int)tolower(*link_ref++);

		acc = folded + (acc << 6) + (acc << 16) - acc;
	}

	return acc;
}
218 | | |
219 | | static struct link_ref * |
220 | | add_link_ref( |
221 | | struct link_ref **references, |
222 | | const uint8_t *name, size_t name_size) |
223 | 101k | { |
224 | 101k | struct link_ref *ref = hoedown_calloc(1, sizeof(struct link_ref)); |
225 | | |
226 | 101k | ref->id = hash_link_ref(name, name_size); |
227 | 101k | ref->next = references[ref->id % REF_TABLE_SIZE]; |
228 | | |
229 | 101k | references[ref->id % REF_TABLE_SIZE] = ref; |
230 | 101k | return ref; |
231 | 101k | } |
232 | | |
233 | | static struct link_ref * |
234 | | find_link_ref(struct link_ref **references, uint8_t *name, size_t length) |
235 | 488k | { |
236 | 488k | unsigned int hash = hash_link_ref(name, length); |
237 | 488k | struct link_ref *ref = NULL; |
238 | | |
239 | 488k | ref = references[hash % REF_TABLE_SIZE]; |
240 | | |
241 | 22.7M | while (ref != NULL) { |
242 | 22.4M | if (ref->id == hash) |
243 | 264k | return ref; |
244 | | |
245 | 22.2M | ref = ref->next; |
246 | 22.2M | } |
247 | | |
248 | 224k | return NULL; |
249 | 488k | } |
250 | | |
251 | | static void |
252 | | free_link_refs(struct link_ref **references) |
253 | 8.92k | { |
254 | 8.92k | size_t i; |
255 | | |
256 | 80.3k | for (i = 0; i < REF_TABLE_SIZE; ++i) { |
257 | 71.4k | struct link_ref *r = references[i]; |
258 | 71.4k | struct link_ref *next; |
259 | | |
260 | 172k | while (r) { |
261 | 101k | next = r->next; |
262 | 101k | hoedown_buffer_free(r->link); |
263 | 101k | hoedown_buffer_free(r->title); |
264 | 101k | hoedown_buffer_free(r->attr); |
265 | 101k | free(r); |
266 | 101k | r = next; |
267 | 101k | } |
268 | 71.4k | } |
269 | 8.92k | } |
270 | | |
271 | | static struct footnote_ref * |
272 | | create_footnote_ref(struct footnote_list *list, const uint8_t *name, size_t name_size) |
273 | 192k | { |
274 | 192k | struct footnote_ref *ref = hoedown_calloc(1, sizeof(struct footnote_ref)); |
275 | | |
276 | 192k | ref->id = hash_link_ref(name, name_size); |
277 | | |
278 | 192k | return ref; |
279 | 192k | } |
280 | | |
281 | | static int |
282 | | add_footnote_ref(struct footnote_list *list, struct footnote_ref *ref) |
283 | 193k | { |
284 | 193k | struct footnote_item *item = hoedown_calloc(1, sizeof(struct footnote_item)); |
285 | 193k | if (!item) |
286 | 0 | return 0; |
287 | 193k | item->ref = ref; |
288 | | |
289 | 193k | if (list->head == NULL) { |
290 | 1.81k | list->head = list->tail = item; |
291 | 191k | } else { |
292 | 191k | list->tail->next = item; |
293 | 191k | list->tail = item; |
294 | 191k | } |
295 | 193k | list->count++; |
296 | | |
297 | 193k | return 1; |
298 | 193k | } |
299 | | |
300 | | static struct footnote_ref * |
301 | | find_footnote_ref(struct footnote_list *list, uint8_t *name, size_t length) |
302 | 14.6k | { |
303 | 14.6k | unsigned int hash = hash_link_ref(name, length); |
304 | 14.6k | struct footnote_item *item = NULL; |
305 | | |
306 | 14.6k | item = list->head; |
307 | | |
308 | 6.38M | while (item != NULL) { |
309 | 6.37M | if (item->ref->id == hash) |
310 | 6.25k | return item->ref; |
311 | 6.36M | item = item->next; |
312 | 6.36M | } |
313 | | |
314 | 8.35k | return NULL; |
315 | 14.6k | } |
316 | | |
317 | | static void |
318 | | free_footnote_ref(struct footnote_ref *ref) |
319 | 192k | { |
320 | 192k | hoedown_buffer_free(ref->contents); |
321 | 192k | hoedown_buffer_free(ref->name); |
322 | 192k | free(ref); |
323 | 192k | } |
324 | | |
325 | | static void |
326 | | free_footnote_list(struct footnote_list *list, int free_refs) |
327 | 17.8k | { |
328 | 17.8k | struct footnote_item *item = list->head; |
329 | 17.8k | struct footnote_item *next; |
330 | | |
331 | 211k | while (item) { |
332 | 193k | next = item->next; |
333 | 193k | if (free_refs) |
334 | 192k | free_footnote_ref(item->ref); |
335 | 193k | free(item); |
336 | 193k | item = next; |
337 | 193k | } |
338 | 17.8k | } |
339 | | |
340 | | |
/*
 * Check whether a char is a Markdown spacing char.
 *
 * Only the space character and the newline count as spacing here; the
 * original author's note says tabs and carriage returns are filtered out
 * during a preprocessing phase (not visible in this part of the file).
 *
 * To be actually UTF-8 compliant, this would instead have to extract a
 * Unicode codepoint and check it for space properties.
 *
 * Returns 1 for ' ' and '\n', 0 for anything else.
 */
static int
_isspace(int c)
{
	switch (c) {
	case ' ':
	case '\n':
		return 1;
	default:
		return 0;
	}
}
357 | | |
/* is_empty_all: verify that all the data is spacing
 *
 * Returns 1 when every one of the `size` bytes satisfies _isspace() (which
 * includes the case size == 0), 0 as soon as any other byte is found. */
static int
is_empty_all(const uint8_t *data, size_t size)
{
	size_t pos;

	for (pos = 0; pos < size; pos++) {
		if (!_isspace(data[pos]))
			return 0;
	}

	return 1;
}
366 | | |
367 | | /* |
368 | | * Replace all spacing characters in data with spaces. As a special |
369 | | * case, this collapses a newline with the previous space, if possible. |
370 | | */ |
371 | | static void |
372 | | replace_spacing(hoedown_buffer *ob, const uint8_t *data, size_t size) |
373 | 461k | { |
374 | 461k | size_t i = 0, mark; |
375 | 461k | hoedown_buffer_grow(ob, size); |
376 | 1.12M | while (1) { |
377 | 1.12M | mark = i; |
378 | 119M | while (i < size && data[i] != '\n') i++; |
379 | 1.12M | hoedown_buffer_put(ob, data + mark, i - mark); |
380 | | |
381 | 1.12M | if (i >= size) break; |
382 | | |
383 | 664k | if (!(i > 0 && data[i-1] == ' ')) |
384 | 301k | hoedown_buffer_putc(ob, ' '); |
385 | 664k | i++; |
386 | 664k | } |
387 | 461k | } |
388 | | |
389 | | /**************************** |
390 | | * INLINE PARSING FUNCTIONS * |
391 | | ****************************/ |
392 | | |
/* is_mail_autolink • looks for the address part of a mail autolink and '>' */
/* this is less strict than the original markdown e-mail address matching */
/*
 * The address is assumed to be [-@._a-zA-Z0-9]+ containing exactly one '@'
 * (alphanumerics are whatever isalnum() accepts). Scanning starts at data[0].
 *
 * Returns the number of bytes up to and including the closing '>', or 0 if
 * a byte outside the accepted set is met, the input ends before '>', or the
 * number of '@' seen before '>' is not exactly one.
 */
static size_t
is_mail_autolink(uint8_t *data, size_t size)
{
	size_t pos;
	size_t at_signs = 0;

	for (pos = 0; pos < size; ++pos) {
		const uint8_t ch = data[pos];

		if (isalnum(ch) || ch == '-' || ch == '.' || ch == '_')
			continue;

		if (ch == '@') {
			at_signs++;
			continue;
		}

		if (ch == '>')
			return (at_signs == 1) ? pos + 1 : 0;

		/* any other byte cannot be part of an address */
		return 0;
	}

	/* ran out of input without meeting '>' */
	return 0;
}
424 | | |
/* script_tag_length • returns the length of a "<?...?>" construct, or 0
 *
 * data must start with "<?" and be at least 3 bytes long, otherwise 0 is
 * returned. The construct ends at the first "?>" found outside a quoted
 * string; a quoted string is opened by ' or " and closed by the same quote
 * character unless that quote is directly preceded by a backslash.
 *
 * Returns the number of bytes up to and including the closing '>'. If no
 * closing "?>" is found, the whole input is taken and `size` is returned. */
static size_t
script_tag_length(uint8_t *data, size_t size)
{
	size_t pos;
	char open_quote = 0; /* quote char of the string being skipped, 0 if none */

	if (size < 3 || data[0] != '<' || data[1] != '?')
		return 0;

	for (pos = 2; pos < size; pos++) {
		const uint8_t ch = data[pos];

		/* the terminator is tested before quote handling for this byte */
		if (open_quote == 0 && ch == '>' && data[pos - 1] == '?')
			return pos + 1;

		if (ch == '\'' || ch == '"') {
			if (open_quote == 0)
				open_quote = (char)ch;
			else if (ch == open_quote && data[pos - 1] != '\\')
				open_quote = 0;
		}
	}

	/* unterminated: everything up to the end of the input belongs to it */
	return size;
}
459 | | |
460 | | /* tag_length • returns the length of the given tag (including the closing '>'), or 0 if it's not valid. *autolink is written only after the first-character check below: to HOEDOWN_AUTOLINK_EMAIL or HOEDOWN_AUTOLINK_NORMAL when the tag is an autolink, HOEDOWN_AUTOLINK_NONE otherwise; the early returns (too short, no '<', HTML comment, script_tag delegation) leave it untouched. */ |
461 | | static size_t |
462 | | tag_length(uint8_t *data, size_t size, hoedown_autolink_type *autolink, int script_tag) |
463 | 199k | { |
464 | 199k | size_t i, j; |
465 | | |
466 | | /* a valid tag can't be shorter than 3 chars */ |
467 | 199k | if (size < 3) return 0; |
468 | | |
469 | 188k | if (data[0] != '<') return 0; |
470 | | |
471 | | /* HTML comment, laxist form: the closing "-->" may share a dash with the opening "<!--" (i starts at 5, so "<!--->" matches) */ |
472 | 188k | if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') { |
473 | 41.7k | i = 5; |
474 | | |
475 | 13.2M | while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>')) |
476 | 13.2M | i++; |
477 | | |
478 | 41.7k | i++; /* step past the '>' (or one past size if the comment is unterminated) */ |
479 | | |
480 | 41.7k | if (i <= size) |
481 | 20.4k | return i; |
482 | 41.7k | } /* an unterminated comment falls through to the generic tag scan below */ |
483 | | |
484 | | /* begins with a '<' optionally followed by '/', followed by letter or number */ |
485 | 168k | i = (data[1] == '/') ? 2 : 1; |
486 | | |
487 | 168k | if (!isalnum(data[i])) { |
488 | 80.0k | if (script_tag) { /* caller allows "<?...?>" constructs: let script_tag_length() decide */ |
489 | 80.0k | return script_tag_length(data, size); |
490 | 80.0k | } |
491 | 0 | return 0; |
492 | 80.0k | } |
493 | | |
494 | | /* scheme test */ |
495 | 88.4k | *autolink = HOEDOWN_AUTOLINK_NONE; |
496 | | |
497 | | /* try to find the beginning of an URI */ |
498 | 409k | while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-')) |
499 | 321k | i++; |
500 | | |
501 | 88.4k | if (i > 1 && i < size && data[i] == '@') { /* "<local@...": try to match the rest as a mail address */ |
502 | 5.32k | if ((j = is_mail_autolink(data + i, size - i)) != 0) { |
503 | 2.97k | *autolink = HOEDOWN_AUTOLINK_EMAIL; |
504 | 2.97k | return i + j; |
505 | 2.97k | } |
506 | 5.32k | } |
507 | | |
508 | 85.4k | if (i > 2 && i < size && data[i] == ':') { /* "<scheme:" with a scheme of at least two chars */ |
509 | 16.3k | *autolink = HOEDOWN_AUTOLINK_NORMAL; |
510 | 16.3k | i++; |
511 | 16.3k | } |
512 | | |
513 | | /* completing autolink test: no spacing or ' or " */ |
514 | 85.4k | if (i >= size) |
515 | 3.86k | *autolink = HOEDOWN_AUTOLINK_NONE; |
516 | | |
517 | 81.5k | else if (*autolink) { |
518 | 16.3k | j = i; /* first char after the ':'; used below to require a non-empty remainder */ |
519 | | |
520 | 4.46M | while (i < size) { |
521 | 4.45M | if (data[i] == '\\') i += 2; /* skip the backslash and the char it escapes; may step past size */ |
522 | 4.44M | else if (data[i] == '>' || data[i] == '\'' || |
523 | 4.44M | data[i] == '"' || data[i] == ' ' || data[i] == '\n') |
524 | 12.0k | break; |
525 | 4.43M | else i++; |
526 | 4.45M | } |
527 | | |
528 | 16.3k | if (i >= size) return 0; |
529 | 12.0k | if (i > j && data[i] == '>') return i + 1; |
530 | | /* one of the forbidden chars has been found */ |
531 | 7.43k | *autolink = HOEDOWN_AUTOLINK_NONE; |
532 | 7.43k | } |
533 | | |
534 | | /* looking for something looking like a tag end */ |
535 | 48.6M | while (i < size && data[i] != '>') i++; |
536 | 76.5k | if (i >= size) return 0; |
537 | 27.7k | return i + 1; |
538 | 76.5k | } |
539 | | |
540 | | /* parse_inline • parses inline markdown elements */ |
541 | | static void |
542 | | parse_inline(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size) |
543 | 1.07M | { |
544 | 1.07M | size_t i = 0, end = 0, consumed = 0; |
545 | 1.07M | hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL }; |
546 | 1.07M | uint8_t *active_char = doc->active_char; |
547 | | |
548 | 1.07M | if (doc->work_bufs[BUFFER_SPAN].size + |
549 | 1.07M | doc->work_bufs[BUFFER_BLOCK].size > doc->max_nesting) |
550 | 5.08k | return; |
551 | | |
552 | 6.36M | while (i < size) { |
553 | 6.04M | size_t user_block = 0; |
554 | 128M | while (end < size) { |
555 | 127M | if (doc->user_block) { |
556 | 0 | user_block = doc->user_block(data+end, size - end, &doc->data); |
557 | 0 | if (user_block) { |
558 | 0 | break; |
559 | 0 | } |
560 | 0 | } |
561 | | /* copying inactive chars into the output */ |
562 | 127M | if (active_char[data[end]] != 0) { |
563 | 5.29M | break; |
564 | 5.29M | } |
565 | 122M | end++; |
566 | 122M | } |
567 | | |
568 | 6.04M | if (doc->md.normal_text) { |
569 | 6.04M | work.data = data + i; |
570 | 6.04M | work.size = end - i; |
571 | 6.04M | doc->md.normal_text(ob, &work, &doc->data); |
572 | 6.04M | } |
573 | 0 | else |
574 | 0 | hoedown_buffer_put(ob, data + i, end - i); |
575 | | |
576 | 6.04M | if (end >= size) { |
577 | 748k | break; |
578 | 748k | } |
579 | 5.29M | i = end; |
580 | | |
581 | 5.29M | if (user_block) { |
582 | 0 | work.data = data + i; |
583 | 0 | work.size = user_block; |
584 | 0 | end = user_block; |
585 | 0 | if (doc->md.user_block) { |
586 | 0 | doc->md.user_block(ob, &work, &doc->data); |
587 | 0 | } else { |
588 | 0 | hoedown_buffer_put(ob, data + i, size - i); |
589 | 0 | } |
590 | 0 | if (!end) { |
591 | 0 | end = i + 1; |
592 | 0 | } else { |
593 | 0 | i += end; |
594 | 0 | end = i; |
595 | 0 | consumed = i; |
596 | 0 | } |
597 | 5.29M | } else { |
598 | 5.29M | end = markdown_char_ptrs[ (int)active_char[data[end]] ](ob, doc, data + i, i - consumed, size - i); |
599 | 5.29M | if (!end) /* no action from the callback */ |
600 | 4.43M | end = i + 1; |
601 | 869k | else { |
602 | 869k | i += end; |
603 | 869k | end = i; |
604 | 869k | consumed = i; |
605 | 869k | } |
606 | 5.29M | } |
607 | 5.29M | } |
608 | 1.06M | } |
609 | | |
/* parse_inline_attributes • parses inline attributes located at the START of
 * data (parse_attributes, by contrast, looks for them at the END of data).
 *
 * data must begin with '{'; when attr_activation is non-zero the byte right
 * after '{' must equal it. The attribute text runs up to the first '}' that
 * is not skipped as part of a backslash pair, and is appended to attr when
 * attr is not NULL (without the braces and without the activation byte).
 *
 * Returns the position just past the closing '}', or 0 when data does not
 * start with an attribute block or the block is never closed. */
static size_t parse_inline_attributes(uint8_t *data, size_t size, struct hoedown_buffer *attr, uint8_t attr_activation)
{
	size_t body_start, pos;

	if (size < 1)
		return 0;

	if (data[0] != '{')
		return 0;

	/* with an activation char configured, it has to follow the '{' directly */
	if (attr_activation && !(size > 1 && data[1] == attr_activation))
		return 0;

	/* skip an extra character to skip over the activation character if any */
	body_start = attr_activation ? 2 : 1;

	pos = 0;
	while (pos < size) {
		switch (data[pos]) {
		case '\\':
			/* ignore escaped characters */
			pos += 2;
			break;

		case '}':
			if (attr != NULL)
				hoedown_buffer_put(attr, data + body_start, pos - body_start);
			return pos + 1;

		default:
			pos++;
			break;
		}
	}

	return 0;
}
643 | | |
644 | | |
645 | | /* parse_attributes • parses special attributes ("{...}") at the end of the data. Returns the length of data that remains once a recognised trailing attribute block has been cut off; when nothing is recognised it returns size minus the trailing newlines, and 0 for empty input. A plain block is stored in attr; a "{@<block_id> ...}" block is stored in block_attr, after which a further plain block in front of it is looked for recursively. */ |
646 | | static size_t parse_attributes(uint8_t *data, size_t size, struct hoedown_buffer *attr, struct hoedown_buffer *block_attr, const char *block_id, int is_header, uint8_t attr_activation) |
647 | 1.02M | { |
648 | 1.02M | size_t i, len, begin = 0, end = 0; |
649 | | |
650 | 1.02M | if (size < 1) |
651 | 31.6k | return 0; |
652 | | /* ignore trailing newlines; len is the length without them */ |
653 | 990k | i = size; |
654 | 1.31M | while (i && data[i-1] == '\n') { |
655 | 320k | i--; |
656 | 320k | } |
657 | 990k | len = i; |
658 | | /* data ends with '}': walk back to the nearest '{' (or to index 0 if there is none) */ |
659 | 990k | if (i && data[i-1] == '}') { |
660 | 27.9M | do { |
661 | 27.9M | i--; |
662 | 27.9M | } while (i && data[i] != '{'); |
663 | | |
664 | 120k | begin = i + 1; /* first byte after the candidate '{' */ |
665 | 120k | end = len - 1; /* index of the closing '}' */ |
666 | 157k | while (i && data[i-1] == ' ') { /* drop the spaces in front of the '{' */ |
667 | 36.7k | i--; |
668 | 36.7k | } |
669 | 120k | } |
670 | | /* for headers, also drop a trailing run of '#' and the spaces before it; this only affects the result if an attribute block is stored below, because otherwise len (not i) is returned */ |
671 | 990k | if (is_header && i && data[i-1] == '#') { |
672 | 22.5k | while (i && data[i-1] == '#') { |
673 | 11.5k | i--; |
674 | 11.5k | } |
675 | 60.5k | while (i && data[i-1] == ' ') { |
676 | 49.5k | i--; |
677 | 49.5k | } |
678 | 11.0k | } |
679 | | /* the data[begin-1] check rejects the case where the backward scan stopped at index 0 without finding a '{' */ |
680 | 990k | if (begin && end && data[begin-1] == '{' && data[end] == '}') { |
681 | 102k | if (begin >=2 && data[begin-2] == '\\' && data[end-1] == '\\') { /* both braces are backslashed ("\{ ... \}"): not an attribute block */ |
682 | 1.28k | return len; |
683 | 1.28k | } |
684 | | |
685 | 101k | if (block_attr && data[begin] == '@') { |
686 | | /* skip the @ by incrementing past it */ |
687 | 8.64k | begin++; |
688 | 8.64k | if (*block_id) { |
689 | | /* if a block_id was fed in, check to make sure the string until the |
690 | | * space is identical */ |
691 | 32.9k | while (begin < end && *block_id) { |
692 | 26.5k | if (data[begin] != (uint8_t)(*block_id)) { |
693 | 2.21k | return len; |
694 | 2.21k | } |
695 | 24.2k | begin++; |
696 | 24.2k | block_id++; |
697 | 24.2k | } |
698 | | /* it might have matched only the first portion of block_id; make sure |
699 | | * there's no more to it here */ |
700 | 6.43k | if (*block_id) { |
701 | 862 | return len; |
702 | 862 | } |
703 | 6.43k | } |
704 | 5.56k | if (begin < end && data[begin] != ' ') { /* the id must be followed by a space or by the closing '}' */ |
705 | 796 | return len; |
706 | 796 | } |
707 | 4.77k | if (block_attr) { /* always true in this branch; block_attr was already checked above */ |
708 | 4.77k | if (block_attr->size) { |
709 | 2.54k | hoedown_buffer_reset(block_attr); |
710 | 2.54k | } |
711 | 4.77k | hoedown_buffer_put(block_attr, data + begin, end - begin); |
712 | 4.77k | } |
713 | 4.77k | len = i; /* cut the block attribute (and the spaces / header '#' run in front of it) off */ |
714 | 4.77k | if (attr) { /* look for a plain attribute block in front of the block attribute */ |
715 | 4.77k | len = parse_attributes(data, len, attr, NULL, "", is_header, attr_activation); |
716 | 4.77k | } |
717 | 92.8k | } else if (attr && (!attr_activation || attr_activation == data[begin])) { /* plain attribute block; with an activation char configured it must directly follow the '{' */ |
718 | 80.0k | if (attr->size) { |
719 | 0 | hoedown_buffer_reset(attr); |
720 | 0 | } |
721 | 80.0k | if (attr_activation) { /* do not store the activation char itself */ |
722 | 0 | begin++; |
723 | 0 | } |
724 | 80.0k | hoedown_buffer_put(attr, data + begin, end - begin); |
725 | 80.0k | len = i; |
726 | 80.0k | } |
727 | 101k | } |
728 | | |
729 | 985k | return len; |
730 | 990k | } |
731 | | |
/* is_escaped • returns whether special char at data[loc] is escaped by '\\'
 *
 * Counts the unbroken run of backslashes that ends right before data[loc]:
 * an odd count escapes data[loc] (returns 1), an even count - including
 * zero - means the backslashes only escape each other (returns 0). */
static int
is_escaped(uint8_t *data, size_t loc)
{
	size_t backslashes = 0;

	while (backslashes < loc && data[loc - 1 - backslashes] == '\\')
		backslashes++;

	return (int)(backslashes & 1);
}
743 | | |
/* is_backslashed • returns whether special char at data[loc] is preceded by '\\', a stricter interpretation of escaping than is_escaped:
 * only the single byte before data[loc] is inspected, so "\\\\x" counts as backslashed here. Returns 0 for loc == 0. */
static int
is_backslashed(uint8_t *data, size_t loc)
{
	if (loc == 0)
		return 0;

	return data[loc - 1] == '\\';
}
750 | | |
751 | | /* find_emph_char • looks for the next emph char c, skipping code spans and links. Returns the index of the match, or 0 when there is none (0 therefore doubles as "not found"). When a skipped code span or link turns out to be unterminated or not followed by a target, the first c seen inside it is returned instead. */ |
752 | | static size_t |
753 | | find_emph_char(uint8_t *data, size_t size, uint8_t c) |
754 | 1.71M | { |
755 | 1.71M | size_t i = 0; |
756 | | |
757 | 4.23M | while (i < size) { |
758 | 218M | while (i < size && data[i] != c && data[i] != '[' && data[i] != '`') /* jump to the next char of interest */ |
759 | 214M | i++; |
760 | | |
761 | 4.20M | if (i == size) |
762 | 221k | return 0; |
763 | | |
764 | | /* not counting escaped chars */ |
765 | 3.98M | if (is_escaped(data, i)) { |
766 | 7.54k | i++; continue; |
767 | 7.54k | } |
768 | | |
769 | 3.97M | if (data[i] == c) |
770 | 1.16M | return i; |
771 | | |
772 | | /* skipping a codespan */ |
773 | 2.81M | if (data[i] == '`') { |
774 | 38.2k | size_t span_nb = 0, bt; |
775 | 38.2k | size_t tmp_i = 0; /* first c seen inside the span, 0 if none yet */ |
776 | | |
777 | | /* counting the number of opening backticks */ |
778 | 119k | while (i < size && data[i] == '`') { |
779 | 81.2k | i++; span_nb++; |
780 | 81.2k | } |
781 | | |
782 | 38.2k | if (i >= size) return 0; |
783 | | |
784 | | /* finding the matching closing sequence */ |
785 | 37.2k | bt = 0; /* length of the current run of consecutive backticks */ |
786 | 41.2M | while (i < size && bt < span_nb) { |
787 | 41.2M | if (!tmp_i && data[i] == c) tmp_i = i; |
788 | 41.2M | if (data[i] == '`') bt++; |
789 | 41.1M | else bt = 0; |
790 | 41.2M | i++; |
791 | 41.2M | } |
792 | | |
793 | | /* not a well-formed codespan; use found matching emph char */ |
794 | 37.2k | if (bt < span_nb && i >= size) return tmp_i; |
795 | 37.2k | } |
796 | | /* skipping a link */ |
797 | 2.77M | else if (data[i] == '[') { |
798 | 2.77M | size_t tmp_i = 0; /* first c seen inside the link text or target, 0 if none yet */ |
799 | 2.77M | uint8_t cc; /* closing delimiter of the link target: ']' or ')' */ |
800 | | |
801 | 2.77M | i++; |
802 | 229M | while (i < size && data[i] != ']') { |
803 | 226M | if (!tmp_i && data[i] == c) tmp_i = i; |
804 | 226M | i++; |
805 | 226M | } |
806 | | |
807 | 2.77M | i++; /* step past the ']' */ |
808 | 5.74M | while (i < size && _isspace(data[i])) |
809 | 2.97M | i++; |
810 | | |
811 | 2.77M | if (i >= size) |
812 | 189k | return tmp_i; |
813 | | |
814 | 2.58M | switch (data[i]) { |
815 | 411k | case '[': |
816 | 411k | cc = ']'; break; |
817 | | |
818 | 74.2k | case '(': |
819 | 74.2k | cc = ')'; break; |
820 | | |
821 | 2.10M | default: /* "[...]" without a target: a c found inside the brackets is the result, otherwise keep scanning from here */ |
822 | 2.10M | if (tmp_i) |
823 | 27.3k | return tmp_i; |
824 | 2.07M | else |
825 | 2.07M | continue; |
826 | 2.58M | } |
827 | | |
828 | 485k | i++; |
829 | 94.4M | while (i < size && data[i] != cc) { |
830 | 93.9M | if (!tmp_i && data[i] == c) tmp_i = i; |
831 | 93.9M | i++; |
832 | 93.9M | } |
833 | | |
834 | 485k | if (i >= size) |
835 | 71.7k | return tmp_i; |
836 | | |
837 | 413k | i++; /* well-formed link skipped; resume scanning after its closing delimiter */ |
838 | 413k | } |
839 | 2.81M | } |
840 | | |
841 | 28.5k | return 0; |
842 | 1.71M | } |
843 | | |
/* find_separator_char • looks for the next unbackslashed separator character c
 *
 * Returns the index of the first occurrence of c in data[0..size) that is
 * not directly preceded by a backslash, or 0 when there is none. A match at
 * index 0 is therefore indistinguishable from "not found". */
static size_t
find_separator_char(uint8_t *data, size_t size, uint8_t c)
{
	size_t pos;

	for (pos = 0; pos < size; pos++) {
		if (data[pos] != c)
			continue;

		/* not counting backslashed separators */
		if (is_backslashed(data, pos))
			continue;

		return pos;
	}

	return 0;
}
868 | | |
869 | | /* parse_emph1 • parsing single emphase */ |
870 | | /* closed by a symbol not preceded by spacing and not followed by symbol */ |
871 | | static size_t |
872 | | parse_emph1(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c) |
873 | 87.0k | { |
874 | 87.0k | size_t i = 0, len; |
875 | 87.0k | hoedown_buffer *work = 0; |
876 | 87.0k | int r; |
877 | | |
878 | | /* skipping one symbol if coming from emph3 */ |
879 | 87.0k | if (size > 1 && data[0] == c && data[1] == c) i = 1; |
880 | | |
881 | 106k | while (i < size) { |
882 | 106k | len = find_emph_char(data + i, size - i, c); |
883 | 106k | if (!len) return 0; |
884 | 60.8k | i += len; |
885 | 60.8k | if (i >= size) return 0; |
886 | | |
887 | 60.8k | if (data[i] == c && !_isspace(data[i - 1])) { |
888 | | |
889 | 42.6k | if (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS || |
890 | 42.6k | (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_UNDERLINE_EMPHASIS && c == '_')) { |
891 | 42.6k | if (i + 1 < size && isalnum(data[i + 1])) |
892 | 1.10k | continue; |
893 | 42.6k | } |
894 | | |
895 | 41.5k | work = newbuf(doc, BUFFER_SPAN); |
896 | 41.5k | parse_inline(work, doc, data, i); |
897 | | |
898 | 41.5k | if (doc->ext_flags & HOEDOWN_EXT_UNDERLINE && c == '_') |
899 | 38.5k | r = doc->md.underline(ob, work, &doc->data); |
900 | 2.98k | else |
901 | 2.98k | r = doc->md.emphasis(ob, work, &doc->data); |
902 | | |
903 | 41.5k | popbuf(doc, BUFFER_SPAN); |
904 | 41.5k | return r ? i + 1 : 0; |
905 | 42.6k | } |
906 | 60.8k | } |
907 | | |
908 | 0 | return 0; |
909 | 87.0k | } |
910 | | |
911 | | /* parse_emph2 • parsing single emphase */ |
912 | | static size_t |
913 | | parse_emph2(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c) |
914 | 39.0k | { |
915 | 39.0k | size_t i = 0, len; |
916 | 39.0k | hoedown_buffer *work = 0; |
917 | 39.0k | int r; |
918 | | |
919 | 59.7k | while (i < size) { |
920 | 59.2k | len = find_emph_char(data + i, size - i, c); |
921 | 59.2k | if (!len) return 0; |
922 | 34.1k | i += len; |
923 | | |
924 | 34.1k | if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) { |
925 | 13.4k | work = newbuf(doc, BUFFER_SPAN); |
926 | 13.4k | parse_inline(work, doc, data, i); |
927 | | |
928 | 13.4k | if (c == '~') |
929 | 2.97k | r = doc->md.strikethrough(ob, work, &doc->data); |
930 | 10.4k | else if (c == '=') |
931 | 741 | r = doc->md.highlight(ob, work, &doc->data); |
932 | 9.73k | else |
933 | 9.73k | r = doc->md.double_emphasis(ob, work, &doc->data); |
934 | | |
935 | 13.4k | popbuf(doc, BUFFER_SPAN); |
936 | 13.4k | return r ? i + 2 : 0; |
937 | 13.4k | } |
938 | 20.7k | i++; |
939 | 20.7k | } |
940 | 528 | return 0; |
941 | 39.0k | } |
942 | | |
943 | | /* parse_emph3 • parsing single emphase */ |
944 | | /* finds the first closing tag, and delegates to the other emph */ |
945 | | static size_t |
946 | | parse_emph3(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c) |
947 | 52.1k | { |
948 | 52.1k | size_t i = 0, len; |
949 | 52.1k | int r; |
950 | | |
951 | 81.6k | while (i < size) { |
952 | 81.6k | len = find_emph_char(data + i, size - i, c); |
953 | 81.6k | if (!len) return 0; |
954 | 45.5k | i += len; |
955 | | |
956 | | /* skip spacing preceded symbols */ |
957 | 45.5k | if (data[i] != c || _isspace(data[i - 1])) |
958 | 29.5k | continue; |
959 | | |
960 | 15.9k | if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && doc->md.triple_emphasis) { |
961 | | /* triple symbol found */ |
962 | 2.70k | hoedown_buffer *work = newbuf(doc, BUFFER_SPAN); |
963 | | |
964 | 2.70k | parse_inline(work, doc, data, i); |
965 | 2.70k | r = doc->md.triple_emphasis(ob, work, &doc->data); |
966 | 2.70k | popbuf(doc, BUFFER_SPAN); |
967 | 2.70k | return r ? i + 3 : 0; |
968 | | |
969 | 13.2k | } else if (i + 1 < size && data[i + 1] == c) { |
970 | | /* double symbol found, handing over to emph1 */ |
971 | 6.48k | len = parse_emph1(ob, doc, data - 2, size + 2, c); |
972 | 6.48k | if (!len) return 0; |
973 | 0 | else return len - 2; |
974 | | |
975 | 6.77k | } else { |
976 | | /* single symbol found, handing over to emph2 */ |
977 | 6.77k | len = parse_emph2(ob, doc, data - 1, size + 1, c); |
978 | 6.77k | if (!len) return 0; |
979 | 0 | else return len - 1; |
980 | 6.77k | } |
981 | 15.9k | } |
982 | 0 | return 0; |
983 | 52.1k | } |
984 | | |
985 | | /* parse_math • parses a math span until the given ending delimiter */ |
986 | | static size_t |
987 | | parse_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size, const char *end, size_t delimsz, int displaymode) |
988 | 34.6k | { |
989 | 34.6k | hoedown_buffer text = { NULL, 0, 0, 0, NULL, NULL, NULL }; |
990 | 34.6k | size_t i = delimsz; |
991 | | |
992 | 34.6k | if (!doc->md.math) |
993 | 0 | return 0; |
994 | | |
995 | | /* find ending delimiter */ |
996 | 93.1k | while (1) { |
997 | 52.7M | while (i < size && data[i] != (uint8_t)end[0]) |
998 | 52.6M | i++; |
999 | | |
1000 | 93.1k | if (i >= size) |
1001 | 19.3k | return 0; |
1002 | | |
1003 | 73.7k | if (!is_escaped(data, i) && !(i + delimsz > size) |
1004 | 73.7k | && memcmp(data + i, end, delimsz) == 0) |
1005 | 15.2k | break; |
1006 | | |
1007 | 58.5k | i++; |
1008 | 58.5k | } |
1009 | | |
1010 | | /* prepare buffers */ |
1011 | 15.2k | text.data = data + delimsz; |
1012 | 15.2k | text.size = i - delimsz; |
1013 | | |
1014 | | /* if this is a $$ and MATH_EXPLICIT is not active, |
1015 | | * guess whether displaymode should be enabled from the context */ |
1016 | 15.2k | i += delimsz; |
1017 | 15.2k | if (delimsz == 2 && !(doc->ext_flags & HOEDOWN_EXT_MATH_EXPLICIT)) |
1018 | 0 | displaymode = is_empty_all(data - offset, offset) && is_empty_all(data + i, size - i); |
1019 | | |
1020 | | /* call callback */ |
1021 | 15.2k | if (doc->md.math(ob, &text, displaymode, &doc->data)) |
1022 | 15.2k | return i; |
1023 | | |
1024 | 0 | return 0; |
1025 | 15.2k | } |
1026 | | |
1027 | | /* char_emphasis • single and double emphasis parsing */ |
1028 | | static size_t |
1029 | | char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1030 | 966k | { |
1031 | 966k | uint8_t c = data[0]; |
1032 | 966k | size_t ret; |
1033 | | |
1034 | 966k | if (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS) { |
1035 | 966k | if (offset > 0 && !_isspace(data[-1]) && data[-1] != '>' && data[-1] != '(') |
1036 | 680k | return 0; |
1037 | 966k | } |
1038 | | |
1039 | 286k | if (size > 2 && data[1] != c) { |
1040 | | /* spacing cannot follow an opening emphasis; |
1041 | | * strikethrough and highlight only takes two characters '~~' */ |
1042 | 126k | if (c == '~' || c == '=' || _isspace(data[1]) || (ret = parse_emph1(ob, doc, data + 1, size - 1, c)) == 0) |
1043 | 87.1k | return 0; |
1044 | | |
1045 | 39.7k | return ret + 1; |
1046 | 126k | } |
1047 | | |
1048 | 159k | if (size > 3 && data[1] == c && data[2] != c) { |
1049 | 37.0k | if (_isspace(data[2]) || (ret = parse_emph2(ob, doc, data + 2, size - 2, c)) == 0) |
1050 | 27.8k | return 0; |
1051 | | |
1052 | 9.16k | return ret + 2; |
1053 | 37.0k | } |
1054 | | |
1055 | 122k | if (size > 4 && data[1] == c && data[2] == c && data[3] != c) { |
1056 | 62.2k | if (c == '~' || c == '=' || _isspace(data[3]) || (ret = parse_emph3(ob, doc, data + 3, size - 3, c)) == 0) |
1057 | 60.0k | return 0; |
1058 | | |
1059 | 2.16k | return ret + 3; |
1060 | 62.2k | } |
1061 | | |
1062 | 60.2k | return 0; |
1063 | 122k | } |
1064 | | |
1065 | | |
1066 | | /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */ |
1067 | | static size_t |
1068 | | char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1069 | 465k | { |
1070 | 465k | if (offset < 2 || data[-1] != ' ' || data[-2] != ' ') |
1071 | 439k | return 0; |
1072 | | |
1073 | | /* removing the last space from ob and rendering */ |
1074 | 1.32M | while (ob->size && ob->data[ob->size - 1] == ' ') |
1075 | 1.30M | ob->size--; |
1076 | | |
1077 | 25.9k | return doc->md.linebreak(ob, &doc->data) ? 1 : 0; |
1078 | 465k | } |
1079 | | |
1080 | | |
1081 | | /* char_codespan • '`' parsing a code span (assuming codespan != 0) */ |
1082 | | static size_t |
1083 | | char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1084 | 107k | { |
1085 | 107k | hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL }; |
1086 | 107k | size_t end, nb = 0, i, f_begin, f_end; |
1087 | | |
1088 | | /* counting the number of backticks in the delimiter */ |
1089 | 3.71M | while (nb < size && data[nb] == '`') |
1090 | 3.60M | nb++; |
1091 | | |
1092 | | /* finding the next delimiter */ |
1093 | 107k | i = 0; |
1094 | 73.0M | for (end = nb; end < size && i < nb; end++) { |
1095 | 72.9M | if (data[end] == '`') { |
1096 | 421k | if (end + 1 == size || !is_escaped(data, end)) { |
1097 | 420k | i++; |
1098 | 420k | } else { |
1099 | 727 | i = 0; |
1100 | 727 | } |
1101 | 421k | } |
1102 | 72.5M | else i = 0; |
1103 | 72.9M | } |
1104 | | |
1105 | 107k | if (i < nb && end >= size) |
1106 | 84.2k | return 0; /* no matching delimiter */ |
1107 | | |
1108 | | /* trimming outside whitespace */ |
1109 | 23.6k | f_begin = nb; |
1110 | 256k | while (f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\n')) |
1111 | 233k | f_begin++; |
1112 | | |
1113 | 23.6k | f_end = end - nb; |
1114 | 160k | while (f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\n')) |
1115 | 137k | f_end--; |
1116 | | |
1117 | | /* real code span */ |
1118 | 23.6k | if (f_begin < f_end) { |
1119 | | /* needed for parse_attribute functions as buffer functions do not work with |
1120 | | * buffers made on the stack */ |
1121 | 20.5k | hoedown_buffer *attr = newbuf(doc, BUFFER_ATTRIBUTE); |
1122 | | |
1123 | 20.5k | work.data = data + f_begin; |
1124 | 20.5k | work.size = f_end - f_begin; |
1125 | | |
1126 | 20.5k | if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) { |
1127 | 20.5k | end += parse_inline_attributes(data + end, size - end, attr, doc->attr_activation); |
1128 | 20.5k | } |
1129 | | |
1130 | 20.5k | if (!doc->md.codespan(ob, &work, attr, &doc->data)) |
1131 | 0 | end = 0; |
1132 | 20.5k | popbuf(doc, BUFFER_ATTRIBUTE); |
1133 | 20.5k | } else { |
1134 | 3.06k | if (!doc->md.codespan(ob, 0, 0, &doc->data)) |
1135 | 0 | end = 0; |
1136 | 3.06k | } |
1137 | | |
1138 | 23.6k | return end; |
1139 | 107k | } |
1140 | | |
1141 | | /* char_quote • '"' parsing a quote */ |
1142 | | static size_t |
1143 | | char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1144 | 272k | { |
1145 | 272k | size_t end, nq = 0, i, f_begin, f_end; |
1146 | | |
1147 | | /* counting the number of quotes in the delimiter */ |
1148 | 168M | while (nq < size && data[nq] == '"') |
1149 | 168M | nq++; |
1150 | | |
1151 | | /* finding the next delimiter */ |
1152 | 272k | end = nq; |
1153 | 670k | while (1) { |
1154 | 670k | i = end; |
1155 | 670k | end += find_emph_char(data + end, size - end, '"'); |
1156 | 670k | if (end == i) return 0; /* no matching delimiter */ |
1157 | 496k | i = end; |
1158 | 84.4M | while (end < size && data[end] == '"' && end - i < nq) end++; |
1159 | 496k | if (end - i >= nq) break; |
1160 | 496k | } |
1161 | | |
1162 | | /* trimming outside spaces */ |
1163 | 98.3k | f_begin = nq; |
1164 | 846k | while (f_begin < end && data[f_begin] == ' ') |
1165 | 747k | f_begin++; |
1166 | | |
1167 | 98.3k | f_end = end - nq; |
1168 | 711k | while (f_end > nq && data[f_end-1] == ' ') |
1169 | 613k | f_end--; |
1170 | | |
1171 | | /* real quote */ |
1172 | 98.3k | if (f_begin < f_end) { |
1173 | 91.3k | hoedown_buffer *work = newbuf(doc, BUFFER_SPAN); |
1174 | 91.3k | parse_inline(work, doc, data + f_begin, f_end - f_begin); |
1175 | | |
1176 | 91.3k | if (!doc->md.quote(ob, work, &doc->data)) |
1177 | 1.66k | end = 0; |
1178 | 91.3k | popbuf(doc, BUFFER_SPAN); |
1179 | 91.3k | } else { |
1180 | 6.96k | if (!doc->md.quote(ob, 0, &doc->data)) |
1181 | 6.96k | end = 0; |
1182 | 6.96k | } |
1183 | | |
1184 | 98.3k | return end; |
1185 | 272k | } |
1186 | | |
1187 | | |
1188 | | /* char_escape • '\\' backslash escape */ |
1189 | | static size_t |
1190 | | char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1191 | 104k | { |
1192 | 104k | static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~=\"$"; |
1193 | 104k | hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL }; |
1194 | 104k | size_t w; |
1195 | | |
1196 | 104k | if (size > 1) { |
1197 | 101k | if (data[1] == '\\' && (doc->ext_flags & HOEDOWN_EXT_MATH) && |
1198 | 101k | size > 2 && (data[2] == '(' || data[2] == '[')) { |
1199 | 2.70k | const char *end = (data[2] == '[') ? "\\\\]" : "\\\\)"; |
1200 | 2.70k | w = parse_math(ob, doc, data, offset, size, end, 3, data[2] == '['); |
1201 | 2.70k | if (w) return w; |
1202 | 2.70k | } |
1203 | | |
1204 | 101k | if (strchr(escape_chars, data[1]) == NULL) |
1205 | 47.7k | return 0; |
1206 | | |
1207 | 53.5k | if (doc->md.normal_text) { |
1208 | 53.5k | work.data = data + 1; |
1209 | 53.5k | work.size = 1; |
1210 | 53.5k | doc->is_escape_char = 1; |
1211 | 53.5k | doc->md.normal_text(ob, &work, &doc->data); |
1212 | 53.5k | doc->is_escape_char = 0; |
1213 | 53.5k | } |
1214 | 0 | else hoedown_buffer_putc(ob, data[1]); |
1215 | 53.5k | } else if (size == 1) { |
1216 | 2.57k | if (doc->md.normal_text) { |
1217 | 2.57k | work.data = data; |
1218 | 2.57k | work.size = 1; |
1219 | 2.57k | doc->md.normal_text(ob, &work, &doc->data); |
1220 | 2.57k | } |
1221 | 0 | else hoedown_buffer_putc(ob, data[0]); |
1222 | 2.57k | } |
1223 | | |
1224 | 56.1k | return 2; |
1225 | 104k | } |
1226 | | |
1227 | | /* char_entity • '&' escaped when it doesn't belong to an entity */ |
1228 | | /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */ |
1229 | | static size_t |
1230 | | char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1231 | 62.3k | { |
1232 | 62.3k | size_t end = 1; |
1233 | 62.3k | hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL }; |
1234 | | |
1235 | 62.3k | if (end < size && data[end] == '#') |
1236 | 409 | end++; |
1237 | | |
1238 | 357k | while (end < size && isalnum(data[end])) |
1239 | 294k | end++; |
1240 | | |
1241 | 62.3k | if (end < size && data[end] == ';') |
1242 | 965 | end++; /* real entity */ |
1243 | 61.3k | else |
1244 | 61.3k | return 0; /* lone '&' */ |
1245 | | |
1246 | 965 | if (doc->md.entity) { |
1247 | 0 | work.data = data; |
1248 | 0 | work.size = end; |
1249 | 0 | doc->md.entity(ob, &work, &doc->data); |
1250 | 0 | } |
1251 | 965 | else hoedown_buffer_put(ob, data, end); |
1252 | | |
1253 | 965 | return end; |
1254 | 62.3k | } |
1255 | | |
1256 | | /* char_langle_tag • '<' when tags or autolinks are allowed */ |
1257 | | static size_t |
1258 | | char_langle_tag(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1259 | 199k | { |
1260 | 199k | hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL }; |
1261 | 199k | hoedown_autolink_type altype = HOEDOWN_AUTOLINK_NONE; |
1262 | 199k | size_t end = tag_length(data, size, &altype, doc->ext_flags & HOEDOWN_EXT_SCRIPT_TAGS); |
1263 | 199k | int ret = 0; |
1264 | | |
1265 | 199k | work.data = data; |
1266 | 199k | work.size = end; |
1267 | | |
1268 | 199k | if (end > 2) { |
1269 | 60.0k | if (doc->md.autolink && altype != HOEDOWN_AUTOLINK_NONE) { |
1270 | 7.58k | hoedown_buffer *u_link = newbuf(doc, BUFFER_SPAN); |
1271 | 7.58k | work.data = data + 1; |
1272 | 7.58k | work.size = end - 2; |
1273 | 7.58k | unscape_text(u_link, &work); |
1274 | 7.58k | ret = doc->md.autolink(ob, u_link, altype, &doc->data); |
1275 | 7.58k | popbuf(doc, BUFFER_SPAN); |
1276 | 7.58k | } |
1277 | 52.4k | else if (doc->md.raw_html) |
1278 | 52.4k | ret = doc->md.raw_html(ob, &work, &doc->data); |
1279 | 60.0k | } |
1280 | | |
1281 | 199k | if (!ret) return 0; |
1282 | 60.0k | else return end; |
1283 | 199k | } |
1284 | | |
1285 | | static size_t |
1286 | | char_autolink_www(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1287 | 461k | { |
1288 | 461k | hoedown_buffer *link, *link_url, *link_text; |
1289 | 461k | size_t link_len, rewind; |
1290 | | |
1291 | 461k | if (!doc->md.link || doc->in_link_body) |
1292 | 1.81k | return 0; |
1293 | | |
1294 | 459k | link = newbuf(doc, BUFFER_SPAN); |
1295 | | |
1296 | 459k | if ((link_len = hoedown_autolink__www(&rewind, link, data, offset, size, HOEDOWN_AUTOLINK_SHORT_DOMAINS)) > 0) { |
1297 | 23.0k | link_url = newbuf(doc, BUFFER_SPAN); |
1298 | 23.0k | HOEDOWN_BUFPUTSL(link_url, "http://"); |
1299 | 23.0k | hoedown_buffer_put(link_url, link->data, link->size); |
1300 | | |
1301 | 23.0k | if (ob->size > rewind) |
1302 | 17.2k | ob->size -= rewind; |
1303 | 5.79k | else |
1304 | 5.79k | ob->size = 0; |
1305 | | |
1306 | 23.0k | if (doc->md.normal_text) { |
1307 | 23.0k | link_text = newbuf(doc, BUFFER_SPAN); |
1308 | 23.0k | doc->md.normal_text(link_text, link, &doc->data); |
1309 | 23.0k | doc->md.link(ob, link_text, link_url, NULL, NULL, &doc->data); |
1310 | 23.0k | popbuf(doc, BUFFER_SPAN); |
1311 | 23.0k | } else { |
1312 | 0 | doc->md.link(ob, link, link_url, NULL, NULL, &doc->data); |
1313 | 0 | } |
1314 | 23.0k | popbuf(doc, BUFFER_SPAN); |
1315 | 23.0k | } |
1316 | | |
1317 | 459k | popbuf(doc, BUFFER_SPAN); |
1318 | 459k | return link_len; |
1319 | 461k | } |
1320 | | |
1321 | | static size_t |
1322 | | char_autolink_email(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1323 | 87.9k | { |
1324 | 87.9k | hoedown_buffer *link; |
1325 | 87.9k | size_t link_len, rewind; |
1326 | | |
1327 | 87.9k | if (!doc->md.autolink || doc->in_link_body) |
1328 | 591 | return 0; |
1329 | | |
1330 | 87.3k | link = newbuf(doc, BUFFER_SPAN); |
1331 | | |
1332 | 87.3k | if ((link_len = hoedown_autolink__email(&rewind, link, data, offset, size, 0)) > 0) { |
1333 | 1.50k | if (ob->size > rewind) |
1334 | 838 | ob->size -= rewind; |
1335 | 665 | else |
1336 | 665 | ob->size = 0; |
1337 | | |
1338 | 1.50k | doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_EMAIL, &doc->data); |
1339 | 1.50k | } |
1340 | | |
1341 | 87.3k | popbuf(doc, BUFFER_SPAN); |
1342 | 87.3k | return link_len; |
1343 | 87.9k | } |
1344 | | |
1345 | | static size_t |
1346 | | char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1347 | 1.41M | { |
1348 | 1.41M | hoedown_buffer *link; |
1349 | 1.41M | size_t link_len, rewind; |
1350 | | |
1351 | 1.41M | if (!doc->md.autolink || doc->in_link_body) |
1352 | 7.69k | return 0; |
1353 | | |
1354 | 1.40M | link = newbuf(doc, BUFFER_SPAN); |
1355 | | |
1356 | 1.40M | if ((link_len = hoedown_autolink__url(&rewind, link, data, offset, size, 0)) > 0) { |
1357 | 4.77k | if (ob->size > rewind) |
1358 | 4.52k | ob->size -= rewind; |
1359 | 245 | else |
1360 | 245 | ob->size = 0; |
1361 | | |
1362 | 4.77k | doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_NORMAL, &doc->data); |
1363 | 4.77k | } |
1364 | | |
1365 | 1.40M | popbuf(doc, BUFFER_SPAN); |
1366 | 1.40M | return link_len; |
1367 | 1.41M | } |
1368 | | |
1369 | | static size_t |
1370 | 142k | char_image(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) { |
1371 | 142k | size_t ret; |
1372 | | |
1373 | 142k | if (size < 2 || data[1] != '[') return 0; |
1374 | | |
1375 | 57.7k | ret = char_link(ob, doc, data + 1, offset + 1, size - 1); |
1376 | 57.7k | if (!ret) return 0; |
1377 | 8.21k | return ret + 1; |
1378 | 57.7k | } |
1379 | | |
1380 | | /* char_link • '[': parsing a link, a footnote or an image */ |
1381 | | static size_t |
1382 | | char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1383 | 772k | { |
1384 | 772k | int is_img = (offset && data[-1] == '!' && !is_escaped(data - offset, offset - 1)); |
1385 | 772k | int is_footnote = (doc->ext_flags & HOEDOWN_EXT_FOOTNOTES && size > 1 && data[1] == '^'); |
1386 | 772k | size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0; |
1387 | 772k | hoedown_buffer *content = NULL; |
1388 | 772k | hoedown_buffer *link = NULL; |
1389 | 772k | hoedown_buffer *title = NULL; |
1390 | 772k | hoedown_buffer *u_link = NULL; |
1391 | 772k | hoedown_buffer *inline_attr = NULL; |
1392 | 772k | hoedown_buffer *ref_attr = NULL; |
1393 | 772k | hoedown_buffer *attr = NULL; |
1394 | 772k | hoedown_buffer *id = NULL; |
1395 | 772k | size_t org_work_size = doc->work_bufs[BUFFER_SPAN].size; |
1396 | 772k | int ret = 0, in_title = 0, qtype = 0; |
1397 | 772k | hoedown_link_type link_type = HOEDOWN_LINK_NONE; |
1398 | 772k | int ref_attr_exists = 0, inline_attr_exists = 0; |
1399 | | |
1400 | | /* checking whether the correct renderer exists */ |
1401 | 772k | if ((is_footnote && !doc->md.footnote_ref) || (is_img && !doc->md.image) |
1402 | 772k | || (!is_img && !is_footnote && !doc->md.link)) |
1403 | 0 | goto cleanup; |
1404 | | |
1405 | | /* looking for the matching closing bracket */ |
1406 | 772k | i += find_emph_char(data + i, size - i, ']'); |
1407 | 772k | txt_e = i; |
1408 | | |
1409 | 772k | if (i < size && data[i] == ']') i++; |
1410 | 201k | else goto cleanup; |
1411 | | |
1412 | | /* footnote link */ |
1413 | 571k | if (is_footnote) { |
1414 | 34.4k | hoedown_buffer id = { NULL, 0, 0, 0, NULL, NULL, NULL }; |
1415 | 34.4k | struct footnote_ref *fr; |
1416 | | |
1417 | 34.4k | if (txt_e < 3) |
1418 | 19.8k | goto cleanup; |
1419 | | |
1420 | 14.6k | id.data = data + 2; |
1421 | 14.6k | id.size = txt_e - 2; |
1422 | | |
1423 | 14.6k | fr = find_footnote_ref(&doc->footnotes_found, id.data, id.size); |
1424 | | |
1425 | | /* mark footnote used */ |
1426 | 14.6k | if (fr && !fr->is_used) { |
1427 | 862 | if(!add_footnote_ref(&doc->footnotes_used, fr)) |
1428 | 0 | goto cleanup; |
1429 | 862 | fr->is_used = 1; |
1430 | 862 | fr->num = doc->footnotes_used.count; |
1431 | | |
1432 | | /* render */ |
1433 | 862 | if (doc->md.footnote_ref) { |
1434 | 862 | doc->link_id = &id; |
1435 | 862 | ret = doc->md.footnote_ref(ob, fr->num, &doc->data); |
1436 | 862 | doc->link_id = NULL; |
1437 | 862 | } |
1438 | 862 | } |
1439 | | |
1440 | 14.6k | goto cleanup; |
1441 | 14.6k | } |
1442 | | |
1443 | | /* skip any amount of spacing */ |
1444 | | /* (this is much more laxist than original markdown syntax) */ |
1445 | 2.00M | while (i < size && _isspace(data[i])) |
1446 | 1.47M | i++; |
1447 | | |
1448 | | /* inline style link */ |
1449 | 537k | if (i < size && data[i] == '(') { |
1450 | 43.7k | size_t nb_p; |
1451 | | |
1452 | 43.7k | link_type = HOEDOWN_LINK_INLINE; |
1453 | | |
1454 | | /* skipping initial spacing */ |
1455 | 43.7k | i++; |
1456 | | |
1457 | 934k | while (i < size && _isspace(data[i])) |
1458 | 890k | i++; |
1459 | | |
1460 | 43.7k | link_b = i; |
1461 | | |
1462 | | /* looking for link end: ' " ) */ |
1463 | | /* Count the number of open parenthesis */ |
1464 | 43.7k | nb_p = 0; |
1465 | | |
1466 | 26.7M | while (i < size) { |
1467 | 26.7M | if (data[i] == '\\') i += 2; |
1468 | 26.6M | else if (data[i] == '(' && i != 0) { |
1469 | 133k | nb_p++; i++; |
1470 | 133k | } |
1471 | 26.5M | else if (data[i] == ')') { |
1472 | 12.8k | if (nb_p == 0) break; |
1473 | 4.85k | nb_p--; i++; |
1474 | 26.5M | } else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break; |
1475 | 26.4M | else i++; |
1476 | 26.7M | } |
1477 | | |
1478 | 43.7k | if (i >= size) goto cleanup; |
1479 | 30.9k | link_e = i; |
1480 | | |
1481 | | /* looking for title end if present */ |
1482 | 30.9k | if (data[i] == '\'' || data[i] == '"') { |
1483 | 22.8k | qtype = data[i]; |
1484 | 22.8k | in_title = 1; |
1485 | 22.8k | i++; |
1486 | 22.8k | title_b = i; |
1487 | | |
1488 | 514M | while (i < size) { |
1489 | 514M | if (data[i] == '\\') i += 2; |
1490 | 514M | else if (data[i] == qtype) {in_title = 0; i++;} |
1491 | 514M | else if ((data[i] == ')') && !in_title) break; |
1492 | 514M | else i++; |
1493 | 514M | } |
1494 | | |
1495 | 22.8k | if (i >= size) goto cleanup; |
1496 | | |
1497 | | /* skipping spacing after title */ |
1498 | 11.7k | title_e = i - 1; |
1499 | 341k | while (title_e > title_b && _isspace(data[title_e])) |
1500 | 329k | title_e--; |
1501 | | |
1502 | | /* checking for closing quote presence */ |
1503 | 11.7k | if (data[title_e] != '\'' && data[title_e] != '"') { |
1504 | 3.15k | title_b = title_e = 0; |
1505 | 3.15k | link_e = i; |
1506 | 3.15k | } |
1507 | 11.7k | } |
1508 | | |
1509 | | /* remove spacing at the end of the link */ |
1510 | 279k | while (link_e > link_b && _isspace(data[link_e - 1])) |
1511 | 259k | link_e--; |
1512 | | |
1513 | | /* remove optional angle brackets around the link */ |
1514 | 19.7k | if (data[link_b] == '<' && data[link_e - 1] == '>') { |
1515 | 580 | link_b++; |
1516 | 580 | link_e--; |
1517 | 580 | } |
1518 | | |
1519 | | /* building escaped link and title */ |
1520 | 19.7k | if (link_e > link_b) { |
1521 | 4.94k | link = newbuf(doc, BUFFER_SPAN); |
1522 | 4.94k | hoedown_buffer_put(link, data + link_b, link_e - link_b); |
1523 | 4.94k | } |
1524 | | |
1525 | 19.7k | if (title_e > title_b) { |
1526 | 2.23k | title = newbuf(doc, BUFFER_SPAN); |
1527 | 2.23k | hoedown_buffer_put(title, data + title_b, title_e - title_b); |
1528 | 2.23k | } |
1529 | | |
1530 | 19.7k | i++; |
1531 | 19.7k | } |
1532 | | |
1533 | | /* reference style link */ |
1534 | 493k | else if (i < size && data[i] == '[') { |
1535 | 59.6k | struct link_ref *lr; |
1536 | | |
1537 | 59.6k | id = newbuf(doc, BUFFER_SPAN); |
1538 | | |
1539 | | /* looking for the id */ |
1540 | 59.6k | i++; |
1541 | 59.6k | link_b = i; |
1542 | 10.8M | while (i < size && data[i] != ']') i++; |
1543 | 59.6k | if (i >= size) goto cleanup; |
1544 | 55.1k | link_e = i; |
1545 | | |
1546 | | /* finding the link_ref */ |
1547 | 55.1k | if (link_b == link_e) { |
1548 | 27.9k | link_type = HOEDOWN_LINK_EMPTY_REFERENCE; |
1549 | 27.9k | replace_spacing(id, data + 1, txt_e - 1); |
1550 | 27.9k | } else { |
1551 | 27.1k | link_type = HOEDOWN_LINK_REFERENCE; |
1552 | 27.1k | hoedown_buffer_put(id, data + link_b, link_e - link_b); |
1553 | 27.1k | } |
1554 | | |
1555 | 55.1k | lr = find_link_ref(doc->refs, id->data, id->size); |
1556 | 55.1k | if (!lr) |
1557 | 50.3k | goto cleanup; |
1558 | | |
1559 | | /* keeping link and title from link_ref */ |
1560 | 4.86k | link = lr->link; |
1561 | 4.86k | title = lr->title; |
1562 | 4.86k | ref_attr = lr->attr; |
1563 | 4.86k | i++; |
1564 | 4.86k | } |
1565 | | |
1566 | | /* shortcut reference style link */ |
1567 | 433k | else { |
1568 | 433k | struct link_ref *lr; |
1569 | | |
1570 | 433k | id = newbuf(doc, BUFFER_SPAN); |
1571 | | |
1572 | 433k | link_type = HOEDOWN_LINK_SHORTCUT; |
1573 | | |
1574 | | /* crafting the id */ |
1575 | 433k | replace_spacing(id, data + 1, txt_e - 1); |
1576 | | |
1577 | | /* finding the link_ref */ |
1578 | 433k | lr = find_link_ref(doc->refs, id->data, id->size); |
1579 | 433k | if (!lr) |
1580 | 174k | goto cleanup; |
1581 | | |
1582 | | /* keeping link and title from link_ref */ |
1583 | 259k | link = lr->link; |
1584 | 259k | title = lr->title; |
1585 | 259k | ref_attr = lr->attr; |
1586 | | |
1587 | | /* rewinding the spacing */ |
1588 | 259k | i = txt_e + 1; |
1589 | 259k | } |
1590 | | |
1591 | | /* building content: img alt is kept, only link content is parsed */ |
1592 | 283k | if (txt_e > 1) { |
1593 | 24.0k | content = newbuf(doc, BUFFER_SPAN); |
1594 | 24.0k | if (is_img) { |
1595 | 13.8k | hoedown_buffer_put(content, data + 1, txt_e - 1); |
1596 | 13.8k | } else { |
1597 | | /* disable autolinking when parsing inline the |
1598 | | * content of a link */ |
1599 | 10.1k | doc->in_link_body = 1; |
1600 | 10.1k | parse_inline(content, doc, data + 1, txt_e - 1); |
1601 | 10.1k | doc->in_link_body = 0; |
1602 | 10.1k | } |
1603 | 24.0k | } |
1604 | | |
1605 | 283k | if (link) { |
1606 | 269k | u_link = newbuf(doc, BUFFER_SPAN); |
1607 | 269k | unscape_text(u_link, link); |
1608 | 269k | } |
1609 | | |
1610 | | /* if special attributes are enabled, attempt to parse an inline one from |
1611 | | * the link */ |
1612 | 283k | if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) { |
1613 | | /* attr is a span because cleanup code depends on it being span */ |
1614 | 283k | inline_attr = newbuf(doc, BUFFER_SPAN); |
1615 | 283k | i += parse_inline_attributes(data + i, size - i, inline_attr, doc->attr_activation); |
1616 | 283k | } |
1617 | | |
1618 | | /* remove optional < and > around inline and ref special attributes */ |
1619 | 283k | if (ref_attr && ref_attr->size > 0) { |
1620 | 29.6k | if (ref_attr->size > 1) { |
1621 | 6.11k | if (ref_attr->data[0] == '<') { |
1622 | 244 | hoedown_buffer_slurp(ref_attr, 1); |
1623 | 244 | } |
1624 | 6.11k | if (ref_attr->data[ref_attr->size - 1] == '>') { |
1625 | 81 | ref_attr->size--; |
1626 | 81 | } |
1627 | 6.11k | } |
1628 | 29.6k | } |
1629 | 283k | if (inline_attr && inline_attr->size > 0) { |
1630 | 4.43k | if (inline_attr->size > 1) { |
1631 | 3.51k | if (inline_attr->data[0] == '<') { |
1632 | 585 | hoedown_buffer_slurp(inline_attr, 1); |
1633 | 585 | } |
1634 | 3.51k | if (inline_attr->data[inline_attr->size - 1] == '>') { |
1635 | 221 | inline_attr->size--; |
1636 | 221 | } |
1637 | 3.51k | } |
1638 | 4.43k | } |
1639 | | |
1640 | | /* construct the final attr that is actually applied to the link */ |
1641 | 283k | ref_attr_exists = ref_attr && ref_attr->size > 0; |
1642 | 283k | inline_attr_exists = inline_attr && inline_attr->size > 0; |
1643 | 283k | if (ref_attr_exists || inline_attr_exists) { |
1644 | 33.4k | attr = newbuf(doc, BUFFER_SPAN); |
1645 | 33.4k | if (ref_attr_exists) { |
1646 | 29.6k | hoedown_buffer_put(attr, ref_attr->data, ref_attr->size); |
1647 | 29.6k | } |
1648 | | /* if both inline and ref attrs exist, join them with a space to prevent |
1649 | | * conflicts */ |
1650 | 33.4k | if (ref_attr_exists && inline_attr_exists) { |
1651 | 669 | hoedown_buffer_putc(attr, ' '); |
1652 | 669 | } |
1653 | 33.4k | if (inline_attr_exists) { |
1654 | 4.42k | hoedown_buffer_put(attr, inline_attr->data, inline_attr->size); |
1655 | 4.42k | } |
1656 | 33.4k | } |
1657 | | |
1658 | | /* calling the relevant rendering function */ |
1659 | 283k | doc->link_id = id; |
1660 | 283k | doc->link_type = link_type; |
1661 | 283k | doc->link_ref_attr = ref_attr; |
1662 | 283k | doc->link_inline_attr = inline_attr; |
1663 | 283k | if (is_img) { |
1664 | 24.9k | ret = doc->md.image(ob, u_link, title, content, attr, &doc->data); |
1665 | 259k | } else { |
1666 | 259k | ret = doc->md.link(ob, content, u_link, title, attr, &doc->data); |
1667 | 259k | } |
1668 | 283k | doc->link_inline_attr = NULL; |
1669 | 283k | doc->link_ref_attr = NULL; |
1670 | 283k | doc->link_type = HOEDOWN_LINK_NONE; |
1671 | 283k | doc->link_id = NULL; |
1672 | | |
1673 | | /* cleanup */ |
1674 | 772k | cleanup: |
1675 | 772k | doc->work_bufs[BUFFER_SPAN].size = (int)org_work_size; |
1676 | 772k | return ret ? i : 0; |
1677 | 283k | } |
1678 | | |
1679 | | static size_t |
1680 | | char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1681 | 266k | { |
1682 | 266k | size_t sup_start, sup_len; |
1683 | 266k | hoedown_buffer *sup; |
1684 | | |
1685 | 266k | if (!doc->md.superscript) |
1686 | 0 | return 0; |
1687 | | |
1688 | 266k | if (size < 2) |
1689 | 8.33k | return 0; |
1690 | | |
1691 | 257k | if (data[1] == '(') { |
1692 | 27.0k | sup_start = 2; |
1693 | 27.0k | sup_len = find_emph_char(data + 2, size - 2, ')') + 2; |
1694 | | |
1695 | 27.0k | if (sup_len == size) |
1696 | 4.11k | return 0; |
1697 | 230k | } else { |
1698 | 230k | sup_start = sup_len = 1; |
1699 | | |
1700 | 6.54M | while (sup_len < size && !_isspace(data[sup_len])) |
1701 | 6.31M | sup_len++; |
1702 | 230k | } |
1703 | | |
1704 | 253k | if (sup_len - sup_start == 0) |
1705 | 23.2k | return (sup_start == 2) ? 3 : 0; |
1706 | | |
1707 | 230k | sup = newbuf(doc, BUFFER_SPAN); |
1708 | 230k | parse_inline(sup, doc, data + sup_start, sup_len - sup_start); |
1709 | 230k | doc->md.superscript(ob, sup, &doc->data); |
1710 | 230k | popbuf(doc, BUFFER_SPAN); |
1711 | | |
1712 | 230k | return (sup_start == 2) ? sup_len + 1 : sup_len; |
1713 | 253k | } |
1714 | | |
1715 | | static size_t |
1716 | | char_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1717 | 31.9k | { |
1718 | | /* double dollar */ |
1719 | 31.9k | if (size > 1 && data[1] == '$') |
1720 | 6.45k | return parse_math(ob, doc, data, offset, size, "$$", 2, 1); |
1721 | | |
1722 | | /* single dollar allowed only with MATH_EXPLICIT flag */ |
1723 | 25.4k | if (doc->ext_flags & HOEDOWN_EXT_MATH_EXPLICIT) |
1724 | 25.4k | return parse_math(ob, doc, data, offset, size, "$", 1, 0); |
1725 | | |
1726 | 0 | return 0; |
1727 | 25.4k | } |
1728 | | |
1729 | | /********************************* |
1730 | | * BLOCK-LEVEL PARSING FUNCTIONS * |
1731 | | *********************************/ |
1732 | | |
/* is_empty • returns the line length when it is empty, 0 otherwise */
/* a line is empty when it holds nothing but spaces before its '\n' (or
 * before the end of data); the returned length counts one char past the
 * last space, which is the '\n' when the line has one */
static size_t
is_empty(const uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < size && data[pos] != '\n') {
		if (data[pos] != ' ')
			return 0;
		pos++;
	}

	return pos + 1;
}
1745 | | |
/* is_hrule • returns whether a line is a horizontal rule */
/* a rule is up to three leading spaces followed by at least three of the
 * same char among '*', '-' and '_', with only spaces mixed in */
static int
is_hrule(uint8_t *data, size_t size)
{
	size_t pos = 0, marks = 0;
	uint8_t rule;

	if (size < 3)
		return 0;

	/* skipping up to three initial spaces */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* at least three chars must remain, starting with a rule char */
	if (pos + 2 >= size)
		return 0;

	rule = data[pos];
	if (rule != '*' && rule != '-' && rule != '_')
		return 0;

	/* the whole line must be the rule char or space */
	for (; pos < size && data[pos] != '\n'; pos++) {
		if (data[pos] == rule)
			marks++;
		else if (data[pos] != ' ')
			return 0;
	}

	return marks >= 3;
}
1776 | | |
/* is_codefence • checks if a line is a code fence; returns the offset just
 * past the run of fence characters, or 0 when the line is not a fence.
 * when width / chr are non-NULL they receive the length of the fence run and
 * the fence character ('~' or '`'); they are left untouched on failure */
static size_t
is_codefence(uint8_t *data, size_t size, size_t *width, uint8_t *chr)
{
	size_t pos = 0, run = 1, scan;
	uint8_t fence;

	/* a fence needs at least three characters */
	if (size < 3)
		return 0;

	/* skipping up to three initial spaces */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* looking at the fence character */
	if (pos + 2 >= size)
		return 0;

	fence = data[pos];
	if (fence != '~' && fence != '`')
		return 0;

	/* measuring the run of identical fence characters */
	for (pos++; pos < size && data[pos] == fence; pos++)
		run++;

	if (run < 3)
		return 0;

	/* the fence character must not show up again on this line,
	 * otherwise this is a code span rather than a fence */
	for (scan = pos; scan < size && data[scan] != '\n'; scan++) {
		if (data[scan] == fence)
			return 0;
	}

	if (width)
		*width = run;
	if (chr)
		*chr = fence;

	return pos;
}
1819 | | |
1820 | | /* expects single line, checks if it's a codefence and extracts language */ |
1821 | | static int |
1822 | | parse_codefence(hoedown_document *doc, uint8_t *data, size_t size, hoedown_buffer *lang, size_t *width, uint8_t *chr, unsigned int flags, hoedown_buffer *attr) |
1823 | 5.58M | { |
1824 | 5.58M | size_t i, w, lang_start, attr_start = 0; |
1825 | | |
1826 | 5.58M | i = w = is_codefence(data, size, width, chr); |
1827 | 5.58M | if (i == 0) |
1828 | 5.57M | return 0; |
1829 | | |
1830 | 123k | while (i < size && _isspace(data[i])) |
1831 | 109k | i++; |
1832 | | |
1833 | 13.5k | lang_start = i; |
1834 | | |
1835 | 13.5k | if (flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) { |
1836 | 13.5k | attr_start = i + parse_attributes(data + i, size - i, attr, NULL, "", 0, doc->attr_activation); |
1837 | 69.0k | while (i < attr_start) { |
1838 | 56.8k | if (_isspace(data[i])) { |
1839 | 1.38k | break; |
1840 | 1.38k | } |
1841 | 55.5k | i++; |
1842 | 55.5k | } |
1843 | 13.5k | } else { |
1844 | 0 | while (i < size && !_isspace(data[i])) |
1845 | 0 | i++; |
1846 | 0 | } |
1847 | | |
1848 | 13.5k | lang->data = data + lang_start; |
1849 | 13.5k | lang->size = i - lang_start; |
1850 | | |
1851 | 13.5k | return w; |
1852 | 5.58M | } |
1853 | | |
1854 | | /* is_atxheader • returns whether the line is a hash-prefixed header */ |
1855 | | static int |
1856 | | is_atxheader(hoedown_document *doc, uint8_t *data, size_t size) |
1857 | 6.37M | { |
1858 | 6.37M | size_t level = 0, begin = 0, len; |
1859 | 6.37M | uint8_t *p; |
1860 | | |
1861 | 6.37M | if (data[0] != '#') |
1862 | 6.26M | return 0; |
1863 | | |
1864 | 231k | while (level < size && level < 6 && data[level] == '#') |
1865 | 127k | level++; |
1866 | | |
1867 | 104k | if (level >= size || data[level] == '\n') { |
1868 | 33.2k | return 0; |
1869 | 33.2k | } |
1870 | | |
1871 | 70.9k | len = size - level; |
1872 | 70.9k | p = memchr(data + level, '\n', len); |
1873 | 70.9k | if (p) { |
1874 | 68.6k | len = p - (data + level) + 1; |
1875 | 68.6k | } |
1876 | | |
1877 | | /* if the header is only whitespace, it is not a header */ |
1878 | 70.9k | if (len && is_empty_all(data + level, len)) { |
1879 | 1.34k | return 0; |
1880 | 1.34k | } |
1881 | | |
1882 | 69.5k | if ((doc->ext_flags & HOEDOWN_EXT_SPACE_HEADERS) && level < size && data[level] != ' ') { |
1883 | 13.2k | return 0; |
1884 | 13.2k | } |
1885 | | |
1886 | | /* if the header is only special attribute, it is not a header */ |
1887 | 56.3k | if (len && (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE)) { |
1888 | 56.3k | p = memchr(data + level, '{', len); |
1889 | 56.3k | if (p) { |
1890 | | /* get number of characters from # to { */ |
1891 | 16.4k | begin = p - (data + level); |
1892 | 16.4k | if (begin > 0 && !is_empty_all(data + level, begin)) { |
1893 | 3.07k | return 1; |
1894 | 3.07k | } |
1895 | | /* check for special attributes after the # */ |
1896 | 13.3k | return !parse_inline_attributes(data + level + begin, len - begin, NULL, doc->attr_activation); |
1897 | 16.4k | } |
1898 | 56.3k | } |
1899 | | |
1900 | 39.8k | return 1; |
1901 | 56.3k | } |
1902 | | |
/* is_headerline • returns whether the line is a setext-style hdr underline */
/* returns 1 for a line of '=' (level 1 header), 2 for a line of '-' (level 2
 * header) and 0 otherwise. the run of markers may be followed by spaces only,
 * up to the newline or the end of the data. an empty input is never an
 * underline. */
static int
is_headerline(uint8_t *data, size_t size)
{
	size_t i;
	uint8_t mark;

	/* nothing to inspect: do not read data[0] */
	if (size == 0)
		return 0;

	mark = data[0];
	if (mark != '=' && mark != '-')
		return 0;

	/* run of underline markers, then optional trailing spaces */
	for (i = 1; i < size && data[i] == mark; i++)
		;
	while (i < size && data[i] == ' ')
		i++;

	/* anything else before the end of the line disqualifies it */
	if (i < size && data[i] != '\n')
		return 0;

	return mark == '=' ? 1 : 2;
}
1923 | | |
/* is_next_headerline • returns the is_headerline result for the line that
 * follows the first one in data, or 0 when there is no following line */
static int
is_next_headerline(uint8_t *data, size_t size)
{
	uint8_t *newline = memchr(data, '\n', size);
	size_t next;

	/* no newline at all: there is no second line */
	if (!newline)
		return 0;

	/* the newline is the last byte: the second line would be empty */
	next = (size_t)(newline - data) + 1;
	if (next >= size)
		return 0;

	return is_headerline(data + next, size - next);
}
1937 | | |
/* prefix_quote • returns blockquote prefix length */
/* the prefix is up to three spaces, a '>' and one optional space;
 * 0 is returned when the line does not start with such a prefix */
static size_t
prefix_quote(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three spaces of indentation */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	if (pos >= size || data[pos] != '>')
		return 0;

	/* swallow the marker and a single space after it, if any */
	pos++;
	if (pos < size && data[pos] == ' ')
		pos++;

	return pos;
}
1956 | | |
/* prefix_code • returns prefix length for block code */
/* the prefix is exactly four leading spaces: returns 4 when the line has
 * them, 0 otherwise */
static size_t
prefix_code(uint8_t *data, size_t size)
{
	size_t k;

	if (size <= 3)
		return 0;

	for (k = 0; k < 4; k++) {
		if (data[k] != ' ')
			return 0;
	}

	return 4;
}
1966 | | |
/* prefix_oli • returns ordered list item prefix */
/* the prefix is up to three spaces, one or more digits, a '.' and a space;
 * returns the prefix length, or 0 when the line is not an ordered list item
 * or when the line after it is a setext header underline */
static size_t
prefix_oli(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* up to three spaces of indentation */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* at least one digit is required */
	if (pos >= size || data[pos] < '0' || data[pos] > '9')
		return 0;

	do {
		pos++;
	} while (pos < size && data[pos] >= '0' && data[pos] <= '9');

	/* the number must be closed by ". " */
	if (pos + 1 >= size || data[pos] != '.' || data[pos + 1] != ' ')
		return 0;

	/* a line underlined as a header is not a list item */
	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
1991 | | |
/* prefix_uli • returns unordered list item prefix */
/* the prefix is up to three spaces, one of '*', '+' or '-', and a space;
 * returns the prefix length, or 0 when the line is not an unordered list item
 * or when the line after it is a setext header underline */
static size_t
prefix_uli(uint8_t *data, size_t size)
{
	size_t pos = 0;
	uint8_t bullet;

	/* up to three spaces of indentation */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* the bullet and the space after it must both fit */
	if (pos + 1 >= size)
		return 0;

	bullet = data[pos];
	if (bullet != '*' && bullet != '+' && bullet != '-')
		return 0;

	if (data[pos + 1] != ' ')
		return 0;

	/* a line underlined as a header is not a list item */
	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
2012 | | |
/* prefix_dt • returns dictionary definition prefix
 * this is in the form of /\s{0,3}: / (e.g. "  : ", where the leading spacing
 * is optional); returns the prefix length, or 0 when the line has no such
 * prefix or when the line after it is a setext header underline */
static size_t
prefix_dt(uint8_t *data, size_t size)
{
	size_t pos = 0;

	/* skip up to 3 whitespaces (since it's an indented codeblock at 4) */
	while (pos < 3 && pos < size && data[pos] == ' ')
		pos++;

	/* the colon and the space after it must both fit */
	if (pos + 1 >= size)
		return 0;

	/* if the first character after whitespaces isn't :, it isn't a dt */
	if (data[pos] != ':' || data[pos + 1] != ' ')
		return 0;

	/* a line underlined as a header is not a definition */
	if (is_next_headerline(data + pos, size - pos))
		return 0;

	return pos + 2;
}
2036 | | |
2037 | | /* is_paragraph • returns if the next block is a paragraph (doesn't follow any |
2038 | | * other special rules for other types of blocks) */ |
2039 | | static int |
2040 | | is_paragraph(hoedown_document *doc, uint8_t *txt_data, size_t end); |
2041 | | |
2042 | | /* prefix_dli • returns dictionary definition prefix |
2043 | | * a dli looks like a block of text, followed by optional whitespace, followed |
2044 | | * by another block with : as the first non-whitespace character */ |
2045 | | static size_t |
2046 | | prefix_dli(hoedown_document *doc, uint8_t *data, size_t size) |
2047 | 341k | { |
2048 | | /* end is to keep track of the final return value */ |
2049 | 341k | size_t i = 0, j = 0, end = 0; |
2050 | 341k | int empty = 0; |
2051 | | |
2052 | | /* if the first line has a : in front of it, it can't be a definition list |
2053 | | * that starts at this point */ |
2054 | 341k | if (prefix_dt(data, size)) { |
2055 | 3.04k | return 0; |
2056 | 3.04k | } |
2057 | | |
2058 | | /* temporarily toggle definition lists off to prevent infinite loops */ |
2059 | 338k | doc->ext_flags &= ~HOEDOWN_EXT_DEFINITION_LISTS; |
2060 | | |
2061 | | /* check if it is a block of text with no double newlines inside, followed by |
2062 | | * another block of text starting with : */ |
2063 | 5.36M | while (i < size) { |
2064 | | /* if the line we are on is empty, flip the empty flag to indicate that |
2065 | | * the next block of text we see has to start with : to be considered |
2066 | | * a definition list; then skip to the next line */ |
2067 | 5.29M | j = is_empty(data + i, size - i); |
2068 | 5.29M | if(j != 0) { |
2069 | 201k | empty = 1; |
2070 | 201k | i += j; |
2071 | 201k | continue; |
2072 | 201k | } |
2073 | | |
2074 | | /* if anything special is found while parsing the definition term part, |
2075 | | * then return so that the main loop can deal with it */ |
2076 | 5.09M | if (!is_paragraph(doc, data + i, size - i)) { |
2077 | 119k | break; |
2078 | 119k | } |
2079 | | |
2080 | | /* check if the current line starts with :, returning the position of the |
2081 | | * beginning of the line if it does */ |
2082 | 4.97M | j = prefix_dt(data + i, size - i); |
2083 | 4.97M | if (j > 0) { |
2084 | 49.0k | end = i; |
2085 | 49.0k | break; |
2086 | 4.92M | } else if(empty) { |
2087 | | /* if an empty newline has been found, then since : was not the first |
2088 | | * character after whitespaces, it can't be a definition list */ |
2089 | 101k | break; |
2090 | 101k | } |
2091 | | /* scan characters until the next newline */ |
2092 | 194M | for (i = i + 1; i < size && data[i - 1] != '\n'; i++); |
2093 | 4.82M | } |
2094 | | |
2095 | 338k | doc->ext_flags |= HOEDOWN_EXT_DEFINITION_LISTS; |
2096 | 338k | return end; |
2097 | 341k | } |
2098 | | |
2099 | | /* parse_block • parsing of one block; declared void, so nothing is returned */
2100 | | static void parse_block(hoedown_buffer *ob, hoedown_document *doc, |
2101 | | uint8_t *data, size_t size); |
2102 | | |
2103 | | |
2104 | | /* parse_blockquote • handles parsing of a blockquote fragment */ |
2105 | | static size_t |
2106 | | parse_blockquote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size) |
2107 | 27.5k | { |
2108 | 27.5k | size_t beg, end = 0, pre, work_size = 0; |
2109 | 27.5k | uint8_t *work_data = 0; |
2110 | 27.5k | hoedown_buffer *out = 0; |
2111 | | |
2112 | 27.5k | doc->blockquote_depth++; |
2113 | | |
2114 | 27.5k | out = newbuf(doc, BUFFER_BLOCK); |
2115 | 27.5k | beg = 0; |
2116 | 264k | while (beg < size) { |
2117 | 21.9M | for (end = beg + 1; end < size && data[end - 1] != '\n'; end++); |
2118 | | |
2119 | 255k | pre = prefix_quote(data + beg, end - beg); |
2120 | | |
2121 | 255k | if (pre) |
2122 | 31.4k | beg += pre; /* skipping prefix */ |
2123 | | |
2124 | | /* empty line finished */ |
2125 | 223k | else if ((doc->ext_flags & HOEDOWN_EXT_BLOCKQUOTE_EMPTY_LINE) && |
2126 | 223k | (is_empty(data + beg, end - beg))) |
2127 | 17.7k | break; |
2128 | | |
2129 | | /* empty line followed by non-quote line */ |
2130 | 205k | else if (is_empty(data + beg, end - beg) && |
2131 | 205k | (end >= size || (prefix_quote(data + end, size - end) == 0 && |
2132 | 0 | !is_empty(data + end, size - end)))) |
2133 | 0 | break; |
2134 | | |
2135 | 237k | if (beg < end) { /* copy into the in-place working buffer */ |
2136 | | /* hoedown_buffer_put(work, data + beg, end - beg); */ |
2137 | 234k | if (!work_data) |
2138 | 24.7k | work_data = data + beg; |
2139 | 209k | else if (data + beg != work_data + work_size) |
2140 | 46.0k | memmove(work_data + work_size, data + beg, end - beg); |
2141 | 234k | work_size += end - beg; |
2142 | 234k | } |
2143 | 237k | beg = end; |
2144 | 237k | } |
2145 | | |
2146 | 27.5k | parse_block(out, doc, work_data, work_size); |
2147 | 27.5k | if (doc->md.blockquote) |
2148 | 27.5k | doc->md.blockquote(ob, out, &doc->data); |
2149 | 27.5k | popbuf(doc, BUFFER_BLOCK); |
2150 | | |
2151 | 27.5k | doc->blockquote_depth--; |
2152 | | |
2153 | 27.5k | return end; |
2154 | 27.5k | } |
2155 | | |
2156 | | static size_t |
2157 | | parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int do_render); |
2158 | | |
2159 | | /* parse_paragraph • handles parsing of a regular paragraph */ |
2160 | | static size_t |
2161 | | parse_paragraph(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size) |
2162 | 282k | { |
2163 | 282k | hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL }; |
2164 | 282k | size_t i = 0, end = 0; |
2165 | 282k | int level = 0; |
2166 | | |
2167 | 282k | work.data = data; |
2168 | | |
2169 | 865k | while (i < size) { |
2170 | 151M | for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */; |
2171 | | |
2172 | 826k | if (is_empty(data + i, size - i)) |
2173 | 66.9k | break; |
2174 | | |
2175 | 759k | if ((level = is_headerline(data + i, size - i)) != 0) { |
2176 | 159k | if (i == 0) { |
2177 | 90.1k | level = 0; |
2178 | 90.1k | i = end; |
2179 | 90.1k | } |
2180 | 159k | break; |
2181 | 159k | } |
2182 | | |
2183 | 600k | if (is_atxheader(doc, data + i, size - i) || |
2184 | 600k | is_hrule(data + i, size - i) || |
2185 | 600k | prefix_quote(data + i, size - i)) { |
2186 | 16.6k | end = i; |
2187 | 16.6k | break; |
2188 | 16.6k | } |
2189 | | |
2190 | 583k | i = end; |
2191 | 583k | } |
2192 | | |
2193 | 282k | work.size = i; |
2194 | 553k | while (work.size && data[work.size - 1] == '\n') |
2195 | 271k | work.size--; |
2196 | | |
2197 | 282k | if (!level) { |
2198 | 212k | hoedown_buffer *attr = newbuf(doc, BUFFER_ATTRIBUTE); |
2199 | 212k | if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) { |
2200 | 212k | parse_attributes(work.data, work.size, NULL, attr, "paragraph", 1, doc->attr_activation); |
2201 | 212k | if (attr->size > 0) { |
2202 | | /* remove the length of the attribute from the work size - the 12 comes |
2203 | | * from the leading space (1), the paragraph (9), the @ symbol (1), and |
2204 | | * the {} (2) (any extra spaces in the attribute are included inside |
2205 | | * the attribute) */ |
2206 | 0 | work.size -= attr->size + 12; |
2207 | 0 | } |
2208 | 212k | } |
2209 | | |
2210 | 212k | hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK); |
2211 | 212k | parse_inline(tmp, doc, work.data, work.size); |
2212 | 212k | if (doc->md.paragraph) |
2213 | 212k | doc->md.paragraph(ob, tmp, attr, &doc->data); |
2214 | 212k | popbuf(doc, BUFFER_BLOCK); |
2215 | 212k | popbuf(doc, BUFFER_ATTRIBUTE); |
2216 | 212k | } else { |
2217 | 69.3k | hoedown_buffer *header_work; |
2218 | 69.3k | hoedown_buffer *attr_work; |
2219 | 69.3k | size_t len; |
2220 | | |
2221 | 69.3k | if (work.size) { |
2222 | 69.3k | size_t beg; |
2223 | 69.3k | i = work.size; |
2224 | 69.3k | work.size -= 1; |
2225 | | |
2226 | 9.59M | while (work.size && data[work.size] != '\n') |
2227 | 9.52M | work.size -= 1; |
2228 | | |
2229 | 69.3k | beg = work.size + 1; |
2230 | 69.3k | while (work.size && data[work.size - 1] == '\n') |
2231 | 0 | work.size -= 1; |
2232 | | |
2233 | 69.3k | if (work.size > 0) { |
2234 | 27.4k | hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK); |
2235 | 27.4k | parse_inline(tmp, doc, work.data, work.size); |
2236 | | |
2237 | 27.4k | if (doc->md.paragraph) |
2238 | 27.4k | doc->md.paragraph(ob, tmp, NULL, &doc->data); |
2239 | | |
2240 | 27.4k | popbuf(doc, BUFFER_BLOCK); |
2241 | 27.4k | work.data += beg; |
2242 | 27.4k | work.size = i - beg; |
2243 | 27.4k | } |
2244 | 41.8k | else work.size = i; |
2245 | 69.3k | } |
2246 | | |
2247 | 69.3k | header_work = newbuf(doc, BUFFER_SPAN); |
2248 | 69.3k | attr_work = newbuf(doc, BUFFER_ATTRIBUTE); |
2249 | | |
2250 | 69.3k | len = work.size; |
2251 | 69.3k | if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) { |
2252 | 69.3k | len = parse_attributes(work.data, work.size, attr_work, NULL, "", 1, doc->attr_activation); |
2253 | 69.3k | } |
2254 | | |
2255 | 69.3k | parse_inline(header_work, doc, work.data, len); |
2256 | | |
2257 | 69.3k | if (doc->md.header) { |
2258 | 69.3k | doc->header_type = HOEDOWN_HEADER_SETEXT; |
2259 | 69.3k | doc->md.header(ob, header_work, attr_work, (int)level, &doc->data); |
2260 | 69.3k | doc->header_type = HOEDOWN_HEADER_NONE; |
2261 | 69.3k | } |
2262 | | |
2263 | 69.3k | popbuf(doc, BUFFER_SPAN); |
2264 | 69.3k | popbuf(doc, BUFFER_ATTRIBUTE); |
2265 | 69.3k | } |
2266 | | |
2267 | 282k | return end; |
2268 | 282k | } |
2269 | | |
2270 | | /* parse_fencedcode • handles parsing of a block-level code fragment */ |
2271 | | static size_t |
2272 | | parse_fencedcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, unsigned int flags) |
2273 | 5.58M | { |
2274 | 5.58M | hoedown_buffer text = { 0, 0, 0, 0, NULL, NULL, NULL }; |
2275 | 5.58M | hoedown_buffer lang = { 0, 0, 0, 0, NULL, NULL, NULL }; |
2276 | 5.58M | size_t i = 0, text_start, line_start; |
2277 | 5.58M | size_t w, w2; |
2278 | 5.58M | size_t width, width2; |
2279 | 5.58M | uint8_t chr, chr2; |
2280 | | /* needed for parse_attribute functions as buffer functions do not work with |
2281 | | * buffers on the stack */ |
2282 | 5.58M | hoedown_buffer *attr = newbuf(doc, BUFFER_ATTRIBUTE); |
2283 | | |
2284 | | |
2285 | | /* parse codefence line */ |
2286 | 270M | while (i < size && data[i] != '\n') |
2287 | 264M | i++; |
2288 | | |
2289 | 5.58M | w = parse_codefence(doc, data, i, &lang, &width, &chr, flags, attr); |
2290 | 5.58M | if (!w) { |
2291 | 5.57M | popbuf(doc, BUFFER_ATTRIBUTE); |
2292 | 5.57M | return 0; |
2293 | 5.57M | } |
2294 | | |
2295 | | /* search for end */ |
2296 | 13.5k | i++; |
2297 | 13.5k | text_start = i; |
2298 | 3.28M | while ((line_start = i) < size) { |
2299 | 88.0M | while (i < size && data[i] != '\n') |
2300 | 84.7M | i++; |
2301 | | |
2302 | 3.27M | w2 = is_codefence(data + line_start, i - line_start, &width2, &chr2); |
2303 | 3.27M | if (w == w2 && width == width2 && chr == chr2 && |
2304 | 3.27M | is_empty(data + (line_start+w), i - (line_start+w))) |
2305 | 3.23k | break; |
2306 | | |
2307 | 3.26M | if (i < size) i++; |
2308 | 3.26M | } |
2309 | | |
2310 | 13.5k | text.data = data + text_start; |
2311 | 13.5k | text.size = line_start - text_start; |
2312 | | |
2313 | 13.5k | if (doc->md.blockcode) { |
2314 | 6.61k | doc->fencedcode_char = chr; |
2315 | 6.61k | doc->md.blockcode(ob, text.size ? &text : NULL, lang.size ? &lang : NULL, attr->size ? attr : NULL, &doc->data); |
2316 | 6.61k | doc->fencedcode_char = 0; |
2317 | 6.61k | } |
2318 | | |
2319 | 13.5k | popbuf(doc, BUFFER_ATTRIBUTE); |
2320 | | |
2321 | 13.5k | return i; |
2322 | 5.58M | } |
2323 | | |
2324 | | static size_t |
2325 | | parse_blockcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size) |
2326 | 0 | { |
2327 | 0 | size_t beg, end, pre; |
2328 | 0 | hoedown_buffer *work = 0; |
2329 | 0 | hoedown_buffer *attr = 0; |
2330 | |
|
2331 | 0 | work = newbuf(doc, BUFFER_BLOCK); |
2332 | 0 | attr = newbuf(doc, BUFFER_ATTRIBUTE); |
2333 | |
|
2334 | 0 | beg = 0; |
2335 | 0 | while (beg < size) { |
2336 | 0 | for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {}; |
2337 | 0 | pre = prefix_code(data + beg, end - beg); |
2338 | |
|
2339 | 0 | if (pre) |
2340 | 0 | beg += pre; /* skipping prefix */ |
2341 | 0 | else if (!is_empty(data + beg, end - beg)) |
2342 | | /* non-empty non-prefixed line breaks the pre */ |
2343 | 0 | break; |
2344 | | |
2345 | 0 | if (beg < end) { |
2346 | | /* verbatim copy to the working buffer, |
2347 | | escaping entities */ |
2348 | 0 | if (is_empty(data + beg, end - beg)) |
2349 | 0 | hoedown_buffer_putc(work, '\n'); |
2350 | 0 | else hoedown_buffer_put(work, data + beg, end - beg); |
2351 | 0 | } |
2352 | 0 | beg = end; |
2353 | 0 | } |
2354 | |
|
2355 | 0 | while (work->size && work->data[work->size - 1] == '\n') |
2356 | 0 | work->size -= 1; |
2357 | |
|
2358 | 0 | if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) { |
2359 | 0 | work->size = parse_attributes(work->data, work->size, NULL, attr, "", 0, doc->attr_activation); |
2360 | 0 | } |
2361 | |
|
2362 | 0 | hoedown_buffer_putc(work, '\n'); |
2363 | |
|
2364 | 0 | if (doc->md.blockcode) |
2365 | 0 | doc->md.blockcode(ob, work, NULL, attr, &doc->data); |
2366 | |
|
2367 | 0 | popbuf(doc, BUFFER_BLOCK); |
2368 | 0 | popbuf(doc, BUFFER_ATTRIBUTE); |
2369 | 0 | return beg; |
2370 | 0 | } |
2371 | | |
2372 | | /* parse_listitem • parsing of a single list item */
2373 | | /* assuming initial prefix is already removed
     | |  *
     | |  * The item marker is looked up with prefix_uli, then prefix_oli; when
     | |  * *flags carries HOEDOWN_LIST_DEFINITION the prefix_dt result replaces
     | |  * whatever the ordered-list attempt produced.
     | |  *
     | |  * The first line and the continuation lines of the item are gathered
     | |  * (minus up to four leading spaces each) into a working buffer, which is
     | |  * then rendered with parse_block or parse_inline and handed to the
     | |  * listitem callback, if one is set.
     | |  *
     | |  * flags: in/out; HOEDOWN_LI_END and HOEDOWN_LI_BLOCK may be set here.
     | |  * attribute: forwarded to parse_attributes as its second output buffer.
     | |  *
     | |  * Returns 0 when no item marker is recognized, otherwise the offset in
     | |  * data at which parsing of this item stopped. */
2374 | | static size_t
2375 | | parse_listitem(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags *flags, hoedown_buffer *attribute)
2376 | 328k | {
2377 | 328k | hoedown_buffer *work = 0, *inter = 0;
2378 | 328k | hoedown_buffer *attr = 0;
2379 | 328k | size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i, len, fence_pre = 0;
2380 | 328k | int in_empty = 0, has_inside_empty = 0, in_fence = 0;
2381 | 328k | uint8_t ul_item_char = '*';
2382 | 328k | hoedown_buffer *ol_numeral = NULL;
2383 | |
2384 | | /* keeping track of the first indentation prefix (at most 3 spaces) */
2385 | 335k | while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
2386 | 6.79k | orgpre++;
2387 | |
2388 | 328k | beg = prefix_uli(data, size);
2389 | 328k | if (beg) ul_item_char = data[beg - 2]; /* the marker sits two bytes before the end of the prefix */
2390 | 328k | if (!beg) {
2391 | 178k | beg = prefix_oli(data, size);
2392 | 178k | if (beg) {
2393 | 90.2k | ol_numeral = hoedown_buffer_new(1024);
2394 | | /* -2 to eliminate the trailing ". " */
2395 | 90.2k | hoedown_buffer_put(ol_numeral, data, beg - 2);
2396 | 90.2k | }
2397 | 178k | if (*flags & HOEDOWN_LIST_DEFINITION) {
2398 | 89.1k | beg = prefix_dt(data, size); /* NOTE: overwrites the prefix_oli result, even a non-zero one */
2399 | 89.1k | if (beg) ul_item_char = data[beg - 2];
2400 | 89.1k | }
2401 | 178k | }
2402 | |
2403 | 328k | if (!beg) { /* no recognizable marker: release the numeral buffer and give up */
2404 | 24.6k | if (ol_numeral) hoedown_buffer_free(ol_numeral);
2405 | 24.6k | return 0;
2406 | 24.6k | }
2407 | |
2408 | | /* skipping to the beginning of the following line */
2409 | 304k | end = beg;
2410 | 16.9M | while (end < size && data[end - 1] != '\n')
2411 | 16.6M | end++;
2412 | |
2413 | 304k | if (doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) {
2414 | 304k | fence_pre = is_codefence(data + beg, end - beg, &len, NULL);
2415 | 304k | if (fence_pre) {
2416 | 4.24k | in_fence = 1; /* the item text itself opens a fenced block */
2417 | 4.24k | fence_pre = fence_pre + beg - len; /* end of fence run minus its width: offset of the first fence character in data */
2418 | 4.24k | }
2419 | 304k | }
2420 | |
2421 | | /* getting working buffers: work collects the raw item text, inter the rendered output */
2422 | 304k | work = newbuf(doc, BUFFER_SPAN);
2423 | 304k | inter = newbuf(doc, BUFFER_SPAN);
2424 | |
2425 | | /* calculating the indentation (at most 4 spaces after the marker) */
2426 | 304k | i = 0;
2427 | 453k | while (i < 4 && beg + i < end && data[beg + i] == ' ')
2428 | 149k | i++;
2429 | |
2430 | 304k | beg += i;
2431 | |
2432 | | /* putting the first line into the working buffer */
2433 | 304k | hoedown_buffer_put(work, data + beg, end - beg);
2434 | 304k | beg = end;
2435 | |
2436 | 304k | attr = newbuf(doc, BUFFER_ATTRIBUTE);
2437 | |
2438 | | /* process the following lines; each pass handles the line [beg, end) */
2439 | 872k | while (beg < size) {
2440 | 852k | size_t has_next_uli = 0, has_next_oli = 0, has_next_dli = 0;
2441 | |
2442 | 852k | end++;
2443 | |
2444 | 55.7M | while (end < size && data[end - 1] != '\n')
2445 | 54.8M | end++;
2446 | |
2447 | | /* process an empty line: remember it and move on without copying */
2448 | 852k | if (is_empty(data + beg, end - beg)) {
2449 | 219k | in_empty = 1;
2450 | 219k | beg = end;
2451 | 219k | continue;
2452 | 219k | }
2453 | |
2454 | | /* calculating the indentation */
2455 | 632k | i = 0;
2456 | 780k | while (i < 4 && beg + i < end && data[beg + i] == ' ')
2457 | 148k | i++;
2458 | |
2459 | 632k | if (in_fence && i > fence_pre) { /* inside a fence, strip no more than fence_pre spaces */
2460 | 3.86k | i = fence_pre;
2461 | 3.86k | }
2462 | |
2463 | 632k | pre = i;
2464 | |
2465 | 632k | if (doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) {
2466 | 632k | if (is_codefence(data + beg + i, end - beg - i, NULL, NULL))
2467 | 3.68k | in_fence = !in_fence; /* every fence line toggles the state */
2468 | 632k | if (in_fence && fence_pre == 0) {
2469 | 19.0k | fence_pre = pre;
2470 | 19.0k | }
2471 | 632k | }
2472 | |
2473 | | /* Only check for new list items if we are **not** inside
2474 | | * a fenced code block */
2475 | 632k | if (!in_fence) {
2476 | 599k | has_next_uli = prefix_uli(data + beg + i, end - beg - i);
2477 | 599k | has_next_oli = prefix_oli(data + beg + i, end - beg - i);
2478 | |
2479 | | /* only check for the next definition if it is same indentation or less
2480 | | * since embedded definition lists need terms, so finding just a
2481 | | * colon by itself does not mean anything */
2482 | 599k | if (pre <= orgpre)
2483 | 533k | has_next_dli = prefix_dt(data + beg + i, end - beg - i);
2484 | 599k | }
2485 | |
2486 | | /* checking for a new item (a "- - -" style rule is not an item) */
2487 | 632k | if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) ||
2488 | 632k | has_next_oli || (*flags & HOEDOWN_LI_DD && has_next_dli)) {
2489 | 260k | if (in_empty)
2490 | 160k | has_inside_empty = 1;
2491 | |
2492 | | /* the following item must have the same (or less) indentation */
2493 | 260k | if (pre <= orgpre) {
2494 | | /* if the following item has different list type, we end this list */
2495 | 249k | if (in_empty && (
2496 | 158k | ((*flags & HOEDOWN_LIST_ORDERED) && has_next_uli) ||
2497 | 158k | (!(*flags & HOEDOWN_LIST_ORDERED) && has_next_oli))) {
2498 | 146k | *flags |= HOEDOWN_LI_END;
2499 | 146k | has_inside_empty = 0;
2500 | 146k | }
2501 | 249k | break;
2502 | 249k | }
2503 | |
2504 | 10.8k | if (!sublist) /* remember where the first nested item starts inside work */
2505 | 5.18k | sublist = work->size;
2506 | 10.8k | }
2507 | | /* joining only indented stuff after empty lines;
2508 | | * note that now we only require 1 space of indentation
2509 | | * to continue a list */
2510 | 372k | else if (in_empty && pre == 0) {
2511 | 33.3k | *flags |= HOEDOWN_LI_END;
2512 | 33.3k | break;
2513 | 33.3k | }
2514 | |
2515 | 349k | if (in_empty) { /* re-insert one newline for the empty line(s) skipped above */
2516 | 9.24k | hoedown_buffer_putc(work, '\n');
2517 | 9.24k | has_inside_empty = 1;
2518 | 9.24k | in_empty = 0;
2519 | 9.24k | }
2520 | |
2521 | | /* adding the line without prefix into the working buffer */
2522 | 349k | hoedown_buffer_put(work, data + beg + i, end - beg - i);
2523 | 349k | beg = end;
2524 | 349k | }
2525 | |
2526 | | /* render of li contents */
2527 | 304k | if (has_inside_empty)
2528 | 17.0k | *flags |= HOEDOWN_LI_BLOCK;
2529 | |
2530 | 304k | if (*flags & HOEDOWN_LI_BLOCK) {
2531 | | /* intermediate render of block li; from here on, pre and end are offsets into work */
2532 | 67.6k | pre = 0;
2533 | 67.6k | if (sublist && sublist < work->size) {
2534 | 2.64k | end = sublist;
2535 | 64.9k | } else {
2536 | 64.9k | end = work->size;
2537 | 64.9k | }
2538 | |
2539 | 67.6k | do { /* single-pass block: break is used as an early exit */
2540 | 67.6k | if (!(doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE)) {
2541 | 0 | break;
2542 | 0 | }
2543 | |
2544 | 67.6k | i = 0; /* i becomes the length of the first line of work */
2545 | 9.39M | while (i < end && work->data[i] != '\n') {
2546 | 9.32M | i++;
2547 | 9.32M | }
2548 | |
2549 | 67.6k | len = parse_attributes(work->data, i, attr, attribute, "list", 0, doc->attr_activation);
2550 | 67.6k | if (i == len) { /* parse_attributes kept the whole first line: nothing to split off */
2551 | 43.8k | break;
2552 | 43.8k | }
2553 | |
2554 | 23.7k | pre = i; /* the first line is rendered on its own, cut at len */
2555 | 23.7k | parse_block(inter, doc, work->data, len);
2556 | 23.7k | } while (0);
2557 | |
2558 | 0 | parse_block(inter, doc, work->data + pre, end - pre);
2559 | 67.6k | if (end == sublist) {
2560 | 2.64k | parse_block(inter, doc, work->data + sublist, work->size - sublist);
2561 | 2.64k | }
2562 | 236k | } else {
2563 | | /* intermediate render of inline li */
2564 | 236k | if (sublist && sublist < work->size) {
2565 | 2.53k | if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
2566 | 2.53k | len = parse_attributes(work->data, sublist, attr, attribute, "list", 0, doc->attr_activation);
2567 | 2.53k | } else {
2568 | 0 | len = sublist;
2569 | 0 | }
2570 | 2.53k | parse_inline(inter, doc, work->data, len);
2571 | 2.53k | parse_block(inter, doc, work->data + sublist, work->size - sublist);
2572 | 233k | } else {
2573 | 233k | if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
2574 | 233k | len = parse_attributes(work->data, work->size, attr, attribute, "list", 0, doc->attr_activation);
2575 | 233k | } else {
2576 | 0 | len = work->size;
2577 | 0 | }
2578 | 233k | parse_inline(inter, doc, work->data, len);
2579 | 233k | }
2580 | 236k | }
2581 | |
2582 | | /* render of li itself; ul_item_char / ol_numeral are exposed on doc only for the duration of the callback */
2583 | 304k | if (doc->md.listitem) {
2584 | 304k | doc->ul_item_char = ul_item_char;
2585 | 304k | doc->ol_numeral = ol_numeral;
2586 | 304k | doc->md.listitem(ob, inter, attr, flags, &doc->data);
2587 | 304k | doc->ol_numeral = NULL;
2588 | 304k | doc->ul_item_char = 0;
2589 | 304k | }
2590 | |
2591 | 304k | if (ol_numeral) hoedown_buffer_free(ol_numeral);
2592 | |
2593 | 304k | popbuf(doc, BUFFER_SPAN);
2594 | 304k | popbuf(doc, BUFFER_SPAN);
2595 | 304k | popbuf(doc, BUFFER_ATTRIBUTE);
2596 | 304k | return beg;
2597 | 328k | }
2598 | | |
2599 | | /* parse_definition • parsing of a term/definition pair, assuming starting |
2600 | | * at start of line */ |
2601 | | static size_t |
2602 | | parse_definition(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags *flags, hoedown_buffer *attribute) |
2603 | 41.5k | { |
2604 | | /* end represents the position of the first line where definitions start */ |
2605 | 41.5k | size_t j = 0, k = 0, len = 0, end = prefix_dli(doc, data, size); |
2606 | 41.5k | if (end <= 0) { |
2607 | 10.2k | return 0; |
2608 | 10.2k | } |
2609 | 31.2k | hoedown_buffer *work = 0, *attr_work; |
2610 | | |
2611 | | |
2612 | | /* scan all the definition terms, rendering them to the output buffer |
2613 | | * the +1 is to account for the trailing newline on each term |
2614 | | * j is a counter keeping track of the beginning of each new term */ |
2615 | 31.2k | *flags |= HOEDOWN_LI_DT; |
2616 | 120k | while (j + 1 < end) { |
2617 | | /* find the end of the term (where the newline is) */ |
2618 | 6.33M | for(k = j + 1; k - 1 < end && data[k - 1] != '\n'; k++); |
2619 | | |
2620 | 89.7k | len = k - j; |
2621 | | |
2622 | 89.7k | if (is_empty(data + j, len)) { |
2623 | 1.74k | j = k; |
2624 | 1.74k | continue; |
2625 | 1.74k | } |
2626 | | |
2627 | 87.9k | work = newbuf(doc, BUFFER_BLOCK); |
2628 | 87.9k | attr_work = newbuf(doc, BUFFER_ATTRIBUTE); |
2629 | | |
2630 | 87.9k | if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) { |
2631 | 87.9k | len = parse_attributes(data + j, len, attr_work, NULL, "", 1, doc->attr_activation); |
2632 | 87.9k | } |
2633 | | |
2634 | 87.9k | parse_inline(work, doc, data + j, len); |
2635 | | |
2636 | 87.9k | if (doc->md.listitem) { |
2637 | 87.9k | doc->md.listitem(ob, work, attr_work, flags, &doc->data); |
2638 | 87.9k | } |
2639 | | |
2640 | 87.9k | j = k; |
2641 | | |
2642 | 87.9k | popbuf(doc, BUFFER_BLOCK); |
2643 | 87.9k | popbuf(doc, BUFFER_ATTRIBUTE); |
2644 | 87.9k | } |
2645 | 31.2k | *flags &= ~HOEDOWN_LI_DT; |
2646 | | |
2647 | | /* scan all the definitions, rendering it to the output buffer */ |
2648 | 31.2k | *flags |= HOEDOWN_LI_DD; |
2649 | 118k | while (end < size) { |
2650 | 111k | j = parse_listitem(ob, doc, data + end, size - end, flags, attribute); |
2651 | 111k | if (j <= 0) { |
2652 | 23.6k | break; |
2653 | 23.6k | } |
2654 | 87.5k | end += j; |
2655 | 87.5k | } |
2656 | | |
2657 | 31.2k | *flags &= ~HOEDOWN_LI_DD; |
2658 | 31.2k | *flags &= ~HOEDOWN_LI_END; |
2659 | | |
2660 | 31.2k | return end; |
2661 | 41.5k | } |
2662 | | |
2663 | | /* parse_list • parsing ordered or unordered list block */ |
2664 | | static size_t |
2665 | | parse_list(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags flags) |
2666 | 197k | { |
2667 | 197k | hoedown_buffer *work = 0; |
2668 | 197k | hoedown_buffer *attr = 0; |
2669 | 197k | size_t i = 0, j; |
2670 | | |
2671 | 197k | doc->list_depth++; |
2672 | | |
2673 | 197k | work = newbuf(doc, BUFFER_BLOCK); |
2674 | 197k | attr = newbuf(doc, BUFFER_ATTRIBUTE); |
2675 | | |
2676 | 279k | while (i < size) { |
2677 | 259k | if (flags & HOEDOWN_LIST_DEFINITION) { |
2678 | 41.5k | j = parse_definition(work, doc, data + i, size - i, &flags, attr); |
2679 | 217k | } else { |
2680 | 217k | j = parse_listitem(work, doc, data + i, size - i, &flags, attr); |
2681 | 217k | } |
2682 | 259k | i += j; |
2683 | | |
2684 | 259k | if (!j || (flags & HOEDOWN_LI_END)) |
2685 | 177k | break; |
2686 | 259k | } |
2687 | | |
2688 | 197k | if (doc->md.list) |
2689 | 197k | doc->md.list(ob, work, attr, flags, &doc->data); |
2690 | 197k | popbuf(doc, BUFFER_BLOCK); |
2691 | 197k | popbuf(doc, BUFFER_ATTRIBUTE); |
2692 | | |
2693 | 197k | doc->list_depth--; |
2694 | | |
2695 | 197k | return i; |
2696 | 197k | } |
2697 | | |
2698 | | /* parse_atxheader • parsing of atx-style headers */ |
2699 | | static size_t |
2700 | | parse_atxheader(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size) |
2701 | 16.8k | { |
2702 | 16.8k | size_t level = 0; |
2703 | 16.8k | size_t i, end, skip; |
2704 | | |
2705 | 42.3k | while (level < size && level < 6 && data[level] == '#') |
2706 | 25.4k | level++; |
2707 | | |
2708 | 599k | for (i = level; i < size && data[i] == ' '; i++); |
2709 | | |
2710 | 6.81M | for (end = i; end < size && data[end] != '\n'; end++); |
2711 | 16.8k | skip = end; |
2712 | | |
2713 | 18.7k | while (end && data[end - 1] == '#') |
2714 | 1.92k | end--; |
2715 | | |
2716 | 54.9k | while (end && data[end - 1] == ' ') |
2717 | 38.0k | end--; |
2718 | | |
2719 | 16.8k | if (end > i) { |
2720 | 15.2k | hoedown_buffer *work = newbuf(doc, BUFFER_SPAN); |
2721 | 15.2k | hoedown_buffer *attr = newbuf(doc, BUFFER_ATTRIBUTE); |
2722 | 15.2k | size_t len; |
2723 | | |
2724 | 15.2k | len = end - i; |
2725 | 15.2k | if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) { |
2726 | 15.2k | len = parse_attributes(data + i, end - i, attr, NULL, "", 1, doc->attr_activation); |
2727 | 15.2k | } |
2728 | | |
2729 | 15.2k | parse_inline(work, doc, data + i, len); |
2730 | | |
2731 | 15.2k | if (doc->md.header) { |
2732 | 15.2k | doc->header_type = HOEDOWN_HEADER_ATX; |
2733 | 15.2k | doc->md.header(ob, work, attr, (int)level, &doc->data); |
2734 | 15.2k | doc->header_type = HOEDOWN_HEADER_NONE; |
2735 | 15.2k | } |
2736 | | |
2737 | 15.2k | popbuf(doc, BUFFER_SPAN); |
2738 | 15.2k | popbuf(doc, BUFFER_ATTRIBUTE); |
2739 | 15.2k | } else { |
2740 | 1.60k | doc->md.header(ob, NULL, NULL, (int)level, &doc->data); |
2741 | 1.60k | } |
2742 | | |
2743 | 16.8k | return skip; |
2744 | 16.8k | } |
2745 | | |
2746 | | /* parse_footnote_def • parse a single footnote definition */ |
2747 | | static void |
2748 | | parse_footnote_def(hoedown_buffer *ob, hoedown_document *doc, unsigned int num, const hoedown_buffer *name, uint8_t *data, size_t size) |
2749 | 862 | { |
2750 | 862 | hoedown_buffer *work = 0; |
2751 | 862 | work = newbuf(doc, BUFFER_SPAN); |
2752 | 862 | doc->footnote_id = name; |
2753 | | |
2754 | 862 | parse_block(work, doc, data, size); |
2755 | | |
2756 | 862 | if (doc->md.footnote_def) |
2757 | 862 | doc->md.footnote_def(ob, work, num, &doc->data); |
2758 | | |
2759 | 862 | doc->footnote_id = NULL; |
2760 | 862 | popbuf(doc, BUFFER_SPAN); |
2761 | 862 | } |
2762 | | |
2763 | | /* parse_footnote_list • render the contents of the footnotes */ |
2764 | | static void |
2765 | | parse_footnote_list(hoedown_buffer *ob, hoedown_document *doc, struct footnote_list *footnotes) |
2766 | 8.92k | { |
2767 | 8.92k | hoedown_buffer *work = 0; |
2768 | 8.92k | struct footnote_item *item; |
2769 | 8.92k | struct footnote_ref *ref; |
2770 | | |
2771 | 8.92k | if (footnotes->count == 0) |
2772 | 8.25k | return; |
2773 | | |
2774 | 669 | work = newbuf(doc, BUFFER_BLOCK); |
2775 | | |
2776 | 669 | item = footnotes->head; |
2777 | 1.53k | while (item) { |
2778 | 862 | ref = item->ref; |
2779 | 862 | parse_footnote_def(work, doc, ref->num, ref->name, ref->contents->data, ref->contents->size); |
2780 | 862 | item = item->next; |
2781 | 862 | } |
2782 | | |
2783 | 669 | if (doc->md.footnotes) |
2784 | 669 | doc->md.footnotes(ob, work, &doc->data); |
2785 | 669 | popbuf(doc, BUFFER_BLOCK); |
2786 | 669 | } |
2787 | | |
2788 | | /* htmlblock_is_end • check for end of HTML block : </tag>( *)\n */ |
2789 | | /* returns tag length on match, 0 otherwise */ |
2790 | | /* assumes data starts with "<" */ |
2791 | | static size_t |
2792 | | htmlblock_is_end( |
2793 | | const char *tag, |
2794 | | size_t tag_len, |
2795 | | hoedown_document *doc, |
2796 | | uint8_t *data, |
2797 | | size_t size) |
2798 | 7.90M | { |
2799 | 7.90M | size_t i = tag_len + 3, w; |
2800 | | |
2801 | | /* try to match the end tag */ |
2802 | | /* note: we're not considering tags like "</tag >" which are still valid */ |
2803 | 7.90M | if (i > size || |
2804 | 7.90M | data[1] != '/' || |
2805 | 7.90M | strncasecmp((char *)data + 2, tag, tag_len) != 0 || |
2806 | 7.90M | data[tag_len + 2] != '>') |
2807 | 7.80M | return 0; |
2808 | | |
2809 | | /* rest of the line must be empty */ |
2810 | 105k | if ((w = is_empty(data + i, size - i)) == 0 && i < size) |
2811 | 86.4k | return 0; |
2812 | | |
2813 | 18.7k | return i + w; |
2814 | 105k | } |
2815 | | |
2816 | | /* htmlblock_find_end • try to find HTML block ending tag */ |
2817 | | /* returns the length on match, 0 otherwise */ |
2818 | | static size_t |
2819 | | htmlblock_find_end( |
2820 | | const char *tag, |
2821 | | size_t tag_len, |
2822 | | hoedown_document *doc, |
2823 | | uint8_t *data, |
2824 | | size_t size) |
2825 | 16.5M | { |
2826 | 16.5M | size_t i = 0, w; |
2827 | | |
2828 | 24.4M | while (1) { |
2829 | 915M | while (i < size && data[i] != '<') i++; |
2830 | 24.4M | if (i >= size) return 0; |
2831 | | |
2832 | 7.90M | w = htmlblock_is_end(tag, tag_len, doc, data + i, size - i); |
2833 | 7.90M | if (w) return i + w; |
2834 | 7.89M | i++; |
2835 | 7.89M | } |
2836 | 16.5M | } |
2837 | | |
2838 | | /* htmlblock_find_end_strict • try to find end of HTML block in strict mode */ |
2839 | | /* (it must have a blank line or a new HTML tag afterwards) */ |
2840 | | /* returns the length on match, 0 otherwise */ |
2841 | | static size_t |
2842 | | htmlblock_find_end_strict( |
2843 | | const char *tag, |
2844 | | size_t tag_len, |
2845 | | hoedown_document *doc, |
2846 | | uint8_t *data, |
2847 | | size_t size) |
2848 | 15.9k | { |
2849 | 15.9k | size_t i = 0, mark; |
2850 | | |
2851 | 16.5M | while (1) { |
2852 | 16.5M | mark = i; |
2853 | 523M | while (i < size && data[i] != '\n') i++; |
2854 | 16.5M | if (i < size) i++; |
2855 | 16.5M | if (i == mark) return 0; |
2856 | | |
2857 | 16.5M | mark += htmlblock_find_end(tag, tag_len, doc, data + mark, i - mark); |
2858 | 16.5M | if (mark == i && (is_empty(data + i, size - i) || (i + 1 < size && data[i] == '<' && data[i + 1] != '/') || i >= size)) break; |
2859 | 16.5M | } |
2860 | | |
2861 | 1.81k | return i; |
2862 | 15.9k | } |
2863 | | |
2864 | | /* parse_htmlblock • parsing of inline HTML block */ |
2865 | | static size_t |
2866 | | parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int do_render) |
2867 | 336k | { |
2868 | 336k | hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL }; |
2869 | 336k | size_t i, j = 0, tag_len, tag_end; |
2870 | 336k | const char *curtag = NULL; |
2871 | 336k | int meta = 0; |
2872 | | |
2873 | 336k | work.data = data; |
2874 | | |
2875 | | /* identification of the opening tag */ |
2876 | 336k | if (size < 2 || data[0] != '<') |
2877 | 250 | return 0; |
2878 | | |
2879 | 335k | i = 1; |
2880 | 10.6M | while (i < size && data[i] != '>' && data[i] != ' ') |
2881 | 10.2M | i++; |
2882 | | |
2883 | 335k | if (i < size) { |
2884 | 324k | if (doc->ext_flags & HOEDOWN_EXT_HTML5_BLOCKS) |
2885 | 324k | curtag = hoedown_find_html5_block_tag((char *)data + 1, (int)i - 1); |
2886 | 0 | else |
2887 | 0 | curtag = hoedown_find_block_tag((char *)data + 1, (int)i - 1); |
2888 | 324k | } |
2889 | | |
2890 | | /* handling of special cases */ |
2891 | 335k | if (!curtag) { |
2892 | | |
2893 | | /* HTML comment, laxist form */ |
2894 | 319k | if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') { |
2895 | 24.1k | i = 5; |
2896 | | |
2897 | 24.1k | if (data[4] == '*') { |
2898 | 953 | meta++; |
2899 | 953 | } |
2900 | | |
2901 | 210M | while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>')) |
2902 | 210M | i++; |
2903 | | |
2904 | 24.1k | if (data[i - 3] == '*') { |
2905 | 255 | meta++; |
2906 | 255 | } |
2907 | | |
2908 | 24.1k | i++; |
2909 | | |
2910 | 24.1k | if (i < size) |
2911 | 19.1k | j = is_empty(data + i, size - i); |
2912 | | |
2913 | 24.1k | if (j) { |
2914 | 1.37k | work.size = i + j; |
2915 | | |
2916 | 1.37k | if (do_render && doc->ext_flags & HOEDOWN_EXT_META_BLOCK && |
2917 | 1.37k | meta == 2 && doc->meta) { |
2918 | 0 | size_t org, sz; |
2919 | |
|
2920 | 0 | sz = work.size - 5; |
2921 | 0 | while (sz > 0 && work.data[sz - 1] == '\n') { |
2922 | 0 | sz--; |
2923 | 0 | } |
2924 | |
|
2925 | 0 | org = 5; |
2926 | 0 | while (org < sz && work.data[org] == '\n') { |
2927 | 0 | org++; |
2928 | 0 | } |
2929 | |
|
2930 | 0 | if (org < sz) { |
2931 | 0 | hoedown_buffer_put(doc->meta, work.data + org, sz - org); |
2932 | 0 | hoedown_buffer_putc(doc->meta, '\n'); |
2933 | 0 | } |
2934 | 1.37k | } else if (do_render && doc->md.blockhtml) { |
2935 | 0 | doc->md.blockhtml(ob, &work, &doc->data); |
2936 | 0 | } |
2937 | 1.37k | return work.size; |
2938 | 1.37k | } |
2939 | 24.1k | } |
2940 | | |
2941 | | /* HR, which is the only self-closing block tag considered */ |
2942 | 318k | if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) { |
2943 | 5.36k | i = 3; |
2944 | 22.0M | while (i < size && data[i] != '>') |
2945 | 22.0M | i++; |
2946 | | |
2947 | 5.36k | if (i + 1 < size) { |
2948 | 4.74k | i++; |
2949 | 4.74k | j = is_empty(data + i, size - i); |
2950 | 4.74k | if (j) { |
2951 | 476 | work.size = i + j; |
2952 | 476 | if (do_render && doc->md.blockhtml) |
2953 | 0 | doc->md.blockhtml(ob, &work, &doc->data); |
2954 | 476 | return work.size; |
2955 | 476 | } |
2956 | 4.74k | } |
2957 | 5.36k | } |
2958 | | |
2959 | | /* Extension script tags */ |
2960 | 317k | if (doc->ext_flags & HOEDOWN_EXT_SCRIPT_TAGS) { |
2961 | 317k | i = script_tag_length(data, size); |
2962 | 317k | if (i) { |
2963 | 7.60k | if (i < size) { |
2964 | 3.01k | j = is_empty(data + i, size - i); |
2965 | 3.01k | } |
2966 | | |
2967 | 7.60k | if (j) { |
2968 | 1.50k | work.size = i + j; |
2969 | 1.50k | if (do_render && doc->md.blockhtml) { |
2970 | 0 | doc->md.blockhtml(ob, &work, &doc->data); |
2971 | 0 | } |
2972 | 1.50k | return work.size; |
2973 | 1.50k | } |
2974 | 7.60k | } |
2975 | | |
2976 | 317k | } |
2977 | | |
2978 | | /* no special case recognised */ |
2979 | 316k | return 0; |
2980 | 317k | } |
2981 | | |
2982 | | /* looking for a matching closing tag in strict mode */ |
2983 | 15.9k | tag_len = strlen(curtag); |
2984 | 15.9k | tag_end = htmlblock_find_end_strict(curtag, tag_len, doc, data, size); |
2985 | | |
2986 | | /* if not found, trying a second pass looking for indented match */ |
2987 | | /* but not if tag is "ins" or "del" (following original Markdown.pl) */ |
2988 | 15.9k | if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) |
2989 | 12.9k | tag_end = htmlblock_find_end(curtag, tag_len, doc, data, size); |
2990 | | |
2991 | 15.9k | if (!tag_end) |
2992 | 11.6k | return 0; |
2993 | | |
2994 | | /* the end of the block has been found */ |
2995 | 4.29k | work.size = tag_end; |
2996 | 4.29k | if (do_render && doc->md.blockhtml) |
2997 | 0 | doc->md.blockhtml(ob, &work, &doc->data); |
2998 | | |
2999 | 4.29k | return tag_end; |
3000 | 15.9k | } |
3001 | | |
3002 | | /* Common function to parse table main rows and continued rows.
 * Starting at data[offset], leading whitespace is skipped, the end of the
 * cell is located with find_separator_char (or the end of the line when no
 * separator is left), trailing whitespace is trimmed, and the cell text is
 * appended to ob with the backslash dropped from every backslash+separator
 * pair. A '\n' is appended first when is_continuation is non-zero.
 * Returns pos - offset, i.e. how far scanning advanced from offset. */
3003 | | static size_t
3004 | | parse_table_cell_line(
3005 | | hoedown_buffer *ob,
3006 | | uint8_t *data,
3007 | | size_t size,
3008 | | size_t offset,
3009 | | char separator,
3010 | 64.1k | int is_continuation) {
3011 | 64.1k | size_t pos, line_end, cell_start, cell_end, len, copy_start, copy_end;
3012 | |
3013 | 64.1k | pos = offset;
3014 | |
3015 | 138k | while (pos < size && _isspace(data[pos])) pos++; /* skip leading whitespace */
3016 | |
3017 | 64.1k | cell_start = pos;
3018 | |
3019 | 64.1k | line_end = pos;
3020 | 15.0M | while (line_end < size && data[line_end] != '\n') line_end++;
3021 | 64.1k | len = find_separator_char(data + pos, line_end - pos, separator); /* search is limited to the current line */
3022 | |
3023 | | /* Two possibilities for len == 0:
3024 | | 1) No more separator char found in the current line.
3025 | | 2) The next separator is right after the current one, i.e. empty cell.
3026 | | For case 1, we skip to the end of line; for case 2 we just continue.
3027 | | */
3028 | 64.1k | if (len == 0 && pos < size && data[pos] != separator) {
3029 | 3.10M | while (pos + len < size && data[pos + len] != '\n') len++;
3030 | 24.4k | }
3031 | 64.1k | pos += len;
3032 | |
3033 | 64.1k | cell_end = pos - 1; /* index of the last byte of the cell; NOTE(review): wraps around if pos == 0 */
3034 | |
3035 | 514k | while (cell_end > cell_start && _isspace(data[cell_end])) /* trim trailing whitespace */
3036 | 450k | cell_end--;
3037 | |
3038 | | /* If this isn't the first line of the cell, add a new line before the
3039 | | extra cell contents, to separate them (and make backslash linebreaks
3040 | | work).
3041 | | */
3042 | 64.1k | if (is_continuation) hoedown_buffer_putc(ob, '\n');
3043 | |
3044 | | /* Remove escaping from separators: text is copied in chunks, each chunk ending just before the backslash that precedes a separator */
3045 | 64.1k | copy_start = copy_end = cell_start;
3046 | 12.2M | while (copy_end < cell_end + 1) {
3047 | 12.2M | if (data[copy_end] == separator && copy_end > copy_start && data[copy_end - 1] == '\\') {
3048 | 2.81k | hoedown_buffer_put(ob, data + copy_start, copy_end - copy_start - 1);
3049 | 2.81k | copy_start = copy_end; /* the next chunk starts at the separator itself */
3050 | 2.81k | }
3051 | 12.2M | copy_end++;
3052 | 12.2M | }
3053 | 64.1k | hoedown_buffer_put(ob, data + copy_start, copy_end - copy_start); /* remaining tail of the cell */
3054 | |
3055 | 64.1k | return pos - offset;
3056 | 64.1k | }
3057 | | |
/* parse_table_row • render one logical table row into ob.
 * data/size cover the row text; when rows is greater than 1 they also cover
 * rows - 1 continuation lines. rows must be at least 1, because rows - 1 is
 * computed in size_t. Nothing is rendered unless both the table_cell and
 * table_row callbacks are set.
 * For each of the first `columns` cells found, the cell text from the main
 * line and from each continuation line is gathered into one buffer,
 * inline-parsed and passed to table_cell with col_data[col] | header_flag.
 * Columns with no input left are rendered as empty cells. */
3058 | | static void
3059 | | parse_table_row(
3060 | | hoedown_buffer *ob,
3061 | | hoedown_document *doc,
3062 | | uint8_t *data,
3063 | | size_t size,
3064 | | size_t columns,
3065 | | size_t rows,
3066 | | hoedown_table_flags *col_data,
3067 | | hoedown_table_flags header_flag)
3068 | 394k | {
3069 | 394k | size_t i = 0, col;
3070 | 394k | hoedown_buffer *row_work = 0;
3071 | |
3072 | 394k | if (!doc->md.table_cell || !doc->md.table_row)
3073 | 370k | return;
3074 | |
3075 | 23.9k | row_work = newbuf(doc, BUFFER_SPAN);
3076 | |
3077 | | /* skip optional first pipe */
3078 | 23.9k | if (i < size && data[i] == '|')
3079 | 11.8k | i++;
3080 | |
3081 | 57.4k | for (col = 0; col < columns && i < size; ++col) {
3082 | 33.4k | size_t pos, extra_rows_in_cell;
3083 | 33.4k | hoedown_buffer *cell_content;
3084 | 33.4k | hoedown_buffer *cell_work;
3085 | |
3086 | | /* cell_content is the text that is inline parsed into cell_work. It
3087 | | consists of the values of this cell from each row, concatenated and
3088 | | separated by new lines.
3089 | | */
3090 | 33.4k | cell_content = newbuf(doc, BUFFER_SPAN);
3091 | 33.4k | cell_work = newbuf(doc, BUFFER_SPAN);
3092 | |
3093 | 33.4k | i += parse_table_cell_line(cell_content, data, size, i, '|', 0 /* is_continuation */);
3094 | |
3095 | | /* Add extra rows of the cell. This only occurs if rows is greater than 1,
3096 | | which only happens when multiline tables are enabled.
3097 | |
3098 | | Each extra row is a colon, followed by cell contents for the continued
3099 | | row, separated by colons.
3100 | | */
3101 | 33.4k | extra_rows_in_cell = rows - 1;
3102 | 33.4k | pos = i; /* pos walks the continuation lines; i stays on the main line */
3103 | 64.1k | while (extra_rows_in_cell > 0 && pos < size) {
3104 | 30.7k | size_t c;
3105 | |
3106 | | /* seek to the end of the current row */
3107 | 2.50M | while (pos < size && data[pos] != '\n') {
3108 | 2.47M | pos++;
3109 | 2.47M | }
3110 | |
3111 | | /* skip new line and leading colon */
3112 | 30.7k | if (pos < size) pos++;
3113 | 30.7k | if (pos < size) pos++;
3114 | |
3115 | | /* Seek to the beginning of the correct column on the continuation line.
3116 | | * The continuation line should have the expected number of columns, and
3117 | | * so we never expect pos >= size or data[pos] == '\n'. These checks serve
3118 | | * as defense in depth against wrong preconditions. */
3119 | 57.0k | for (c = 0; c < col; c++) {
3120 | 4.63M | while (pos < size && data[pos] != '\n' && (is_backslashed(data, pos) || data[pos] != ':'))
3121 | 4.60M | pos++;
3122 | 26.3k | if (pos < size && data[pos] == ':') pos++; /* skip colon */
3123 | 26.3k | }
3124 | |
3125 | 30.7k | parse_table_cell_line(cell_content, data, size, pos, ':', 1 /* is_continuation */);
3126 | |
3127 | 30.7k | extra_rows_in_cell--;
3128 | 30.7k | }
3129 | |
3130 | 33.4k | parse_inline(cell_work, doc, cell_content->data, cell_content->size);
3131 | |
3132 | 33.4k | doc->md.table_cell(row_work, cell_work, col_data[col] | header_flag, &doc->data);
3133 | |
3134 | 33.4k | popbuf(doc, BUFFER_SPAN);
3135 | 33.4k | popbuf(doc, BUFFER_SPAN);
3136 | 33.4k | i++; /* step over the separator that ended this cell */
3137 | 33.4k | }
3138 | |
3139 | 27.2k | for (; col < columns; ++col) { /* pad the row with empty cells */
3140 | 3.27k | hoedown_buffer empty_cell = { 0, 0, 0, 0, NULL, NULL, NULL };
3141 | 3.27k | doc->md.table_cell(row_work, &empty_cell, col_data[col] | header_flag, &doc->data);
3142 | 3.27k | }
3143 | |
3144 | 23.9k | doc->md.table_row(ob, row_work, &doc->data);
3145 | |
3146 | 23.9k | popbuf(doc, BUFFER_SPAN);
3147 | 23.9k | }
3148 | | |
/* parse_table_header • parse the header row(s) and the underline of a table.
 * The first line must end with a newline inside size and contain at least one
 * unescaped pipe. With HOEDOWN_EXT_SPECIAL_ATTRIBUTE, attributes found on the
 * header line are stored in attr. With HOEDOWN_EXT_MULTILINE_TABLES,
 * following lines starting with ": " and holding a matching number of colons
 * are taken as continuation lines of the header.
 * The next line is then checked as the underline: for every column, a run of
 * at least three characters made of '-' and optional ':' alignment markers,
 * separated by '|' or '+'.
 * On success *columns is set, the header is rendered into ob through
 * parse_table_row, and the offset just past the underline is returned.
 * Returns 0 otherwise.
 * *column_data is allocated with hoedown_calloc once the header line is
 * accepted and is not released here, including when the underline check
 * fails and 0 is returned. */
3149 | | static size_t
3150 | | parse_table_header(
3151 | | hoedown_buffer *ob,
3152 | | hoedown_buffer *attr,
3153 | | hoedown_document *doc,
3154 | | uint8_t *data,
3155 | | size_t size,
3156 | | size_t *columns,
3157 | | hoedown_table_flags **column_data)
3158 | 5.57M | {
3159 | 5.57M | int pipes, rows;
3160 | 5.57M | size_t i = 0, col, header_end, under_end;
3161 | 5.57M | hoedown_buffer *header_contents = 0;
3162 | |
3163 | 5.57M | pipes = 0; /* count the unescaped pipes on the first line */
3164 | 267M | while (i < size && data[i] != '\n') {
3165 | 261M | if (!is_backslashed(data, i) && data[i] == '|') {
3166 | 9.76M | pipes++;
3167 | 9.76M | }
3168 | 261M | i++;
3169 | 261M | }
3170 | |
3171 | 5.57M | if (i == size || pipes == 0) /* no terminating newline, or not a table line */
3172 | 5.25M | return 0;
3173 | |
3174 | 314k | header_end = i;
3175 | |
3176 | 714k | while (header_end > 0 && _isspace(data[header_end - 1]))
3177 | 400k | header_end--;
3178 | |
3179 | 314k | if (data[0] == '|') /* a leading pipe does not separate columns */
3180 | 115k | pipes--;
3181 | |
3182 | 314k | if (header_end && data[header_end - 1] == '|' && !is_backslashed(data, header_end - 1)) /* neither does a trailing one */
3183 | 97.5k | pipes--;
3184 | |
3185 | 314k | if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
3186 | 314k | size_t n = parse_attributes(data, header_end, attr, NULL, "", 1, doc->attr_activation);
3187 | | /* n == header_end when no attribute is found */
3188 | 314k | if (n != header_end) {
3189 | 40.1k | while (n > 0 && _isspace(data[n - 1]))
3190 | 0 | n--;
3191 | 40.1k | if (attr->size && n && data[n - 1] == '|' && !is_backslashed(data, n - 1))
3192 | 1.96k | pipes--;
3193 | |
3194 | 40.1k | header_end = n + 1;
3195 | 40.1k | }
3196 | 314k | }
3197 | |
3198 | 314k | if (pipes < 0) /* e.g. a line made of a single pipe */
3199 | 29.2k | return 0;
3200 | |
3201 | | /* header_contents will have the lines of the header copied into it, and then
3202 | | is passed to parse_table_row. We need a separate buffer to avoid passing
3203 | | the attribute to parse_table_row.
3204 | | */
3205 | 285k | header_contents = newbuf(doc, BUFFER_SPAN);
3206 | 285k | hoedown_buffer_put(header_contents, data, header_end);
3207 | |
3208 | 285k | *columns = pipes + 1;
3209 | 285k | *column_data = hoedown_calloc(*columns, sizeof(hoedown_table_flags)); /* not freed in this function */
3210 | |
3211 | | /* If the multiline table extension is enabled, check the next lines for
3212 | | continuation markers, to find the number of text rows that make up this
3213 | | logical row, and copy the contents of each row to header_contents,
3214 | | separated by new lines.
3215 | | */
3216 | 285k | rows = 1;
3217 | 285k | if ((doc->ext_flags & HOEDOWN_EXT_MULTILINE_TABLES) != 0) {
3218 | 295k | while (i < size) {
3219 | 295k | size_t j = i + 1;
3220 | 295k | int colons = 0;
3221 | |
3222 | | /* Require that the continuation line starts with a colon */
3223 | 295k | if (j >= size || data[j] != ':') break;
3224 | | /* Skip the leading colon to match the pipe counting behavior above */
3225 | 59.8k | j++;
3226 | |
3227 | | /* Require that the continuation line start with ": ", to
3228 | | distinguish from ":-" which could start a left-aligned header
3229 | | bar.
3230 | | */
3231 | 59.8k | if (j >= size || data[j] != ' ') break;
3232 | |
3233 | 7.39M | while (j < size && data[j] != '\n') {
3234 | 7.37M | j++;
3235 | 7.37M | if (j < size && !is_backslashed(data, j) && data[j] == ':')
3236 | 2.74M | colons++;
3237 | 7.37M | }
3238 | |
3239 | | /* Allow a trailing colon to match the pipe counting behavior above */
3240 | 14.1k | if (!is_backslashed(data, j - 1) && data[j - 1] == ':')
3241 | 1.06k | colons--;
3242 | |
3243 | 14.1k | if (colons != pipes) break;
3244 | |
3245 | 10.5k | hoedown_buffer_putc(header_contents, '\n');
3246 | | /* data[i] is the previous new line, and data[j] is the next new
3247 | | line. This copies all the text between the new lines.
3248 | | */
3249 | 10.5k | hoedown_buffer_put(header_contents, data + i + 1, j - i - 1);
3250 | |
3251 | 10.5k | rows++;
3252 | 10.5k | i = j;
3253 | 10.5k | header_end = j; /* NOTE(review): header_end is not read again after this point */
3254 | 10.5k | }
3255 | 285k | }
3256 | |
3257 | | /* Parse the header underline */
3258 | 285k | i++;
3259 | 285k | if (i < size && data[i] == '|')
3260 | 23.1k | i++;
3261 | |
3262 | 285k | under_end = i;
3263 | 40.4M | while (under_end < size && data[under_end] != '\n')
3264 | 40.2M | under_end++;
3265 | |
3266 | 337k | for (col = 0; col < *columns && i < under_end; ++col) {
3267 | 286k | size_t dashes = 0; /* counts '-' and the ':' alignment markers */
3268 | |
3269 | 593k | while (i < under_end && data[i] == ' ')
3270 | 306k | i++;
3271 | |
3272 | 286k | if (i < under_end && data[i] == ':') {
3273 | 64.6k | i++; (*column_data)[col] |= HOEDOWN_TABLE_ALIGN_LEFT;
3274 | 64.6k | dashes++;
3275 | 64.6k | }
3276 | |
3277 | 501k | while (i < under_end && data[i] == '-') {
3278 | 214k | i++; dashes++;
3279 | 214k | }
3280 | |
3281 | 286k | if (i < under_end && data[i] == ':') {
3282 | 40.3k | i++; (*column_data)[col] |= HOEDOWN_TABLE_ALIGN_RIGHT;
3283 | 40.3k | dashes++;
3284 | 40.3k | }
3285 | |
3286 | 578k | while (i < under_end && data[i] == ' ')
3287 | 292k | i++;
3288 | |
3289 | 286k | if (i < under_end && data[i] != '|' && data[i] != '+')
3290 | 153k | break;
3291 | |
3292 | 133k | if (dashes < 3)
3293 | 80.9k | break;
3294 | |
3295 | 52.4k | i++;
3296 | 52.4k | }
3297 | |
3298 | 285k | if (col < *columns) { /* the underline did not cover every column */
3299 | | /* clean up header_contents */
3300 | 254k | popbuf(doc, BUFFER_SPAN);
3301 | 254k | return 0;
3302 | 254k | }
3303 | |
3304 | 30.8k | parse_table_row(
3305 | 30.8k | ob, doc, header_contents->data,
3306 | 30.8k | header_contents->size,
3307 | 30.8k | *columns,
3308 | 30.8k | rows,
3309 | 30.8k | *column_data,
3310 | 30.8k | HOEDOWN_TABLE_HEADER
3311 | 30.8k | );
3312 | |
3313 | | /* clean up header_contents */
3314 | 30.8k | popbuf(doc, BUFFER_SPAN);
3315 | |
3316 | 30.8k | return under_end + 1; /* NOTE(review): exceeds size when the underline has no trailing newline */
3317 | 285k | }
3318 | | |
/* parse_table • try to parse a table starting at data.
 * Four work buffers are taken before anything is known about the input, and
 * parse_table_header decides whether a table starts here. When it does, body
 * rows are consumed for as long as each line contains an unescaped pipe and
 * ends with a newline inside size. With HOEDOWN_EXT_MULTILINE_TABLES, lines
 * starting with ':' that carry the right number of colons are folded into the
 * preceding row, and a dashed separator line after a row is skipped.
 * The table_header, table_body and table callbacks are then invoked, each
 * only if set.
 * Returns the number of bytes consumed, which is 0 when parse_table_header
 * rejected the input. */
3319 | | static size_t
3320 | | parse_table(
3321 | | hoedown_buffer *ob,
3322 | | hoedown_document *doc,
3323 | | uint8_t *data,
3324 | | size_t size)
3325 | 5.57M | {
3326 | 5.57M | size_t i;
3327 | |
3328 | 5.57M | hoedown_buffer *work = 0;
3329 | 5.57M | hoedown_buffer *header_work = 0;
3330 | 5.57M | hoedown_buffer *body_work = 0;
3331 | 5.57M | hoedown_buffer *attr_work = 0;
3332 | |
3333 | 5.57M | size_t columns;
3334 | 5.57M | hoedown_table_flags *col_data = NULL;
3335 | |
3336 | 5.57M | work = newbuf(doc, BUFFER_BLOCK);
3337 | 5.57M | header_work = newbuf(doc, BUFFER_SPAN);
3338 | 5.57M | body_work = newbuf(doc, BUFFER_BLOCK);
3339 | 5.57M | attr_work = newbuf(doc, BUFFER_ATTRIBUTE);
3340 | 5.57M | i = parse_table_header(header_work, attr_work, doc, data, size, &columns, &col_data);
3341 | 5.57M | if (i > 0) {
3342 | |
3343 | 394k | while (i < size) {
3344 | 385k | size_t row_start;
3345 | 385k | size_t pipes = 0;
3346 | 385k | size_t rows = 1;
3347 | |
3348 | 385k | row_start = i;
3349 | |
3350 | 113M | while (i < size && data[i] != '\n') {
3351 | 112M | if (data[i] == '|' && !is_backslashed(data, i)) pipes++;
3352 | 112M | i++;
3353 | 112M | }
3354 | |
3355 | 385k | if (pipes == 0 || i == size) { /* not a table row: rewind so the line is left unconsumed */
3356 | 22.3k | i = row_start;
3357 | 22.3k | break;
3358 | 22.3k | }
3359 | |
3360 | | /* Don't count a leading pipe. */
3361 | 363k | if (data[row_start] == '|')
3362 | 330k | pipes--;
3363 | |
3364 | | /* Don't count a trailing pipe. NOTE(review): pipes is unsigned and wraps for a row made of a single '|'. */
3365 | 363k | if (data[i - 1] == '|' && !is_backslashed(data, i - 1))
3366 | 104k | pipes--;
3367 | |
3368 | | /* If the multiline table extension is enabled, check the next
3369 | | lines for continuation markers, to find the number of text rows
3370 | | that make up this logical row.
3371 | | */
3372 | 363k | if ((doc->ext_flags & HOEDOWN_EXT_MULTILINE_TABLES) != 0) {
3373 | 391k | while (i < size) {
3374 | 391k | size_t j = i + 1;
3375 | 391k | size_t colons = 0;
3376 | |
3377 | | /* Require that a continued row starts with a colon. */
3378 | 391k | if (j >= size || data[j] != ':') break;
3379 | |
3380 | | /* Don't count leading colon for comparison to pipes. */
3381 | 36.8k | j++;
3382 | |
3383 | 27.5M | while (j < size && data[j] != '\n') {
3384 | 27.5M | if (!is_backslashed(data, j) && data[j] == ':')
3385 | 3.45M | colons++;
3386 | 27.5M | j++;
3387 | 27.5M | }
3388 | |
3389 | | /* Don't count a trailing colon for comparison to pipes. */
3390 | 36.8k | if (!is_backslashed(data, j - 1) && data[j - 1] == ':')
3391 | 3.56k | colons--;
3392 | |
3393 | | /* Hoedown allows table rows where the number of cells is different
3394 | | * from `columns`. In this case, `parse_table_row` will add empty
3395 | | * cells. However, the code does not work in the multi-line case, so
3396 | | * we require the right number of columns. */
3397 | 36.8k | if (colons != pipes || colons + 1 != columns) break;
3398 | |
3399 | 28.2k | rows++;
3400 | 28.2k | i = j;
3401 | 28.2k | }
3402 | 363k | }
3403 | |
3404 | 363k | parse_table_row(
3405 | 363k | body_work,
3406 | 363k | doc,
3407 | 363k | data + row_start,
3408 | 363k | i - row_start,
3409 | 363k | columns,
3410 | 363k | rows,
3411 | 363k | col_data, 0
3412 | 363k | );
3413 | |
3414 | 363k | i++; /* step over the newline that ended the row */
3415 | |
3416 | | /* Skip an optional row separator, if it's there. */
3417 | 363k | if ((doc->ext_flags & HOEDOWN_EXT_MULTILINE_TABLES) != 0) {
3418 | | /* Use j instead of i, and set i to j only if this is actually a row separator. */
3419 | 363k | size_t j = i, next_line_end = i, col;
3420 | |
3421 | | /* Seek next_line_end to the position of the terminating new line. */
3422 | 107M | while (next_line_end < size && data[next_line_end] != '\n')
3423 | 106M | next_line_end++;
3424 | |
3425 | | /* Skip leading pipe, if any. */
3426 | 363k | if (j < next_line_end && data[j] == '|')
3427 | 233k | j++;
3428 | |
3429 | | /* Ensure that there are at least columns pipe/plus separated
3430 | | runs of dashes, each at least 3 long. The pipes may be
3431 | | padded with spaces, and the line may end in a pipe.
3432 | | */
3433 | 452k | for (col = 0; col < columns && j < next_line_end; col++) {
3434 | 352k | size_t dashes = 0;
3435 | |
3436 | 2.13M | while (j < next_line_end && data[j] == ' ')
3437 | 1.78M | j++;
3438 | |
3439 | 650k | while (j < next_line_end && data[j] == '-') {
3440 | 298k | j++;
3441 | 298k | dashes++;
3442 | 298k | }
3443 | |
3444 | 720k | while (j < next_line_end && data[j] == ' ')
3445 | 368k | j++;
3446 | |
3447 | 352k | if (j < next_line_end && data[j] != '|' && data[j] != '+')
3448 | 145k | break;
3449 | |
3450 | 206k | if (dashes < 3)
3451 | 117k | break;
3452 | |
3453 | 89.0k | j++;
3454 | 89.0k | }
3455 | |
3456 | | /* Skip i past the row separator, if it was valid. */
3457 | 363k | if (col == columns)
3458 | 88.6k | i = next_line_end + 1;
3459 | 363k | }
3460 | 363k | }
3461 | |
3462 | 30.8k | if (doc->md.table_header)
3463 | 9.12k | doc->md.table_header(work, header_work, &doc->data);
3464 | |
3465 | 30.8k | if (doc->md.table_body)
3466 | 9.12k | doc->md.table_body(work, body_work, &doc->data);
3467 | |
3468 | 30.8k | if (doc->md.table)
3469 | 9.12k | doc->md.table(ob, work, attr_work, &doc->data);
3470 | 30.8k | }
3471 | |
3472 | 5.57M | free(col_data); /* set by parse_table_header; still NULL when it bailed out early */
3473 | 5.57M | popbuf(doc, BUFFER_SPAN);
3474 | 5.57M | popbuf(doc, BUFFER_BLOCK);
3475 | 5.57M | popbuf(doc, BUFFER_BLOCK);
3476 | 5.57M | popbuf(doc, BUFFER_ATTRIBUTE);
3477 | 5.57M | return i;
3478 | 5.57M | }
3479 | | |
3480 | | /* parse_userblock • parsing of user block */ |
3481 | | static size_t |
3482 | | parse_userblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size) |
3483 | 0 | { |
3484 | 0 | hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL }; |
3485 | 0 | size_t len = doc->user_block(data, size, &doc->data); |
3486 | |
|
3487 | 0 | if (!len) { |
3488 | 0 | return 0; |
3489 | 0 | } |
3490 | | |
3491 | 0 | work.data = data; |
3492 | 0 | work.size = len; |
3493 | |
|
3494 | 0 | if (doc->md.user_block) { |
3495 | 0 | doc->md.user_block(ob, &work, &doc->data); |
3496 | 0 | } else { |
3497 | 0 | hoedown_buffer_put(ob, work.data, work.size); |
3498 | 0 | } |
3499 | 0 | return len; |
3500 | 0 | } |
3501 | | |
3502 | | /* is_paragraph • returns if the next block is a paragraph (doesn't follow any |
3503 | | * other special rules for other types of blocks) */ |
3504 | | static int |
3505 | | is_paragraph(hoedown_document *doc, uint8_t *txt_data, size_t end) |
3506 | 5.09M | { |
3507 | | /* temporary buffer for results of checking special blocks */ |
3508 | 5.09M | hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK); |
3509 | | /* temporary renderer that has no rendering function */ |
3510 | 5.09M | hoedown_renderer temp_renderer; |
3511 | | /* ensure all callbacks are NULL */ |
3512 | 5.09M | memset(&temp_renderer, 0, sizeof(hoedown_renderer)); |
3513 | | /* store the old renderer */ |
3514 | 5.09M | hoedown_renderer old_renderer; |
3515 | 5.09M | memcpy(&old_renderer, &doc->md, sizeof(hoedown_renderer)); |
3516 | | /* copy the new renderer over to the document */ |
3517 | 5.09M | memcpy(&doc->md, &temp_renderer, sizeof(hoedown_renderer)); |
3518 | | /* these are all the if branches inside parse_block, wrapped into one bool, |
3519 | | * with minimal parsing, and completely idempotent */ |
3520 | 5.09M | int result = !(is_atxheader(doc, txt_data, end) || |
3521 | 5.09M | (doc->user_block && parse_userblock(tmp, doc, txt_data, end)) || |
3522 | 5.09M | (txt_data[0] == '<' && |
3523 | 5.07M | parse_htmlblock(tmp, doc, txt_data, end, 0)) || |
3524 | 5.09M | is_hrule(txt_data, end) || |
3525 | 5.09M | ((doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) && |
3526 | 5.06M | parse_fencedcode(tmp, doc, txt_data, end, doc->ext_flags)) || |
3527 | 5.09M | ((doc->ext_flags & HOEDOWN_EXT_TABLES) && |
3528 | 5.05M | parse_table(tmp, doc, txt_data, end)) || |
3529 | 5.09M | prefix_quote(txt_data, end) || |
3530 | 5.09M | (!(doc->ext_flags & HOEDOWN_EXT_DISABLE_INDENTED_CODE) && |
3531 | 5.02M | prefix_code(txt_data, end)) || |
3532 | 5.09M | prefix_uli(txt_data, end) || |
3533 | 5.09M | prefix_oli(txt_data, end) || |
3534 | 5.09M | ((doc->ext_flags & HOEDOWN_EXT_DEFINITION_LISTS) && |
3535 | 4.97M | prefix_dli(doc, txt_data, end))); |
3536 | 5.09M | popbuf(doc, BUFFER_BLOCK); |
3537 | 5.09M | memcpy(&doc->md, &old_renderer, sizeof(hoedown_renderer)); |
3538 | 5.09M | return result; |
3539 | 5.09M | } |
3540 | | |
3541 | | /* parse_block • parsing of one block, returning next uint8_t to parse */ |
3542 | | static void |
3543 | | parse_block(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size) |
3544 | 133k | { |
3545 | 133k | size_t beg, end, i; |
3546 | 133k | uint8_t *txt_data; |
3547 | 133k | beg = 0; |
3548 | | |
3549 | 133k | if (doc->work_bufs[BUFFER_SPAN].size + |
3550 | 133k | doc->work_bufs[BUFFER_BLOCK].size > doc->max_nesting) |
3551 | 333 | return; |
3552 | | |
3553 | 814k | while (beg < size) { |
3554 | 680k | txt_data = data + beg; |
3555 | 680k | end = size - beg; |
3556 | | |
3557 | 680k | if (is_atxheader(doc, txt_data, end)) |
3558 | 16.8k | beg += parse_atxheader(ob, doc, txt_data, end); |
3559 | | |
3560 | 663k | else if (doc->user_block && |
3561 | 663k | (i = parse_userblock(ob, doc, txt_data, end)) != 0) |
3562 | 0 | beg += i; |
3563 | | |
3564 | 663k | else if (data[beg] == '<' && doc->md.blockhtml && |
3565 | 663k | (i = parse_htmlblock(ob, doc, txt_data, end, 1)) != 0) |
3566 | 0 | beg += i; |
3567 | | |
3568 | 663k | else if ((i = is_empty(txt_data, end)) != 0) |
3569 | 134k | beg += i; |
3570 | | |
3571 | 528k | else if (is_hrule(txt_data, end)) { |
3572 | 85.3k | while (beg < size && data[beg] != '\n') |
3573 | 79.6k | beg++; |
3574 | | |
3575 | 5.66k | if (doc->md.hrule) { |
3576 | 5.66k | doc->hrule_char = data[beg - 1]; |
3577 | 5.66k | doc->md.hrule(ob, &doc->data); |
3578 | 5.66k | doc->hrule_char = 0; |
3579 | 5.66k | } |
3580 | | |
3581 | 5.66k | beg++; |
3582 | 5.66k | } |
3583 | | |
3584 | 523k | else if ((doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) != 0 && |
3585 | 523k | (i = parse_fencedcode(ob, doc, txt_data, end, doc->ext_flags)) != 0) |
3586 | 6.61k | beg += i; |
3587 | | |
3588 | 516k | else if ((doc->ext_flags & HOEDOWN_EXT_TABLES) != 0 && |
3589 | 516k | (i = parse_table(ob, doc, txt_data, end)) != 0) |
3590 | 9.12k | beg += i; |
3591 | | |
3592 | 507k | else if (prefix_quote(txt_data, end)) |
3593 | 27.5k | beg += parse_blockquote(ob, doc, txt_data, end); |
3594 | | |
3595 | 479k | else if (!(doc->ext_flags & HOEDOWN_EXT_DISABLE_INDENTED_CODE) && prefix_code(txt_data, end)) |
3596 | 0 | beg += parse_blockcode(ob, doc, txt_data, end); |
3597 | | |
3598 | 479k | else if (prefix_uli(txt_data, end)) |
3599 | 95.6k | beg += parse_list(ob, doc, txt_data, end, 0); |
3600 | | |
3601 | 384k | else if (prefix_oli(txt_data, end)) |
3602 | 84.4k | beg += parse_list(ob, doc, txt_data, end, HOEDOWN_LIST_ORDERED); |
3603 | | |
3604 | 299k | else if ((doc->ext_flags & HOEDOWN_EXT_DEFINITION_LISTS) && prefix_dli(doc, txt_data, end)) |
3605 | 17.8k | beg += parse_list(ob, doc, txt_data, end, HOEDOWN_LIST_DEFINITION); |
3606 | | |
3607 | 282k | else |
3608 | 282k | beg += parse_paragraph(ob, doc, txt_data, end); |
3609 | 680k | } |
3610 | 133k | } |
3611 | | |
3612 | | |
3613 | | |
3614 | | /********************* |
3615 | | * REFERENCE PARSING * |
3616 | | *********************/ |
3617 | | |
3618 | | /* is_footnote • returns whether a line is a footnote definition or not */ |
3619 | | static int |
3620 | | is_footnote(const uint8_t *data, size_t beg, size_t end, size_t *last, struct footnote_list *list) |
3621 | 1.56M | { |
3622 | 1.56M | size_t i = 0; |
3623 | 1.56M | hoedown_buffer *contents = NULL; |
3624 | 1.56M | hoedown_buffer *name = NULL; |
3625 | 1.56M | size_t ind = 0; |
3626 | 1.56M | int in_empty = 0; |
3627 | 1.56M | size_t start = 0; |
3628 | | |
3629 | 1.56M | size_t id_offset, id_end; |
3630 | 1.56M | size_t id_indent = 0, content_line = 0, content_indent = 0; |
3631 | | |
3632 | | /* up to 3 optional leading spaces */ |
3633 | 1.56M | if (beg + 3 >= end) return 0; |
3634 | 1.56M | if (data[beg] == ' ') { i = 1; |
3635 | 131k | if (data[beg + 1] == ' ') { i = 2; |
3636 | 23.4k | if (data[beg + 2] == ' ') { i = 3; |
3637 | 14.7k | if (data[beg + 3] == ' ') return 0; } } } |
3638 | 1.55M | i += beg; |
3639 | | |
3640 | | /* id part: caret followed by anything between brackets */ |
3641 | 1.55M | if (data[i] != '[') return 0; |
3642 | 347k | i++; |
3643 | 347k | if (i >= end || data[i] != '^') return 0; |
3644 | 205k | i++; |
3645 | 205k | id_offset = i; |
3646 | 714k | while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']') |
3647 | 508k | i++; |
3648 | 205k | if (i >= end || data[i] != ']') return 0; |
3649 | 199k | id_end = i; |
3650 | | |
3651 | | /* spacer: colon (space | tab)* newline? (space | tab)* */ |
3652 | 199k | i++; |
3653 | 199k | if (i >= end || data[i] != ':') return 0; |
3654 | 192k | i++; |
3655 | 192k | if (i >= end) return 0; |
3656 | | |
3657 | | /* getting content and name buffers */ |
3658 | 192k | contents = hoedown_buffer_new(64); |
3659 | 192k | name = hoedown_buffer_new(64); |
3660 | | |
3661 | 192k | start = i; |
3662 | | |
3663 | | /* getting item indent size */ |
3664 | 754k | while (id_indent != start && data[start - id_indent] != '\n' && data[start - id_indent] != '\r') { |
3665 | 561k | id_indent++; |
3666 | 561k | } |
3667 | | |
3668 | | /* process lines similar to a list item */ |
3669 | 472k | while (i < end) { |
3670 | 9.69M | while (i < end && data[i] != '\n' && data[i] != '\r') i++; |
3671 | | |
3672 | | /* process an empty line */ |
3673 | 471k | if (is_empty(data + start, i - start)) { |
3674 | 148k | in_empty = 1; |
3675 | 148k | if (i < end && (data[i] == '\n' || data[i] == '\r')) { |
3676 | 148k | i++; |
3677 | 148k | if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++; |
3678 | 148k | } |
3679 | 148k | start = i; |
3680 | 148k | continue; |
3681 | 148k | } |
3682 | | |
3683 | | /* calculating the indentation */ |
3684 | 323k | ind = 0; |
3685 | 543k | while (ind < 4 && start + ind < end && data[start + ind] == ' ') |
3686 | 220k | ind++; |
3687 | | |
3688 | 323k | content_line++; |
3689 | | |
3690 | | /* joining only indented stuff after empty lines; |
3691 | | * note that now we only require 1 space of indentation |
3692 | | * to continue, just like lists */ |
3693 | 323k | if (ind == 0) { |
3694 | 230k | if (start == id_end + 2 && data[start] == '\t') {} |
3695 | 192k | else break; |
3696 | 230k | } |
3697 | 93.4k | else if (in_empty) { |
3698 | 2.91k | hoedown_buffer_putc(contents, '\n'); |
3699 | 2.91k | } |
3700 | | |
3701 | 130k | in_empty = 0; |
3702 | | |
3703 | | /* re-calculating the indentation */ |
3704 | 130k | if (content_line == 2 && data[start + ind] == ' ') { |
3705 | 53.8k | while (ind < id_indent && data[start + ind] == ' ') { |
3706 | 27.0k | ind++; |
3707 | 27.0k | } |
3708 | 26.7k | content_indent = ind; |
3709 | 26.7k | } |
3710 | 130k | if (content_indent > ind) { |
3711 | 28.5k | while (ind < content_indent && data[start + ind] == ' ') { |
3712 | 9.28k | ind++; |
3713 | 9.28k | } |
3714 | 19.2k | } |
3715 | | |
3716 | | /* adding the line into the content buffer */ |
3717 | 130k | hoedown_buffer_put(contents, data + start + ind, i - start - ind); |
3718 | | /* add carriage return */ |
3719 | 130k | if (i < end) { |
3720 | 130k | hoedown_buffer_putc(contents, '\n'); |
3721 | 130k | if (i < end && (data[i] == '\n' || data[i] == '\r')) { |
3722 | 130k | i++; |
3723 | 130k | if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++; |
3724 | 130k | } |
3725 | 130k | } |
3726 | 130k | start = i; |
3727 | 130k | } |
3728 | | |
3729 | 192k | if (last) |
3730 | 192k | *last = start; |
3731 | | |
3732 | 192k | if (list) { |
3733 | 192k | struct footnote_ref *ref; |
3734 | 192k | ref = create_footnote_ref(list, data + id_offset, id_end - id_offset); |
3735 | 192k | if (!ref) |
3736 | 0 | return 0; |
3737 | 192k | if (!add_footnote_ref(list, ref)) { |
3738 | 0 | free_footnote_ref(ref); |
3739 | 0 | return 0; |
3740 | 0 | } |
3741 | 192k | ref->contents = contents; |
3742 | 192k | hoedown_buffer_put(name, data + id_offset, id_end - id_offset); |
3743 | 192k | ref->name = name; |
3744 | 192k | } |
3745 | | |
3746 | 192k | return 1; |
3747 | 192k | } |
3748 | | |
/* is_html_comment • returns whether data[beg] starts an HTML comment
 *
 * The text must open with "<!--" and contain a later "-->" before `end`.
 * One line break directly after the "-->" is swallowed as well ('\n' or
 * '\r', plus a '\r' that immediately follows a '\n').
 *
 * last: if non-NULL and a comment was found, receives the offset of the
 *       first byte after the comment (and its swallowed line break).
 *
 * Returns 1 when a comment was found, 0 otherwise. */
static int
is_html_comment(const uint8_t *data, size_t beg, size_t end, size_t *last)
{
    const uint8_t *p;
    size_t off;

    /* too short to hold an opener and a closer */
    if (beg + 5 >= end)
        return 0;

    p = data + beg;
    if (p[0] != '<' || p[1] != '!' || p[2] != '-' || p[3] != '-')
        return 0;

    /* advance until p[off] is the '>' of a "-->" */
    for (off = 5; beg + off < end; off++) {
        if (p[off - 2] == '-' && p[off - 1] == '-' && p[off] == '>')
            break;
    }

    /* ran off the end: the closing "-->" was never found */
    if (beg + off >= end)
        return 0;
    off++;

    if (beg + off < end && (p[off] == '\n' || p[off] == '\r')) {
        off++;
        if (beg + off < end && p[off] == '\r' && p[off - 1] == '\n')
            off++;
    }

    if (last)
        *last = beg + off;

    return 1;
}
3774 | | |
3775 | | /* is_ref • returns whether a line is a reference or not */ |
3776 | | static int |
3777 | | is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs) |
3778 | 1.35M | { |
3779 | | /* int n; */ |
3780 | 1.35M | size_t i = 0; |
3781 | 1.35M | size_t id_offset, id_end; |
3782 | 1.35M | size_t link_offset, link_end; |
3783 | 1.35M | size_t title_offset, title_end; |
3784 | 1.35M | size_t line_end; |
3785 | 1.35M | size_t attr_offset = 0, attr_end = 0; |
3786 | | |
3787 | | /* up to 3 optional leading spaces */ |
3788 | 1.35M | if (beg + 3 >= end) return 0; |
3789 | 1.34M | if (data[beg] == ' ') { i = 1; |
3790 | 130k | if (data[beg + 1] == ' ') { i = 2; |
3791 | 23.4k | if (data[beg + 2] == ' ') { i = 3; |
3792 | 14.7k | if (data[beg + 3] == ' ') return 0; } } } |
3793 | 1.33M | i += beg; |
3794 | | |
3795 | | /* id part: anything but a newline between brackets */ |
3796 | 1.33M | if (data[i] != '[') return 0; |
3797 | 154k | i++; |
3798 | 154k | id_offset = i; |
3799 | 2.63M | while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']') |
3800 | 2.47M | i++; |
3801 | 154k | if (i >= end || data[i] != ']') return 0; |
3802 | 138k | id_end = i; |
3803 | | |
3804 | | /* spacer: colon (space | tab)* newline? (space | tab)* */ |
3805 | 138k | i++; |
3806 | 138k | if (i >= end || data[i] != ':') return 0; |
3807 | 119k | i++; |
3808 | 121k | while (i < end && data[i] == ' ') i++; |
3809 | 119k | if (i < end && (data[i] == '\n' || data[i] == '\r')) { |
3810 | 16.3k | i++; |
3811 | 16.3k | if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; } |
3812 | 120k | while (i < end && data[i] == ' ') i++; |
3813 | 119k | if (i >= end) return 0; |
3814 | | |
3815 | | /* link: spacing-free sequence, optionally between angle brackets */ |
3816 | 119k | if (data[i] == '<') |
3817 | 1.97k | i++; |
3818 | | |
3819 | 119k | link_offset = i; |
3820 | | |
3821 | 1.59M | while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r') |
3822 | 1.47M | i++; |
3823 | | |
3824 | 119k | if (data[i - 1] == '>') link_end = i - 1; |
3825 | 118k | else link_end = i; |
3826 | | |
3827 | | /* optional spacer: (space | tab)* (newline | '\'' | '"' | '(' ) */ |
3828 | 176k | while (i < end && data[i] == ' ') i++; |
3829 | 119k | if (i < end && data[i] != '\n' && data[i] != '\r' |
3830 | 119k | && data[i] != '\'' && data[i] != '"' && data[i] != '(') |
3831 | 5.20k | return 0; |
3832 | 113k | line_end = 0; |
3833 | | /* computing end-of-line */ |
3834 | 113k | if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i; |
3835 | 113k | if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') |
3836 | 667 | line_end = i + 1; |
3837 | | |
3838 | | /* optional (space|tab)* spacer after a newline */ |
3839 | 113k | if (line_end) { |
3840 | 64.3k | i = line_end + 1; |
3841 | 77.9k | while (i < end && data[i] == ' ') i++; } |
3842 | | |
3843 | | /* optional title: any non-newline sequence enclosed in '"() |
3844 | | alone on its line */ |
3845 | 113k | title_offset = title_end = 0; |
3846 | 113k | if (i + 1 < end |
3847 | 113k | && (data[i] == '\'' || data[i] == '"' || data[i] == '(')) { |
3848 | 77.4k | char d = data[i++]; |
3849 | 77.4k | title_offset = i; |
3850 | | |
3851 | | /* looking for end of tile */ |
3852 | 1.99M | while (i < end && data[i] != d && data[i] != '\n' && data[i] != '\r') { |
3853 | 1.91M | ++i; |
3854 | 1.91M | } |
3855 | | |
3856 | 77.4k | if (i + 1 < end && data[i] == d) { |
3857 | 44.9k | title_end = i++; |
3858 | 44.9k | attr_offset = i; |
3859 | | |
3860 | | /* looking for EOL */ |
3861 | 2.88M | while (i < end && data[i] != '\n' && data[i] != '\r') { |
3862 | 2.83M | i++; |
3863 | 2.83M | } |
3864 | | |
3865 | | /* looking for attribute */ |
3866 | 44.9k | if (data[i-1] == '}' && |
3867 | 44.9k | memchr(&data[attr_offset], '{', i - attr_offset)) { |
3868 | 127k | while (attr_offset < i && data[attr_offset] != '{') { |
3869 | 92.8k | ++attr_offset; |
3870 | 92.8k | } |
3871 | 34.2k | ++attr_offset; |
3872 | 34.2k | attr_end = i - 1; |
3873 | 34.2k | } else { |
3874 | 10.6k | if (data[i-1] == d) { |
3875 | 5.24k | title_end = i - 1; |
3876 | 5.44k | } else { |
3877 | 5.44k | title_end = i; |
3878 | 5.44k | } |
3879 | 10.6k | attr_offset = 0; |
3880 | 10.6k | attr_end = 0; |
3881 | 10.6k | } |
3882 | 44.9k | if (i + 1 < end && data[i] == '\r' && data[i + 1] == '\n') { |
3883 | 2.66k | ++i; |
3884 | 2.66k | } |
3885 | | |
3886 | 44.9k | line_end = i; |
3887 | 44.9k | } else { |
3888 | | /* looking for EOL */ |
3889 | 32.4k | while (i < end && data[i] != '\n' && data[i] != '\r') { |
3890 | 6 | i++; |
3891 | 6 | } |
3892 | 32.4k | if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') { |
3893 | 476 | title_end = i + 1; |
3894 | 31.9k | } else { |
3895 | 31.9k | title_end = i; |
3896 | 31.9k | } |
3897 | | /* stepping back */ |
3898 | 32.4k | i -= 1; |
3899 | 33.7k | while (i > title_offset && data[i] == ' ') { |
3900 | 1.33k | i -= 1; |
3901 | 1.33k | } |
3902 | 32.4k | if (i > title_offset && |
3903 | 32.4k | (data[i] == '\'' || data[i] == '"' || data[i] == ')')) { |
3904 | 24.9k | line_end = title_end; |
3905 | 24.9k | title_end = i; |
3906 | 24.9k | } |
3907 | 32.4k | } |
3908 | 77.4k | } |
3909 | | |
3910 | 113k | if (!line_end || link_end == link_offset) |
3911 | 12.5k | return 0; /* garbage after the link empty link */ |
3912 | | |
3913 | | /* a valid ref has been found, filling-in return structures */ |
3914 | 101k | if (last) |
3915 | 101k | *last = line_end; |
3916 | | |
3917 | 101k | if (refs) { |
3918 | 101k | struct link_ref *ref; |
3919 | | |
3920 | 101k | ref = add_link_ref(refs, data + id_offset, id_end - id_offset); |
3921 | 101k | if (!ref) |
3922 | 0 | return 0; |
3923 | | |
3924 | 101k | ref->link = hoedown_buffer_new(link_end - link_offset); |
3925 | 101k | hoedown_buffer_put(ref->link, data + link_offset, link_end - link_offset); |
3926 | | |
3927 | 101k | if (title_end > title_offset) { |
3928 | 34.9k | ref->title = hoedown_buffer_new(title_end - title_offset); |
3929 | 34.9k | hoedown_buffer_put(ref->title, data + title_offset, title_end - title_offset); |
3930 | 34.9k | } |
3931 | 101k | if (attr_end > attr_offset) { |
3932 | 34.2k | ref->attr = hoedown_buffer_new(attr_end - attr_offset); |
3933 | 34.2k | hoedown_buffer_put(ref->attr, data + attr_offset, attr_end - attr_offset); |
3934 | 34.2k | } |
3935 | 101k | } |
3936 | | |
3937 | 101k | return 1; |
3938 | 101k | } |
3939 | | |
3940 | | static void expand_tabs(hoedown_buffer *ob, const uint8_t *line, size_t size) |
3941 | 1.14M | { |
3942 | | /* This code makes two assumptions: |
3943 | | * - Input is valid UTF-8. (Any byte with top two bits 10 is skipped, |
3944 | | * whether or not it is a valid UTF-8 continuation byte.) |
3945 | | * - Input contains no combining characters. (Combining characters |
3946 | | * should be skipped but are not.) |
3947 | | */ |
3948 | 1.14M | size_t i = 0, tab = 0; |
3949 | | |
3950 | 38.3M | while (i < size) { |
3951 | 38.2M | size_t org = i; |
3952 | | |
3953 | 82.5M | while (i < size && line[i] != '\t') { |
3954 | | /* ignore UTF-8 continuation bytes */ |
3955 | 44.3M | if ((line[i] & 0xc0) != 0x80) |
3956 | 42.9M | tab++; |
3957 | 44.3M | i++; |
3958 | 44.3M | } |
3959 | | |
3960 | 38.2M | if (i > org) |
3961 | 1.55M | hoedown_buffer_put(ob, line + org, i - org); |
3962 | | |
3963 | 38.2M | if (i >= size) |
3964 | 1.05M | break; |
3965 | | |
3966 | 147M | do { |
3967 | 147M | hoedown_buffer_putc(ob, ' '); tab++; |
3968 | 147M | } while (tab % 4); |
3969 | | |
3970 | 37.1M | i++; |
3971 | 37.1M | } |
3972 | 1.14M | } |
3973 | | |
3974 | | /********************** |
3975 | | * EXPORTED FUNCTIONS * |
3976 | | **********************/ |
3977 | | |
3978 | | hoedown_document * |
3979 | | hoedown_document_new( |
3980 | | const hoedown_renderer *renderer, |
3981 | | hoedown_extensions extensions, |
3982 | | size_t max_nesting, |
3983 | | uint8_t attr_activation, |
3984 | | hoedown_user_block user_block, |
3985 | | hoedown_buffer *meta) |
3986 | 8.92k | { |
3987 | 8.92k | hoedown_document *doc = NULL; |
3988 | | |
3989 | 8.92k | assert(max_nesting > 0 && renderer); |
3990 | | |
3991 | 8.92k | doc = hoedown_malloc(sizeof(hoedown_document)); |
3992 | 8.92k | memcpy(&doc->md, renderer, sizeof(hoedown_renderer)); |
3993 | | |
3994 | 8.92k | doc->data.opaque = renderer->opaque; |
3995 | | |
3996 | 8.92k | hoedown_stack_init(&doc->work_bufs[BUFFER_BLOCK], 4); |
3997 | 8.92k | hoedown_stack_init(&doc->work_bufs[BUFFER_SPAN], 8); |
3998 | 8.92k | hoedown_stack_init(&doc->work_bufs[BUFFER_ATTRIBUTE], 8); |
3999 | | |
4000 | 8.92k | memset(doc->active_char, 0x0, 256); |
4001 | | |
4002 | 8.92k | if (extensions & HOEDOWN_EXT_UNDERLINE && doc->md.underline) { |
4003 | 8.92k | doc->active_char['_'] = MD_CHAR_EMPHASIS; |
4004 | 8.92k | } |
4005 | | |
4006 | 8.92k | if (doc->md.emphasis || doc->md.double_emphasis || doc->md.triple_emphasis) { |
4007 | 8.92k | doc->active_char['*'] = MD_CHAR_EMPHASIS; |
4008 | 8.92k | doc->active_char['_'] = MD_CHAR_EMPHASIS; |
4009 | 8.92k | if (extensions & HOEDOWN_EXT_STRIKETHROUGH) |
4010 | 8.92k | doc->active_char['~'] = MD_CHAR_EMPHASIS; |
4011 | 8.92k | if (extensions & HOEDOWN_EXT_HIGHLIGHT) |
4012 | 8.92k | doc->active_char['='] = MD_CHAR_EMPHASIS; |
4013 | 8.92k | } |
4014 | | |
4015 | 8.92k | if (doc->md.codespan) |
4016 | 8.92k | doc->active_char['`'] = MD_CHAR_CODESPAN; |
4017 | | |
4018 | 8.92k | if (doc->md.linebreak) |
4019 | 8.92k | doc->active_char['\n'] = MD_CHAR_LINEBREAK; |
4020 | | |
4021 | 8.92k | if (doc->md.image || doc->md.link || doc->md.footnotes || doc->md.footnote_ref) { |
4022 | 8.92k | doc->active_char['['] = MD_CHAR_LINK; |
4023 | 8.92k | doc->active_char['!'] = MD_CHAR_IMAGE; |
4024 | 8.92k | } |
4025 | | |
4026 | 8.92k | doc->active_char['<'] = MD_CHAR_LANGLE; |
4027 | 8.92k | doc->active_char['\\'] = MD_CHAR_ESCAPE; |
4028 | 8.92k | doc->active_char['&'] = MD_CHAR_ENTITY; |
4029 | | |
4030 | 8.92k | if (extensions & HOEDOWN_EXT_AUTOLINK) { |
4031 | 8.92k | doc->active_char[':'] = MD_CHAR_AUTOLINK_URL; |
4032 | 8.92k | doc->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL; |
4033 | 8.92k | doc->active_char['w'] = MD_CHAR_AUTOLINK_WWW; |
4034 | 8.92k | } |
4035 | | |
4036 | 8.92k | if (extensions & HOEDOWN_EXT_SUPERSCRIPT) |
4037 | 8.92k | doc->active_char['^'] = MD_CHAR_SUPERSCRIPT; |
4038 | | |
4039 | 8.92k | if (extensions & HOEDOWN_EXT_QUOTE) |
4040 | 8.92k | doc->active_char['"'] = MD_CHAR_QUOTE; |
4041 | | |
4042 | 8.92k | if (extensions & HOEDOWN_EXT_MATH) |
4043 | 8.92k | doc->active_char['$'] = MD_CHAR_MATH; |
4044 | | |
4045 | | /* Extension data */ |
4046 | 8.92k | doc->ext_flags = extensions; |
4047 | 8.92k | doc->max_nesting = max_nesting; |
4048 | 8.92k | doc->attr_activation = attr_activation; |
4049 | 8.92k | doc->in_link_body = 0; |
4050 | 8.92k | doc->link_id = NULL; |
4051 | 8.92k | doc->link_ref_attr = NULL; |
4052 | 8.92k | doc->link_inline_attr = NULL; |
4053 | 8.92k | doc->is_escape_char = 0; |
4054 | 8.92k | doc->header_type = HOEDOWN_HEADER_NONE; |
4055 | 8.92k | doc->link_type = HOEDOWN_LINK_NONE; |
4056 | 8.92k | doc->footnote_id = NULL; |
4057 | 8.92k | doc->list_depth = 0; |
4058 | 8.92k | doc->blockquote_depth = 0; |
4059 | 8.92k | doc->ul_item_char = 0; |
4060 | 8.92k | doc->hrule_char = 0; |
4061 | 8.92k | doc->fencedcode_char = 0; |
4062 | 8.92k | doc->ol_numeral = NULL; |
4063 | 8.92k | doc->user_block = user_block; |
4064 | 8.92k | doc->meta = meta; |
4065 | | |
4066 | 8.92k | return doc; |
4067 | 8.92k | } |
4068 | | |
4069 | | void |
4070 | | hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size) |
4071 | 8.92k | { |
4072 | 8.92k | static const uint8_t UTF8_BOM[] = {0xEF, 0xBB, 0xBF}; |
4073 | | |
4074 | 8.92k | hoedown_buffer *text; |
4075 | 8.92k | size_t beg, end; |
4076 | | |
4077 | 8.92k | int footnotes_enabled; |
4078 | | |
4079 | 8.92k | text = hoedown_buffer_new(64); |
4080 | | |
4081 | | /* Preallocate enough space for our buffer to avoid expanding while copying */ |
4082 | 8.92k | hoedown_buffer_grow(text, size); |
4083 | | |
4084 | | /* reset the references table */ |
4085 | 8.92k | memset(&doc->refs, 0x0, REF_TABLE_SIZE * sizeof(void *)); |
4086 | | |
4087 | 8.92k | footnotes_enabled = doc->ext_flags & HOEDOWN_EXT_FOOTNOTES; |
4088 | | |
4089 | | /* reset the footnotes lists */ |
4090 | 8.92k | if (footnotes_enabled) { |
4091 | 8.92k | memset(&doc->footnotes_found, 0x0, sizeof(doc->footnotes_found)); |
4092 | 8.92k | memset(&doc->footnotes_used, 0x0, sizeof(doc->footnotes_used)); |
4093 | 8.92k | } |
4094 | | |
4095 | | /* first pass: looking for references, copying everything else */ |
4096 | 8.92k | beg = 0; |
4097 | | |
4098 | | /* Skip a possible UTF-8 BOM, even though the Unicode standard |
4099 | | * discourages having these in UTF-8 documents */ |
4100 | 8.92k | if (size >= 3 && memcmp(data, UTF8_BOM, 3) == 0) |
4101 | 62 | beg += 3; |
4102 | | |
4103 | 1.57M | while (beg < size) /* iterating over lines */ |
4104 | 1.56M | if (footnotes_enabled && is_footnote(data, beg, size, &end, &doc->footnotes_found)) { |
4105 | 192k | if (doc->md.footnote_ref_def) { |
4106 | 0 | hoedown_buffer original = { NULL, 0, 0, 0, NULL, NULL, NULL }; |
4107 | 0 | original.data = (uint8_t*) (data + beg); |
4108 | 0 | original.size = end - beg; |
4109 | 0 | doc->md.footnote_ref_def(&original, &doc->data); |
4110 | 0 | } |
4111 | 192k | beg = end; |
4112 | 1.37M | } else if (is_html_comment(data, beg, size, &end)) { |
4113 | 26.2k | size_t i = 0; |
4114 | 9.72M | while (i < (end - beg) && beg + i < size) { |
4115 | 9.69M | if (data[beg + i] == '\t' && (data[beg + i] & 0xc0) != 0x80) { |
4116 | 3.24M | hoedown_buffer_put(text, (uint8_t*)" ", 4); |
4117 | 6.45M | } else { |
4118 | 6.45M | hoedown_buffer_putc(text, data[beg + i]); |
4119 | 6.45M | } |
4120 | 9.69M | i++; |
4121 | 9.69M | } |
4122 | 26.2k | beg = end; |
4123 | 1.35M | } else if (is_ref(data, beg, size, &end, doc->refs)) { |
4124 | 101k | if (doc->md.ref) { |
4125 | 0 | hoedown_buffer original = { NULL, 0, 0, 0, NULL, NULL, NULL }; |
4126 | 0 | original.data = (uint8_t*) (data + beg); |
4127 | 0 | original.size = end - beg; |
4128 | 0 | doc->md.ref(&original, &doc->data); |
4129 | 0 | } |
4130 | 101k | beg = end; |
4131 | 1.24M | } else { /* skipping to the next line */ |
4132 | 1.24M | end = beg; |
4133 | 82.7M | while (end < size && data[end] != '\n' && data[end] != '\r') |
4134 | 81.4M | end++; |
4135 | | |
4136 | | /* adding the line body if present */ |
4137 | 1.24M | if (end > beg) |
4138 | 1.14M | expand_tabs(text, data + beg, end - beg); |
4139 | | |
4140 | 2.74M | while (end < size && (data[end] == '\n' || data[end] == '\r')) { |
4141 | | /* add one \n per newline */ |
4142 | 1.49M | if (data[end] == '\n' || (end + 1 < size && data[end + 1] != '\n')) |
4143 | 1.49M | hoedown_buffer_putc(text, '\n'); |
4144 | 1.49M | end++; |
4145 | 1.49M | } |
4146 | | |
4147 | 1.24M | beg = end; |
4148 | 1.24M | } |
4149 | | |
4150 | | /* pre-grow the output buffer to minimize allocations */ |
4151 | 8.92k | hoedown_buffer_grow(ob, text->size + (text->size >> 1)); |
4152 | | |
4153 | | /* second pass: actual rendering */ |
4154 | 8.92k | if (doc->md.doc_header) |
4155 | 0 | doc->md.doc_header(ob, 0, &doc->data); |
4156 | | |
4157 | 8.92k | if (text->size) { |
4158 | | /* adding a final newline if not already present */ |
4159 | 8.82k | if (text->data[text->size - 1] != '\n') |
4160 | 7.73k | hoedown_buffer_putc(text, '\n'); |
4161 | | |
4162 | 8.82k | parse_block(ob, doc, text->data, text->size); |
4163 | 8.82k | } |
4164 | | |
4165 | | /* footnotes */ |
4166 | 8.92k | if (footnotes_enabled) |
4167 | 8.92k | parse_footnote_list(ob, doc, &doc->footnotes_used); |
4168 | | |
4169 | 8.92k | if (doc->md.doc_footer) |
4170 | 0 | doc->md.doc_footer(ob, 0, &doc->data); |
4171 | | |
4172 | | /* clean-up */ |
4173 | 8.92k | hoedown_buffer_free(text); |
4174 | 8.92k | free_link_refs(doc->refs); |
4175 | 8.92k | if (footnotes_enabled) { |
4176 | 8.92k | free_footnote_list(&doc->footnotes_found, 1); |
4177 | 8.92k | free_footnote_list(&doc->footnotes_used, 0); |
4178 | 8.92k | } |
4179 | | |
4180 | 8.92k | assert(doc->work_bufs[BUFFER_SPAN].size == 0); |
4181 | 8.92k | assert(doc->work_bufs[BUFFER_BLOCK].size == 0); |
4182 | 8.92k | assert(doc->work_bufs[BUFFER_ATTRIBUTE].size == 0); |
4183 | 8.92k | } |
4184 | | |
4185 | | void |
4186 | | hoedown_document_render_inline(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size) |
4187 | 0 | { |
4188 | 0 | size_t i = 0, mark; |
4189 | 0 | hoedown_buffer *text = hoedown_buffer_new(64); |
4190 | | |
4191 | | /* reset the references table */ |
4192 | 0 | memset(&doc->refs, 0x0, REF_TABLE_SIZE * sizeof(void *)); |
4193 | | |
4194 | | /* first pass: expand tabs and process newlines */ |
4195 | 0 | hoedown_buffer_grow(text, size); |
4196 | 0 | while (1) { |
4197 | 0 | mark = i; |
4198 | 0 | while (i < size && data[i] != '\n' && data[i] != '\r') |
4199 | 0 | i++; |
4200 | |
|
4201 | 0 | expand_tabs(text, data + mark, i - mark); |
4202 | |
|
4203 | 0 | if (i >= size) |
4204 | 0 | break; |
4205 | | |
4206 | 0 | while (i < size && (data[i] == '\n' || data[i] == '\r')) { |
4207 | | /* add one \n per newline */ |
4208 | 0 | if (data[i] == '\n' || (i + 1 < size && data[i + 1] != '\n')) |
4209 | 0 | hoedown_buffer_putc(text, '\n'); |
4210 | 0 | i++; |
4211 | 0 | } |
4212 | 0 | } |
4213 | | |
4214 | | /* second pass: actual rendering */ |
4215 | 0 | hoedown_buffer_grow(ob, text->size + (text->size >> 1)); |
4216 | |
|
4217 | 0 | if (doc->md.doc_header) |
4218 | 0 | doc->md.doc_header(ob, 1, &doc->data); |
4219 | |
|
4220 | 0 | parse_inline(ob, doc, text->data, text->size); |
4221 | |
|
4222 | 0 | if (doc->md.doc_footer) |
4223 | 0 | doc->md.doc_footer(ob, 1, &doc->data); |
4224 | | |
4225 | | /* clean-up */ |
4226 | 0 | hoedown_buffer_free(text); |
4227 | |
|
4228 | 0 | assert(doc->work_bufs[BUFFER_SPAN].size == 0); |
4229 | 0 | assert(doc->work_bufs[BUFFER_BLOCK].size == 0); |
4230 | 0 | } |
4231 | | |
4232 | | void |
4233 | | hoedown_document_free(hoedown_document *doc) |
4234 | 8.92k | { |
4235 | 8.92k | size_t i; |
4236 | | |
4237 | 86.7k | for (i = 0; i < (size_t)doc->work_bufs[BUFFER_SPAN].asize; ++i) |
4238 | 77.8k | hoedown_buffer_free(doc->work_bufs[BUFFER_SPAN].item[i]); |
4239 | | |
4240 | 49.7k | for (i = 0; i < (size_t)doc->work_bufs[BUFFER_BLOCK].asize; ++i) |
4241 | 40.8k | hoedown_buffer_free(doc->work_bufs[BUFFER_BLOCK].item[i]); |
4242 | | |
4243 | 80.8k | for (i = 0; i < (size_t)doc->work_bufs[BUFFER_ATTRIBUTE].asize; ++i) |
4244 | 71.8k | hoedown_buffer_free(doc->work_bufs[BUFFER_ATTRIBUTE].item[i]); |
4245 | | |
4246 | 8.92k | hoedown_stack_uninit(&doc->work_bufs[BUFFER_SPAN]); |
4247 | 8.92k | hoedown_stack_uninit(&doc->work_bufs[BUFFER_BLOCK]); |
4248 | 8.92k | hoedown_stack_uninit(&doc->work_bufs[BUFFER_ATTRIBUTE]); |
4249 | | |
4250 | 8.92k | free(doc); |
4251 | 8.92k | } |
4252 | | |
4253 | | const hoedown_buffer* |
4254 | | hoedown_document_link_id(hoedown_document* document) |
4255 | 0 | { |
4256 | 0 | return document->link_id; |
4257 | 0 | } |
4258 | | |
4259 | | const hoedown_buffer* |
4260 | | hoedown_document_link_ref_attr(hoedown_document* document) |
4261 | 0 | { |
4262 | 0 | return document->link_ref_attr; |
4263 | 0 | } |
4264 | | |
4265 | | const hoedown_buffer* |
4266 | | hoedown_document_link_inline_attr(hoedown_document* document) |
4267 | 0 | { |
4268 | 0 | return document->link_inline_attr; |
4269 | 0 | } |
4270 | | |
4271 | | int |
4272 | | hoedown_document_is_escaped(hoedown_document* document) |
4273 | 0 | { |
4274 | 0 | return document->is_escape_char; |
4275 | 0 | } |
4276 | | |
4277 | | hoedown_header_type |
4278 | | hoedown_document_header_type(hoedown_document* document) |
4279 | 0 | { |
4280 | 0 | return document->header_type; |
4281 | 0 | } |
4282 | | |
4283 | | hoedown_link_type |
4284 | | hoedown_document_link_type(hoedown_document* document) |
4285 | 0 | { |
4286 | 0 | return document->link_type; |
4287 | 0 | } |
4288 | | |
4289 | | const hoedown_buffer* |
4290 | | hoedown_document_footnote_id(hoedown_document* document) |
4291 | 0 | { |
4292 | 0 | return document->footnote_id; |
4293 | 0 | } |
4294 | | |
4295 | | int |
4296 | | hoedown_document_list_depth(hoedown_document* document) |
4297 | 0 | { |
4298 | 0 | return document->list_depth; |
4299 | 0 | } |
4300 | | |
4301 | | int |
4302 | | hoedown_document_blockquote_depth(hoedown_document* document) |
4303 | 0 | { |
4304 | 0 | return document->blockquote_depth; |
4305 | 0 | } |
4306 | | |
4307 | | uint8_t |
4308 | | hoedown_document_ul_item_char(hoedown_document* document) |
4309 | 0 | { |
4310 | 0 | return document->ul_item_char; |
4311 | 0 | } |
4312 | | |
4313 | | uint8_t |
4314 | | hoedown_document_hrule_char(hoedown_document* document) |
4315 | 0 | { |
4316 | 0 | return document->hrule_char; |
4317 | 0 | } |
4318 | | |
4319 | | uint8_t |
4320 | | hoedown_document_fencedcode_char(hoedown_document* document) |
4321 | 0 | { |
4322 | 0 | return document->fencedcode_char; |
4323 | 0 | } |
4324 | | |
4325 | | const hoedown_buffer* |
4326 | | hoedown_document_ol_numeral(hoedown_document* document) |
4327 | 0 | { |
4328 | 0 | return document->ol_numeral; |
4329 | 0 | } |