/src/hoextdown/src/document.c
Line | Count | Source (jump to first uncovered line) |
1 | | #include "document.h" |
2 | | |
3 | | #include <assert.h> |
4 | | #include <string.h> |
5 | | #include <ctype.h> |
6 | | #include <stdio.h> |
7 | | |
8 | | #include "stack.h" |
9 | | |
10 | | #ifndef _MSC_VER |
11 | | #include <strings.h> |
12 | | #else |
13 | | #define strncasecmp _strnicmp |
14 | | #endif |
15 | | |
/* number of buckets in the link reference hash table (see link_ref) */
#define REF_TABLE_SIZE 8

/* indices selecting one of the three work-buffer pools of a document */
#define BUFFER_BLOCK 0
#define BUFFER_SPAN 1
#define BUFFER_ATTRIBUTE 2

/* block-tag lookup helpers; no definition is visible in this part of the file */
const char *hoedown_find_block_tag(const char *str, unsigned int len);
const char *hoedown_find_html5_block_tag(const char *str, unsigned int len);
24 | | |
25 | | /*************** |
26 | | * LOCAL TYPES * |
27 | | ***************/ |
28 | | |
29 | | /* link_ref: reference to a link */ |
30 | | struct link_ref { |
31 | | unsigned int id; |
32 | | |
33 | | hoedown_buffer *link; |
34 | | hoedown_buffer *title; |
35 | | hoedown_buffer *attr; |
36 | | |
37 | | struct link_ref *next; |
38 | | }; |
39 | | |
40 | | /* footnote_ref: reference to a footnote */ |
41 | | struct footnote_ref { |
42 | | unsigned int id; |
43 | | |
44 | | int is_used; |
45 | | unsigned int num; |
46 | | |
47 | | hoedown_buffer *contents; |
48 | | |
49 | | /* the original string id of the footnote, before conversion to an int */ |
50 | | hoedown_buffer *name; |
51 | | }; |
52 | | |
/* footnote_item: a node of a footnote_list, pointing at one footnote_ref */
struct footnote_item {
    struct footnote_ref *ref;   /* the footnote carried by this node */
    struct footnote_item *next; /* following node, or NULL at the tail */
};
58 | | |
/* footnote_list: singly linked list of footnote_item with O(1) append */
struct footnote_list {
    unsigned int count;         /* number of nodes appended so far */
    struct footnote_item *head; /* first node, NULL when the list is empty */
    struct footnote_item *tail; /* last node, the append position */
};
65 | | |
66 | | /* char_trigger: function pointer to render active chars */ |
67 | | /* returns the number of chars taken care of */ |
68 | | /* data is the pointer of the beginning of the span */ |
69 | | /* offset is the number of valid chars before data */ |
70 | | typedef size_t |
71 | | (*char_trigger)(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
72 | | |
73 | | static size_t char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
74 | | static size_t char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
75 | | static size_t char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
76 | | static size_t char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
77 | | static size_t char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
78 | | static size_t char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
79 | | static size_t char_langle_tag(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
80 | | static size_t char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
81 | | static size_t char_autolink_email(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
82 | | static size_t char_autolink_www(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
83 | | static size_t char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
84 | | static size_t char_image(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
85 | | static size_t char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
86 | | static size_t char_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size); |
87 | | |
/*
 * Kinds of active character. The numeric value is used as an index into
 * the handler table, so the values must stay in step with that table.
 */
enum markdown_char_t {
    MD_CHAR_NONE           = 0,
    MD_CHAR_EMPHASIS       = 1,
    MD_CHAR_CODESPAN       = 2,
    MD_CHAR_LINEBREAK      = 3,
    MD_CHAR_LINK           = 4,
    MD_CHAR_IMAGE          = 5,
    MD_CHAR_LANGLE         = 6,
    MD_CHAR_ESCAPE         = 7,
    MD_CHAR_ENTITY         = 8,
    MD_CHAR_AUTOLINK_URL   = 9,
    MD_CHAR_AUTOLINK_EMAIL = 10,
    MD_CHAR_AUTOLINK_WWW   = 11,
    MD_CHAR_SUPERSCRIPT    = 12,
    MD_CHAR_QUOTE          = 13,
    MD_CHAR_MATH           = 14
};
105 | | |
106 | | static char_trigger markdown_char_ptrs[] = { |
107 | | NULL, |
108 | | &char_emphasis, |
109 | | &char_codespan, |
110 | | &char_linebreak, |
111 | | &char_link, |
112 | | &char_image, |
113 | | &char_langle_tag, |
114 | | &char_escape, |
115 | | &char_entity, |
116 | | &char_autolink_url, |
117 | | &char_autolink_email, |
118 | | &char_autolink_www, |
119 | | &char_superscript, |
120 | | &char_quote, |
121 | | &char_math |
122 | | }; |
123 | | |
124 | | struct hoedown_document { |
125 | | hoedown_renderer md; |
126 | | hoedown_renderer_data data; |
127 | | |
128 | | uint8_t attr_activation; |
129 | | |
130 | | struct link_ref *refs[REF_TABLE_SIZE]; |
131 | | struct footnote_list footnotes_found; |
132 | | struct footnote_list footnotes_used; |
133 | | uint8_t active_char[256]; |
134 | | hoedown_stack work_bufs[3]; |
135 | | hoedown_extensions ext_flags; |
136 | | size_t max_nesting; |
137 | | int in_link_body; |
138 | | |
139 | | /* extra information provided to callbacks */ |
140 | | const hoedown_buffer *link_id; |
141 | | const hoedown_buffer *link_inline_attr; |
142 | | const hoedown_buffer *link_ref_attr; |
143 | | int is_escape_char; |
144 | | hoedown_header_type header_type; |
145 | | hoedown_link_type link_type; |
146 | | const hoedown_buffer *footnote_id; |
147 | | int list_depth; |
148 | | int blockquote_depth; |
149 | | uint8_t ul_item_char; |
150 | | uint8_t hrule_char; |
151 | | uint8_t fencedcode_char; |
152 | | const hoedown_buffer *ol_numeral; |
153 | | |
154 | | hoedown_user_block user_block; |
155 | | hoedown_buffer *meta; |
156 | | }; |
157 | | |
158 | | /*************************** |
159 | | * HELPER FUNCTIONS * |
160 | | ***************************/ |
161 | | |
162 | | static hoedown_buffer * |
163 | | newbuf(hoedown_document *doc, int type) |
164 | 32.5M | { |
165 | 32.5M | static const size_t buf_size[3] = {256, 64, 64}; |
166 | 32.5M | hoedown_buffer *work = NULL; |
167 | 32.5M | hoedown_stack *pool = &doc->work_bufs[type]; |
168 | | |
169 | 32.5M | if (pool->size < pool->asize && |
170 | 32.5M | pool->item[pool->size] != NULL) { |
171 | 32.5M | work = pool->item[pool->size++]; |
172 | 32.5M | work->size = 0; |
173 | 32.5M | } else { |
174 | 75.6k | work = hoedown_buffer_new(buf_size[type]); |
175 | 75.6k | hoedown_stack_push(pool, work); |
176 | 75.6k | } |
177 | | |
178 | 32.5M | return work; |
179 | 32.5M | } |
180 | | |
181 | | static void |
182 | | popbuf(hoedown_document *doc, int type) |
183 | 31.3M | { |
184 | 31.3M | doc->work_bufs[type].size--; |
185 | 31.3M | } |
186 | | |
187 | | static void |
188 | | unscape_text(hoedown_buffer *ob, hoedown_buffer *src) |
189 | 305k | { |
190 | 305k | size_t i = 0, org; |
191 | 311k | while (i < src->size) { |
192 | 310k | org = i; |
193 | 18.6M | while (i < src->size && src->data[i] != '\\') |
194 | 18.3M | i++; |
195 | | |
196 | 310k | if (i > org) |
197 | 226k | hoedown_buffer_put(ob, src->data + org, i - org); |
198 | | |
199 | 310k | if (i + 1 >= src->size) |
200 | 305k | break; |
201 | | |
202 | 5.94k | hoedown_buffer_putc(ob, src->data[i + 1]); |
203 | 5.94k | i += 2; |
204 | 5.94k | } |
205 | 305k | } |
206 | | |
/*
 * hash_link_ref: case-insensitive hash of a reference name.
 *
 * Every byte is lowered with tolower() before being mixed in, so names
 * that differ only in ASCII case hash to the same value. An empty name
 * hashes to 0.
 */
static unsigned int
hash_link_ref(const uint8_t *link_ref, size_t length)
{
    unsigned int h = 0;
    size_t pos = 0;

    while (pos < length) {
        h = tolower(link_ref[pos]) + (h << 6) + (h << 16) - h;
        pos++;
    }

    return h;
}
218 | | |
219 | | static struct link_ref * |
220 | | add_link_ref( |
221 | | struct link_ref **references, |
222 | | const uint8_t *name, size_t name_size) |
223 | 132k | { |
224 | 132k | struct link_ref *ref = hoedown_calloc(1, sizeof(struct link_ref)); |
225 | | |
226 | 132k | ref->id = hash_link_ref(name, name_size); |
227 | 132k | ref->next = references[ref->id % REF_TABLE_SIZE]; |
228 | | |
229 | 132k | references[ref->id % REF_TABLE_SIZE] = ref; |
230 | 132k | return ref; |
231 | 132k | } |
232 | | |
233 | | static struct link_ref * |
234 | | find_link_ref(struct link_ref **references, uint8_t *name, size_t length) |
235 | 555k | { |
236 | 555k | unsigned int hash = hash_link_ref(name, length); |
237 | 555k | struct link_ref *ref = NULL; |
238 | | |
239 | 555k | ref = references[hash % REF_TABLE_SIZE]; |
240 | | |
241 | 30.8M | while (ref != NULL) { |
242 | 30.6M | if (ref->id == hash) |
243 | 294k | return ref; |
244 | | |
245 | 30.3M | ref = ref->next; |
246 | 30.3M | } |
247 | | |
248 | 260k | return NULL; |
249 | 555k | } |
250 | | |
251 | | static void |
252 | | free_link_refs(struct link_ref **references) |
253 | 10.1k | { |
254 | 10.1k | size_t i; |
255 | | |
256 | 91.7k | for (i = 0; i < REF_TABLE_SIZE; ++i) { |
257 | 81.5k | struct link_ref *r = references[i]; |
258 | 81.5k | struct link_ref *next; |
259 | | |
260 | 214k | while (r) { |
261 | 132k | next = r->next; |
262 | 132k | hoedown_buffer_free(r->link); |
263 | 132k | hoedown_buffer_free(r->title); |
264 | 132k | hoedown_buffer_free(r->attr); |
265 | 132k | free(r); |
266 | 132k | r = next; |
267 | 132k | } |
268 | 81.5k | } |
269 | 10.1k | } |
270 | | |
271 | | static struct footnote_ref * |
272 | | create_footnote_ref(struct footnote_list *list, const uint8_t *name, size_t name_size) |
273 | 497k | { |
274 | 497k | struct footnote_ref *ref = hoedown_calloc(1, sizeof(struct footnote_ref)); |
275 | | |
276 | 497k | ref->id = hash_link_ref(name, name_size); |
277 | | |
278 | 497k | return ref; |
279 | 497k | } |
280 | | |
281 | | static int |
282 | | add_footnote_ref(struct footnote_list *list, struct footnote_ref *ref) |
283 | 497k | { |
284 | 497k | struct footnote_item *item = hoedown_calloc(1, sizeof(struct footnote_item)); |
285 | 497k | if (!item) |
286 | 0 | return 0; |
287 | 497k | item->ref = ref; |
288 | | |
289 | 497k | if (list->head == NULL) { |
290 | 1.61k | list->head = list->tail = item; |
291 | 496k | } else { |
292 | 496k | list->tail->next = item; |
293 | 496k | list->tail = item; |
294 | 496k | } |
295 | 497k | list->count++; |
296 | | |
297 | 497k | return 1; |
298 | 497k | } |
299 | | |
300 | | static struct footnote_ref * |
301 | | find_footnote_ref(struct footnote_list *list, uint8_t *name, size_t length) |
302 | 14.6k | { |
303 | 14.6k | unsigned int hash = hash_link_ref(name, length); |
304 | 14.6k | struct footnote_item *item = NULL; |
305 | | |
306 | 14.6k | item = list->head; |
307 | | |
308 | 26.5M | while (item != NULL) { |
309 | 26.5M | if (item->ref->id == hash) |
310 | 6.45k | return item->ref; |
311 | 26.5M | item = item->next; |
312 | 26.5M | } |
313 | | |
314 | 8.16k | return NULL; |
315 | 14.6k | } |
316 | | |
317 | | static void |
318 | | free_footnote_ref(struct footnote_ref *ref) |
319 | 497k | { |
320 | 497k | hoedown_buffer_free(ref->contents); |
321 | 497k | hoedown_buffer_free(ref->name); |
322 | 497k | free(ref); |
323 | 497k | } |
324 | | |
325 | | static void |
326 | | free_footnote_list(struct footnote_list *list, int free_refs) |
327 | 20.3k | { |
328 | 20.3k | struct footnote_item *item = list->head; |
329 | 20.3k | struct footnote_item *next; |
330 | | |
331 | 518k | while (item) { |
332 | 497k | next = item->next; |
333 | 497k | if (free_refs) |
334 | 497k | free_footnote_ref(item->ref); |
335 | 497k | free(item); |
336 | 497k | item = next; |
337 | 497k | } |
338 | 20.3k | } |
339 | | |
340 | | |
/*
 * Check whether a char is a Markdown spacing char.
 *
 * Only the space character and the newline count as spacing here; per
 * the original note, tabs and carriage returns are filtered out during
 * the preprocessing phase and never reach this function.
 *
 * This is a byte-level test. Being fully UTF-8 aware would require
 * decoding a Unicode codepoint and checking its space property instead.
 */
static int
_isspace(int c)
{
    switch (c) {
    case ' ':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
357 | | |
/*
 * is_empty_all: return 1 when every byte of `data` is spacing (see
 * _isspace), 0 otherwise. An empty range counts as all spacing.
 */
static int
is_empty_all(const uint8_t *data, size_t size)
{
    size_t pos;

    for (pos = 0; pos < size; pos++) {
        if (!_isspace(data[pos]))
            return 0;
    }

    return 1;
}
366 | | |
367 | | /* |
368 | | * Replace all spacing characters in data with spaces. As a special |
369 | | * case, this collapses a newline with the previous space, if possible. |
370 | | */ |
371 | | static void |
372 | | replace_spacing(hoedown_buffer *ob, const uint8_t *data, size_t size) |
373 | 525k | { |
374 | 525k | size_t i = 0, mark; |
375 | 525k | hoedown_buffer_grow(ob, size); |
376 | 1.21M | while (1) { |
377 | 1.21M | mark = i; |
378 | 85.3M | while (i < size && data[i] != '\n') i++; |
379 | 1.21M | hoedown_buffer_put(ob, data + mark, i - mark); |
380 | | |
381 | 1.21M | if (i >= size) break; |
382 | | |
383 | 690k | if (!(i > 0 && data[i-1] == ' ')) |
384 | 217k | hoedown_buffer_putc(ob, ' '); |
385 | 690k | i++; |
386 | 690k | } |
387 | 525k | } |
388 | | |
389 | | /**************************** |
390 | | * INLINE PARSING FUNCTIONS * |
391 | | ****************************/ |
392 | | |
/*
 * is_mail_autolink: scan the address part of a mail autolink up to '>'.
 *
 * Accepted bytes are [-@._a-zA-Z0-9]; this is deliberately looser than
 * the original Markdown e-mail matching. Returns the length including
 * the closing '>' when exactly one '@' was seen before it, otherwise 0
 * (also when an unexpected byte appears or no '>' is found).
 */
static size_t
is_mail_autolink(uint8_t *data, size_t size)
{
    size_t pos;
    size_t at_signs = 0;

    for (pos = 0; pos < size; ++pos) {
        uint8_t ch = data[pos];

        if (isalnum(ch))
            continue;

        if (ch == '@') {
            at_signs++;
            continue;
        }

        if (ch == '-' || ch == '.' || ch == '_')
            continue;

        if (ch == '>')
            return (at_signs == 1) ? pos + 1 : 0;

        /* any other byte cannot be part of an address */
        return 0;
    }

    return 0;
}
424 | | |
/*
 * script_tag_length: measure a "<? ... ?>" block.
 *
 * Returns 0 unless `data` starts with "<?" and holds at least 3 bytes.
 * Otherwise returns the length up to and including the first "?>" that
 * is outside a '...' or "..." quoted run, or `size` when no such
 * terminator exists. Inside a quoted run, a quote character preceded by
 * a backslash does not close the run.
 */
static size_t
script_tag_length(uint8_t *data, size_t size)
{
    size_t pos;
    uint8_t quote = 0; /* the open quote character, 0 when outside quotes */

    if (size < 3 || data[0] != '<' || data[1] != '?')
        return 0;

    for (pos = 2; pos < size; ++pos) {
        uint8_t ch = data[pos];

        if (quote == 0 && ch == '>' && data[pos - 1] == '?')
            return pos + 1;

        if (ch != '\'' && ch != '"')
            continue;

        if (quote == 0)
            quote = ch;
        else if (ch == quote && data[pos - 1] != '\\')
            quote = 0;
    }

    /* unterminated: the whole input is reported */
    return pos;
}
459 | | |
460 | | /* tag_length • returns the length of the given tag, or 0 is it's not valid */ |
461 | | static size_t |
462 | | tag_length(uint8_t *data, size_t size, hoedown_autolink_type *autolink, int script_tag) |
463 | 234k | { |
464 | 234k | size_t i, j; |
465 | | |
466 | | /* a valid tag can't be shorter than 3 chars */ |
467 | 234k | if (size < 3) return 0; |
468 | | |
469 | 218k | if (data[0] != '<') return 0; |
470 | | |
471 | | /* HTML comment, laxist form */ |
472 | 218k | if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') { |
473 | 39.4k | i = 5; |
474 | | |
475 | 9.25M | while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>')) |
476 | 9.21M | i++; |
477 | | |
478 | 39.4k | i++; |
479 | | |
480 | 39.4k | if (i <= size) |
481 | 17.7k | return i; |
482 | 39.4k | } |
483 | | |
484 | | /* begins with a '<' optionally followed by '/', followed by letter or number */ |
485 | 201k | i = (data[1] == '/') ? 2 : 1; |
486 | | |
487 | 201k | if (!isalnum(data[i])) { |
488 | 96.9k | if (script_tag) { |
489 | 96.9k | return script_tag_length(data, size); |
490 | 96.9k | } |
491 | 0 | return 0; |
492 | 96.9k | } |
493 | | |
494 | | /* scheme test */ |
495 | 104k | *autolink = HOEDOWN_AUTOLINK_NONE; |
496 | | |
497 | | /* try to find the beginning of an URI */ |
498 | 691k | while (i < size && (isalnum(data[i]) || data[i] == '.' || data[i] == '+' || data[i] == '-')) |
499 | 587k | i++; |
500 | | |
501 | 104k | if (i > 1 && i < size && data[i] == '@') { |
502 | 8.63k | if ((j = is_mail_autolink(data + i, size - i)) != 0) { |
503 | 1.08k | *autolink = HOEDOWN_AUTOLINK_EMAIL; |
504 | 1.08k | return i + j; |
505 | 1.08k | } |
506 | 8.63k | } |
507 | | |
508 | 103k | if (i > 2 && i < size && data[i] == ':') { |
509 | 27.2k | *autolink = HOEDOWN_AUTOLINK_NORMAL; |
510 | 27.2k | i++; |
511 | 27.2k | } |
512 | | |
513 | | /* completing autolink test: no spacing or ' or " */ |
514 | 103k | if (i >= size) |
515 | 5.57k | *autolink = HOEDOWN_AUTOLINK_NONE; |
516 | | |
517 | 97.4k | else if (*autolink) { |
518 | 27.2k | j = i; |
519 | | |
520 | 45.8M | while (i < size) { |
521 | 45.8M | if (data[i] == '\\') i += 2; |
522 | 45.8M | else if (data[i] == '>' || data[i] == '\'' || |
523 | 45.8M | data[i] == '"' || data[i] == ' ' || data[i] == '\n') |
524 | 18.5k | break; |
525 | 45.8M | else i++; |
526 | 45.8M | } |
527 | | |
528 | 27.2k | if (i >= size) return 0; |
529 | 18.5k | if (i > j && data[i] == '>') return i + 1; |
530 | | /* one of the forbidden chars has been found */ |
531 | 11.2k | *autolink = HOEDOWN_AUTOLINK_NONE; |
532 | 11.2k | } |
533 | | |
534 | | /* looking for something looking like a tag end */ |
535 | 159M | while (i < size && data[i] != '>') i++; |
536 | 87.0k | if (i >= size) return 0; |
537 | 25.7k | return i + 1; |
538 | 87.0k | } |
539 | | |
540 | | /* parse_inline • parses inline markdown elements */ |
541 | | static void |
542 | | parse_inline(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size) |
543 | 1.05M | { |
544 | 1.05M | size_t i = 0, end = 0, consumed = 0; |
545 | 1.05M | hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL }; |
546 | 1.05M | uint8_t *active_char = doc->active_char; |
547 | | |
548 | 1.05M | if (doc->work_bufs[BUFFER_SPAN].size + |
549 | 1.05M | doc->work_bufs[BUFFER_BLOCK].size > doc->max_nesting) |
550 | 3.35k | return; |
551 | | |
552 | 7.44M | while (i < size) { |
553 | 7.22M | size_t user_block = 0; |
554 | 134M | while (end < size) { |
555 | 133M | if (doc->user_block) { |
556 | 0 | user_block = doc->user_block(data+end, size - end, &doc->data); |
557 | 0 | if (user_block) { |
558 | 0 | break; |
559 | 0 | } |
560 | 0 | } |
561 | | /* copying inactive chars into the output */ |
562 | 133M | if (active_char[data[end]] != 0) { |
563 | 6.39M | break; |
564 | 6.39M | } |
565 | 127M | end++; |
566 | 127M | } |
567 | | |
568 | 7.22M | if (doc->md.normal_text) { |
569 | 7.22M | work.data = data + i; |
570 | 7.22M | work.size = end - i; |
571 | 7.22M | doc->md.normal_text(ob, &work, &doc->data); |
572 | 7.22M | } |
573 | 0 | else |
574 | 0 | hoedown_buffer_put(ob, data + i, end - i); |
575 | | |
576 | 7.22M | if (end >= size) { |
577 | 830k | break; |
578 | 830k | } |
579 | 6.39M | i = end; |
580 | | |
581 | 6.39M | if (user_block) { |
582 | 0 | work.data = data + i; |
583 | 0 | work.size = user_block; |
584 | 0 | end = user_block; |
585 | 0 | if (doc->md.user_block) { |
586 | 0 | doc->md.user_block(ob, &work, &doc->data); |
587 | 0 | } else { |
588 | 0 | hoedown_buffer_put(ob, data + i, size - i); |
589 | 0 | } |
590 | 0 | if (!end) { |
591 | 0 | end = i + 1; |
592 | 0 | } else { |
593 | 0 | i += end; |
594 | 0 | end = i; |
595 | 0 | consumed = i; |
596 | 0 | } |
597 | 6.39M | } else { |
598 | 6.39M | end = markdown_char_ptrs[ (int)active_char[data[end]] ](ob, doc, data + i, i - consumed, size - i); |
599 | 6.39M | if (!end) /* no action from the callback */ |
600 | 5.42M | end = i + 1; |
601 | 966k | else { |
602 | 966k | i += end; |
603 | 966k | end = i; |
604 | 966k | consumed = i; |
605 | 966k | } |
606 | 6.39M | } |
607 | 6.39M | } |
608 | 1.05M | } |
609 | | |
/*
 * parse_inline_attributes: parse an attribute block located at the very
 * start of `data` ("{...}", or "{X...}" when an activation character X
 * is configured). parse_attributes() is the counterpart that looks for
 * attributes at the end of the data.
 *
 * On success the text between the opening delimiter and the first
 * unescaped '}' is appended to `attr` (skipped when `attr` is NULL) and
 * the position just past the '}' is returned. Returns 0 when the data
 * does not start with an attribute block or the block is never closed.
 */
static size_t parse_inline_attributes(uint8_t *data, size_t size, struct hoedown_buffer *attr, uint8_t attr_activation)
{
    size_t content_start, pos;

    if (size < 1)
        return 0;

    if (data[0] != '{')
        return 0;

    /* when an activation character is configured it must follow the '{' */
    if (attr_activation && !(size > 1 && data[1] == attr_activation))
        return 0;

    content_start = attr_activation ? 2 : 1;

    pos = 0;
    while (pos < size) {
        /* a backslash hides the byte after it */
        if (data[pos] == '\\') {
            pos += 2;
            continue;
        }

        if (data[pos] == '}') {
            if (attr != NULL)
                hoedown_buffer_put(attr, data + content_start, pos - content_start);
            return pos + 1;
        }

        pos++;
    }

    return 0;
}
643 | | |
644 | | |
645 | | /* parse_attributes • parses special attributes at the end of the data */ |
646 | | static size_t parse_attributes(uint8_t *data, size_t size, struct hoedown_buffer *attr, struct hoedown_buffer *block_attr, const char *block_id, int is_header, uint8_t attr_activation) |
647 | 891k | { |
648 | 891k | size_t i, len, begin = 0, end = 0; |
649 | | |
650 | 891k | if (size < 1) |
651 | 36.2k | return 0; |
652 | | |
653 | 854k | i = size; |
654 | 1.09M | while (i && data[i-1] == '\n') { |
655 | 242k | i--; |
656 | 242k | } |
657 | 854k | len = i; |
658 | | |
659 | 854k | if (i && data[i-1] == '}') { |
660 | 31.1M | do { |
661 | 31.1M | i--; |
662 | 31.1M | } while (i && data[i] != '{'); |
663 | | |
664 | 88.3k | begin = i + 1; |
665 | 88.3k | end = len - 1; |
666 | 423k | while (i && data[i-1] == ' ') { |
667 | 334k | i--; |
668 | 334k | } |
669 | 88.3k | } |
670 | | |
671 | 854k | if (is_header && i && data[i-1] == '#') { |
672 | 16.1k | while (i && data[i-1] == '#') { |
673 | 8.41k | i--; |
674 | 8.41k | } |
675 | 15.8k | while (i && data[i-1] == ' ') { |
676 | 8.11k | i--; |
677 | 8.11k | } |
678 | 7.76k | } |
679 | | |
680 | 854k | if (begin && end && data[begin-1] == '{' && data[end] == '}') { |
681 | 77.2k | if (begin >=2 && data[begin-2] == '\\' && data[end-1] == '\\') { |
682 | 230 | return len; |
683 | 230 | } |
684 | | |
685 | 77.0k | if (block_attr && data[begin] == '@') { |
686 | | /* skip the @ by incrementing past it */ |
687 | 12.8k | begin++; |
688 | 12.8k | if (*block_id) { |
689 | | /* if a block_id was fed in, check to make sure the string until the |
690 | | * space is identical */ |
691 | 51.1k | while (begin < end && *block_id) { |
692 | 41.4k | if (data[begin] != (uint8_t)(*block_id)) { |
693 | 3.15k | return len; |
694 | 3.15k | } |
695 | 38.3k | begin++; |
696 | 38.3k | block_id++; |
697 | 38.3k | } |
698 | | /* it might have matched only the first portion of block_id; make sure |
699 | | * there's no more to it here */ |
700 | 9.68k | if (*block_id) { |
701 | 919 | return len; |
702 | 919 | } |
703 | 9.68k | } |
704 | 8.76k | if (begin < end && data[begin] != ' ') { |
705 | 924 | return len; |
706 | 924 | } |
707 | 7.84k | if (block_attr) { |
708 | 7.84k | if (block_attr->size) { |
709 | 5.69k | hoedown_buffer_reset(block_attr); |
710 | 5.69k | } |
711 | 7.84k | hoedown_buffer_put(block_attr, data + begin, end - begin); |
712 | 7.84k | } |
713 | 7.84k | len = i; |
714 | 7.84k | if (attr) { |
715 | 7.84k | len = parse_attributes(data, len, attr, NULL, "", is_header, attr_activation); |
716 | 7.84k | } |
717 | 64.1k | } else if (attr && (!attr_activation || attr_activation == data[begin])) { |
718 | 50.6k | if (attr->size) { |
719 | 0 | hoedown_buffer_reset(attr); |
720 | 0 | } |
721 | 50.6k | if (attr_activation) { |
722 | 0 | begin++; |
723 | 0 | } |
724 | 50.6k | hoedown_buffer_put(attr, data + begin, end - begin); |
725 | 50.6k | len = i; |
726 | 50.6k | } |
727 | 77.0k | } |
728 | | |
729 | 849k | return len; |
730 | 854k | } |
731 | | |
/*
 * is_escaped: tell whether the character at data[loc] is escaped.
 *
 * Counts the run of backslashes immediately before `loc`; an odd count
 * means the last backslash escapes data[loc] (returns 1), an even count
 * means the backslashes escape each other (returns 0).
 */
static int
is_escaped(uint8_t *data, size_t loc)
{
    size_t backslashes = 0;

    while (backslashes < loc && data[loc - 1 - backslashes] == '\\')
        backslashes++;

    return (int)(backslashes % 2);
}
743 | | |
/*
 * is_backslashed: tell whether data[loc] is directly preceded by a
 * backslash. Stricter than is_escaped(): the backslash counts even when
 * it is itself escaped by another backslash.
 */
static int
is_backslashed(uint8_t *data, size_t loc)
{
    if (loc == 0)
        return 0;

    return data[loc - 1] == '\\';
}
750 | | |
/*
 * find_emph_char: find the next occurrence of the emphasis character `c`
 * in `data`, skipping over code spans and links.
 *
 * Returns the index of the match, or 0 when there is none. A match at
 * index 0 is therefore indistinguishable from "not found". Escaped
 * characters (see is_escaped) are ignored. When a code span or link
 * turns out to be malformed, the first `c` seen inside it is returned.
 */
static size_t
find_emph_char(uint8_t *data, size_t size, uint8_t c)
{
    size_t pos = 0;

    while (pos < size) {
        size_t fallback = 0; /* first `c` seen inside the construct being skipped */

        /* jump to the next byte of interest */
        while (pos < size && data[pos] != c && data[pos] != '[' && data[pos] != '`')
            pos++;

        if (pos == size)
            return 0;

        /* not counting escaped chars */
        if (is_escaped(data, pos)) {
            pos++;
            continue;
        }

        if (data[pos] == c)
            return pos;

        if (data[pos] == '`') {
            /* skipping a codespan */
            size_t opening = 0, closing = 0;

            /* counting the number of opening backticks */
            while (pos < size && data[pos] == '`') {
                pos++;
                opening++;
            }

            if (pos >= size)
                return 0;

            /* finding the matching closing sequence */
            while (pos < size && closing < opening) {
                if (!fallback && data[pos] == c)
                    fallback = pos;

                if (data[pos] == '`')
                    closing++;
                else
                    closing = 0;

                pos++;
            }

            /* not a well-formed codespan; use found matching emph char */
            if (closing < opening && pos >= size)
                return fallback;

            continue;
        }

        /* the only remaining possibility is '[': skipping a link */
        {
            uint8_t closer;

            pos++;
            while (pos < size && data[pos] != ']') {
                if (!fallback && data[pos] == c)
                    fallback = pos;
                pos++;
            }

            pos++;
            while (pos < size && _isspace(data[pos]))
                pos++;

            if (pos >= size)
                return fallback;

            if (data[pos] == '[') {
                closer = ']';
            } else if (data[pos] == '(') {
                closer = ')';
            } else {
                /* plain "[...]" with no target after it */
                if (fallback)
                    return fallback;
                continue;
            }

            pos++;
            while (pos < size && data[pos] != closer) {
                if (!fallback && data[pos] == c)
                    fallback = pos;
                pos++;
            }

            if (pos >= size)
                return fallback;

            pos++;
        }
    }

    return 0;
}
843 | | |
/*
 * find_separator_char: return the index of the first occurrence of `c`
 * that is not directly preceded by a backslash, or 0 when there is none.
 * A match at index 0 also yields 0.
 */
static size_t
find_separator_char(uint8_t *data, size_t size, uint8_t c)
{
    size_t pos;

    for (pos = 0; pos < size; pos++) {
        if (data[pos] != c)
            continue;

        /* not counting backslashed separators */
        if (is_backslashed(data, pos))
            continue;

        return pos;
    }

    return 0;
}
868 | | |
869 | | /* parse_emph1 • parsing single emphase */ |
870 | | /* closed by a symbol not preceded by spacing and not followed by symbol */ |
871 | | static size_t |
872 | | parse_emph1(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c) |
873 | 90.0k | { |
874 | 90.0k | size_t i = 0, len; |
875 | 90.0k | hoedown_buffer *work = 0; |
876 | 90.0k | int r; |
877 | | |
878 | | /* skipping one symbol if coming from emph3 */ |
879 | 90.0k | if (size > 1 && data[0] == c && data[1] == c) i = 1; |
880 | | |
881 | 108k | while (i < size) { |
882 | 108k | len = find_emph_char(data + i, size - i, c); |
883 | 108k | if (!len) return 0; |
884 | 51.7k | i += len; |
885 | 51.7k | if (i >= size) return 0; |
886 | | |
887 | 51.7k | if (data[i] == c && !_isspace(data[i - 1])) { |
888 | | |
889 | 34.1k | if (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS || |
890 | 34.1k | (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_UNDERLINE_EMPHASIS && c == '_')) { |
891 | 34.1k | if (i + 1 < size && isalnum(data[i + 1])) |
892 | 368 | continue; |
893 | 34.1k | } |
894 | | |
895 | 33.8k | work = newbuf(doc, BUFFER_SPAN); |
896 | 33.8k | parse_inline(work, doc, data, i); |
897 | | |
898 | 33.8k | if (doc->ext_flags & HOEDOWN_EXT_UNDERLINE && c == '_') |
899 | 25.6k | r = doc->md.underline(ob, work, &doc->data); |
900 | 8.10k | else |
901 | 8.10k | r = doc->md.emphasis(ob, work, &doc->data); |
902 | | |
903 | 33.8k | popbuf(doc, BUFFER_SPAN); |
904 | 33.8k | return r ? i + 1 : 0; |
905 | 34.1k | } |
906 | 51.7k | } |
907 | | |
908 | 0 | return 0; |
909 | 90.0k | } |
910 | | |
911 | | /* parse_emph2 • parsing single emphase */ |
912 | | static size_t |
913 | | parse_emph2(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c) |
914 | 40.6k | { |
915 | 40.6k | size_t i = 0, len; |
916 | 40.6k | hoedown_buffer *work = 0; |
917 | 40.6k | int r; |
918 | | |
919 | 55.9k | while (i < size) { |
920 | 55.6k | len = find_emph_char(data + i, size - i, c); |
921 | 55.6k | if (!len) return 0; |
922 | 36.1k | i += len; |
923 | | |
924 | 36.1k | if (i + 1 < size && data[i] == c && data[i + 1] == c && i && !_isspace(data[i - 1])) { |
925 | 20.8k | work = newbuf(doc, BUFFER_SPAN); |
926 | 20.8k | parse_inline(work, doc, data, i); |
927 | | |
928 | 20.8k | if (c == '~') |
929 | 3.09k | r = doc->md.strikethrough(ob, work, &doc->data); |
930 | 17.7k | else if (c == '=') |
931 | 553 | r = doc->md.highlight(ob, work, &doc->data); |
932 | 17.2k | else |
933 | 17.2k | r = doc->md.double_emphasis(ob, work, &doc->data); |
934 | | |
935 | 20.8k | popbuf(doc, BUFFER_SPAN); |
936 | 20.8k | return r ? i + 2 : 0; |
937 | 20.8k | } |
938 | 15.3k | i++; |
939 | 15.3k | } |
940 | 342 | return 0; |
941 | 40.6k | } |
942 | | |
943 | | /* parse_emph3 • parsing single emphase */ |
944 | | /* finds the first closing tag, and delegates to the other emph */ |
945 | | static size_t |
946 | | parse_emph3(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, uint8_t c) |
947 | 43.2k | { |
948 | 43.2k | size_t i = 0, len; |
949 | 43.2k | int r; |
950 | | |
951 | 68.6k | while (i < size) { |
952 | 68.6k | len = find_emph_char(data + i, size - i, c); |
953 | 68.6k | if (!len) return 0; |
954 | 38.1k | i += len; |
955 | | |
956 | | /* skip spacing preceded symbols */ |
957 | 38.1k | if (data[i] != c || _isspace(data[i - 1])) |
958 | 25.3k | continue; |
959 | | |
960 | 12.7k | if (i + 2 < size && data[i + 1] == c && data[i + 2] == c && doc->md.triple_emphasis) { |
961 | | /* triple symbol found */ |
962 | 2.32k | hoedown_buffer *work = newbuf(doc, BUFFER_SPAN); |
963 | | |
964 | 2.32k | parse_inline(work, doc, data, i); |
965 | 2.32k | r = doc->md.triple_emphasis(ob, work, &doc->data); |
966 | 2.32k | popbuf(doc, BUFFER_SPAN); |
967 | 2.32k | return r ? i + 3 : 0; |
968 | | |
969 | 10.4k | } else if (i + 1 < size && data[i + 1] == c) { |
970 | | /* double symbol found, handing over to emph1 */ |
971 | 5.19k | len = parse_emph1(ob, doc, data - 2, size + 2, c); |
972 | 5.19k | if (!len) return 0; |
973 | 0 | else return len - 2; |
974 | | |
975 | 5.25k | } else { |
976 | | /* single symbol found, handing over to emph2 */ |
977 | 5.25k | len = parse_emph2(ob, doc, data - 1, size + 1, c); |
978 | 5.25k | if (!len) return 0; |
979 | 0 | else return len - 1; |
980 | 5.25k | } |
981 | 12.7k | } |
982 | 0 | return 0; |
983 | 43.2k | } |
984 | | |
985 | | /* parse_math • parses a math span until the given ending delimiter */ |
986 | | static size_t |
987 | | parse_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size, const char *end, size_t delimsz, int displaymode) |
988 | 32.6k | { |
989 | 32.6k | hoedown_buffer text = { NULL, 0, 0, 0, NULL, NULL, NULL }; |
990 | 32.6k | size_t i = delimsz; |
991 | | |
992 | 32.6k | if (!doc->md.math) |
993 | 0 | return 0; |
994 | | |
995 | | /* find ending delimiter */ |
996 | 866k | while (1) { |
997 | 73.6M | while (i < size && data[i] != (uint8_t)end[0]) |
998 | 72.8M | i++; |
999 | | |
1000 | 866k | if (i >= size) |
1001 | 19.4k | return 0; |
1002 | | |
1003 | 846k | if (!is_escaped(data, i) && !(i + delimsz > size) |
1004 | 846k | && memcmp(data + i, end, delimsz) == 0) |
1005 | 13.2k | break; |
1006 | | |
1007 | 833k | i++; |
1008 | 833k | } |
1009 | | |
1010 | | /* prepare buffers */ |
1011 | 13.2k | text.data = data + delimsz; |
1012 | 13.2k | text.size = i - delimsz; |
1013 | | |
1014 | | /* if this is a $$ and MATH_EXPLICIT is not active, |
1015 | | * guess whether displaymode should be enabled from the context */ |
1016 | 13.2k | i += delimsz; |
1017 | 13.2k | if (delimsz == 2 && !(doc->ext_flags & HOEDOWN_EXT_MATH_EXPLICIT)) |
1018 | 0 | displaymode = is_empty_all(data - offset, offset) && is_empty_all(data + i, size - i); |
1019 | | |
1020 | | /* call callback */ |
1021 | 13.2k | if (doc->md.math(ob, &text, displaymode, &doc->data)) |
1022 | 13.2k | return i; |
1023 | | |
1024 | 0 | return 0; |
1025 | 13.2k | } |
1026 | | |
1027 | | /* char_emphasis • single and double emphasis parsing */ |
1028 | | static size_t |
1029 | | char_emphasis(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1030 | 1.27M | { |
1031 | 1.27M | uint8_t c = data[0]; |
1032 | 1.27M | size_t ret; |
1033 | | |
1034 | 1.27M | if (doc->ext_flags & HOEDOWN_EXT_NO_INTRA_EMPHASIS) { |
1035 | 1.27M | if (offset > 0 && !_isspace(data[-1]) && data[-1] != '>' && data[-1] != '(') |
1036 | 987k | return 0; |
1037 | 1.27M | } |
1038 | | |
1039 | 291k | if (size > 2 && data[1] != c) { |
1040 | | /* spacing cannot follow an opening emphasis; |
1041 | | * strikethrough and highlight only takes two characters '~~' */ |
1042 | 127k | if (c == '~' || c == '=' || _isspace(data[1]) || (ret = parse_emph1(ob, doc, data + 1, size - 1, c)) == 0) |
1043 | 95.5k | return 0; |
1044 | | |
1045 | 32.1k | return ret + 1; |
1046 | 127k | } |
1047 | | |
1048 | 164k | if (size > 3 && data[1] == c && data[2] != c) { |
1049 | 38.0k | if (_isspace(data[2]) || (ret = parse_emph2(ob, doc, data + 2, size - 2, c)) == 0) |
1050 | 25.3k | return 0; |
1051 | | |
1052 | 12.7k | return ret + 2; |
1053 | 38.0k | } |
1054 | | |
1055 | 126k | if (size > 4 && data[1] == c && data[2] == c && data[3] != c) { |
1056 | 52.2k | if (c == '~' || c == '=' || _isspace(data[3]) || (ret = parse_emph3(ob, doc, data + 3, size - 3, c)) == 0) |
1057 | 51.1k | return 0; |
1058 | | |
1059 | 1.04k | return ret + 3; |
1060 | 52.2k | } |
1061 | | |
1062 | 73.8k | return 0; |
1063 | 126k | } |
1064 | | |
1065 | | |
1066 | | /* char_linebreak • '\n' preceded by two spaces (assuming linebreak != 0) */ |
1067 | | static size_t |
1068 | | char_linebreak(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1069 | 536k | { |
1070 | 536k | if (offset < 2 || data[-1] != ' ' || data[-2] != ' ') |
1071 | 509k | return 0; |
1072 | | |
1073 | | /* removing the last space from ob and rendering */ |
1074 | 1.20M | while (ob->size && ob->data[ob->size - 1] == ' ') |
1075 | 1.17M | ob->size--; |
1076 | | |
1077 | 27.0k | return doc->md.linebreak(ob, &doc->data) ? 1 : 0; |
1078 | 536k | } |
1079 | | |
1080 | | |
1081 | | /* char_codespan • '`' parsing a code span (assuming codespan != 0) */ |
1082 | | static size_t |
1083 | | char_codespan(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1084 | 102k | { |
1085 | 102k | hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL }; |
1086 | 102k | size_t end, nb = 0, i, f_begin, f_end; |
1087 | | |
1088 | | /* counting the number of backticks in the delimiter */ |
1089 | 761k | while (nb < size && data[nb] == '`') |
1090 | 659k | nb++; |
1091 | | |
1092 | | /* finding the next delimiter */ |
1093 | 102k | i = 0; |
1094 | 42.5M | for (end = nb; end < size && i < nb; end++) { |
1095 | 42.4M | if (data[end] == '`') { |
1096 | 160k | if (end + 1 == size || !is_escaped(data, end)) { |
1097 | 159k | i++; |
1098 | 159k | } else { |
1099 | 516 | i = 0; |
1100 | 516 | } |
1101 | 160k | } |
1102 | 42.2M | else i = 0; |
1103 | 42.4M | } |
1104 | | |
1105 | 102k | if (i < nb && end >= size) |
1106 | 82.4k | return 0; /* no matching delimiter */ |
1107 | | |
1108 | | /* trimming outside whitespace */ |
1109 | 19.9k | f_begin = nb; |
1110 | 292k | while (f_begin < end && (data[f_begin] == ' ' || data[f_begin] == '\n')) |
1111 | 272k | f_begin++; |
1112 | | |
1113 | 19.9k | f_end = end - nb; |
1114 | 130k | while (f_end > nb && (data[f_end-1] == ' ' || data[f_end-1] == '\n')) |
1115 | 110k | f_end--; |
1116 | | |
1117 | | /* real code span */ |
1118 | 19.9k | if (f_begin < f_end) { |
1119 | | /* needed for parse_attribute functions as buffer functions do not work with |
1120 | | * buffers made on the stack */ |
1121 | 13.7k | hoedown_buffer *attr = newbuf(doc, BUFFER_ATTRIBUTE); |
1122 | | |
1123 | 13.7k | work.data = data + f_begin; |
1124 | 13.7k | work.size = f_end - f_begin; |
1125 | | |
1126 | 13.7k | if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) { |
1127 | 13.7k | end += parse_inline_attributes(data + end, size - end, attr, doc->attr_activation); |
1128 | 13.7k | } |
1129 | | |
1130 | 13.7k | if (!doc->md.codespan(ob, &work, attr, &doc->data)) |
1131 | 0 | end = 0; |
1132 | 13.7k | popbuf(doc, BUFFER_ATTRIBUTE); |
1133 | 13.7k | } else { |
1134 | 6.20k | if (!doc->md.codespan(ob, 0, 0, &doc->data)) |
1135 | 0 | end = 0; |
1136 | 6.20k | } |
1137 | | |
1138 | 19.9k | return end; |
1139 | 102k | } |
1140 | | |
1141 | | /* char_quote • '"' parsing a quote */ |
1142 | | static size_t |
1143 | | char_quote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1144 | 263k | { |
1145 | 263k | size_t end, nq = 0, i, f_begin, f_end; |
1146 | | |
1147 | | /* counting the number of quotes in the delimiter */ |
1148 | 34.8M | while (nq < size && data[nq] == '"') |
1149 | 34.6M | nq++; |
1150 | | |
1151 | | /* finding the next delimiter */ |
1152 | 263k | end = nq; |
1153 | 464k | while (1) { |
1154 | 464k | i = end; |
1155 | 464k | end += find_emph_char(data + end, size - end, '"'); |
1156 | 464k | if (end == i) return 0; /* no matching delimiter */ |
1157 | 301k | i = end; |
1158 | 7.42M | while (end < size && data[end] == '"' && end - i < nq) end++; |
1159 | 301k | if (end - i >= nq) break; |
1160 | 301k | } |
1161 | | |
1162 | | /* trimming outside spaces */ |
1163 | 101k | f_begin = nq; |
1164 | 3.17M | while (f_begin < end && data[f_begin] == ' ') |
1165 | 3.07M | f_begin++; |
1166 | | |
1167 | 101k | f_end = end - nq; |
1168 | 3.04M | while (f_end > nq && data[f_end-1] == ' ') |
1169 | 2.94M | f_end--; |
1170 | | |
1171 | | /* real quote */ |
1172 | 101k | if (f_begin < f_end) { |
1173 | 100k | hoedown_buffer *work = newbuf(doc, BUFFER_SPAN); |
1174 | 100k | parse_inline(work, doc, data + f_begin, f_end - f_begin); |
1175 | | |
1176 | 100k | if (!doc->md.quote(ob, work, &doc->data)) |
1177 | 890 | end = 0; |
1178 | 100k | popbuf(doc, BUFFER_SPAN); |
1179 | 100k | } else { |
1180 | 1.05k | if (!doc->md.quote(ob, 0, &doc->data)) |
1181 | 1.05k | end = 0; |
1182 | 1.05k | } |
1183 | | |
1184 | 101k | return end; |
1185 | 263k | } |
1186 | | |
1187 | | |
1188 | | /* char_escape • '\\' backslash escape */ |
1189 | | static size_t |
1190 | | char_escape(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1191 | 158k | { |
1192 | 158k | static const char *escape_chars = "\\`*_{}[]()#+-.!:|&<>^~=\"$"; |
1193 | 158k | hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL }; |
1194 | 158k | size_t w; |
1195 | | |
1196 | 158k | if (size > 1) { |
1197 | 156k | if (data[1] == '\\' && (doc->ext_flags & HOEDOWN_EXT_MATH) && |
1198 | 156k | size > 2 && (data[2] == '(' || data[2] == '[')) { |
1199 | 2.52k | const char *end = (data[2] == '[') ? "\\\\]" : "\\\\)"; |
1200 | 2.52k | w = parse_math(ob, doc, data, offset, size, end, 3, data[2] == '['); |
1201 | 2.52k | if (w) return w; |
1202 | 2.52k | } |
1203 | | |
1204 | 155k | if (strchr(escape_chars, data[1]) == NULL) |
1205 | 66.2k | return 0; |
1206 | | |
1207 | 89.2k | if (doc->md.normal_text) { |
1208 | 89.2k | work.data = data + 1; |
1209 | 89.2k | work.size = 1; |
1210 | 89.2k | doc->is_escape_char = 1; |
1211 | 89.2k | doc->md.normal_text(ob, &work, &doc->data); |
1212 | 89.2k | doc->is_escape_char = 0; |
1213 | 89.2k | } |
1214 | 0 | else hoedown_buffer_putc(ob, data[1]); |
1215 | 89.2k | } else if (size == 1) { |
1216 | 2.13k | if (doc->md.normal_text) { |
1217 | 2.13k | work.data = data; |
1218 | 2.13k | work.size = 1; |
1219 | 2.13k | doc->md.normal_text(ob, &work, &doc->data); |
1220 | 2.13k | } |
1221 | 0 | else hoedown_buffer_putc(ob, data[0]); |
1222 | 2.13k | } |
1223 | | |
1224 | 91.4k | return 2; |
1225 | 158k | } |
1226 | | |
1227 | | /* char_entity • '&' escaped when it doesn't belong to an entity */ |
1228 | | /* valid entities are assumed to be anything matching &#?[A-Za-z0-9]+; */ |
1229 | | static size_t |
1230 | | char_entity(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1231 | 193k | { |
1232 | 193k | size_t end = 1; |
1233 | 193k | hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL }; |
1234 | | |
1235 | 193k | if (end < size && data[end] == '#') |
1236 | 247 | end++; |
1237 | | |
1238 | 631k | while (end < size && isalnum(data[end])) |
1239 | 437k | end++; |
1240 | | |
1241 | 193k | if (end < size && data[end] == ';') |
1242 | 924 | end++; /* real entity */ |
1243 | 192k | else |
1244 | 192k | return 0; /* lone '&' */ |
1245 | | |
1246 | 924 | if (doc->md.entity) { |
1247 | 0 | work.data = data; |
1248 | 0 | work.size = end; |
1249 | 0 | doc->md.entity(ob, &work, &doc->data); |
1250 | 0 | } |
1251 | 924 | else hoedown_buffer_put(ob, data, end); |
1252 | | |
1253 | 924 | return end; |
1254 | 193k | } |
1255 | | |
1256 | | /* char_langle_tag • '<' when tags or autolinks are allowed */ |
1257 | | static size_t |
1258 | | char_langle_tag(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1259 | 234k | { |
1260 | 234k | hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL }; |
1261 | 234k | hoedown_autolink_type altype = HOEDOWN_AUTOLINK_NONE; |
1262 | 234k | size_t end = tag_length(data, size, &altype, doc->ext_flags & HOEDOWN_EXT_SCRIPT_TAGS); |
1263 | 234k | int ret = 0; |
1264 | | |
1265 | 234k | work.data = data; |
1266 | 234k | work.size = end; |
1267 | | |
1268 | 234k | if (end > 2) { |
1269 | 58.5k | if (doc->md.autolink && altype != HOEDOWN_AUTOLINK_NONE) { |
1270 | 8.39k | hoedown_buffer *u_link = newbuf(doc, BUFFER_SPAN); |
1271 | 8.39k | work.data = data + 1; |
1272 | 8.39k | work.size = end - 2; |
1273 | 8.39k | unscape_text(u_link, &work); |
1274 | 8.39k | ret = doc->md.autolink(ob, u_link, altype, &doc->data); |
1275 | 8.39k | popbuf(doc, BUFFER_SPAN); |
1276 | 8.39k | } |
1277 | 50.1k | else if (doc->md.raw_html) |
1278 | 50.1k | ret = doc->md.raw_html(ob, &work, &doc->data); |
1279 | 58.5k | } |
1280 | | |
1281 | 234k | if (!ret) return 0; |
1282 | 58.5k | else return end; |
1283 | 234k | } |
1284 | | |
1285 | | static size_t |
1286 | | char_autolink_www(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1287 | 473k | { |
1288 | 473k | hoedown_buffer *link, *link_url, *link_text; |
1289 | 473k | size_t link_len, rewind; |
1290 | | |
1291 | 473k | if (!doc->md.link || doc->in_link_body) |
1292 | 1.30k | return 0; |
1293 | | |
1294 | 472k | link = newbuf(doc, BUFFER_SPAN); |
1295 | | |
1296 | 472k | if ((link_len = hoedown_autolink__www(&rewind, link, data, offset, size, HOEDOWN_AUTOLINK_SHORT_DOMAINS)) > 0) { |
1297 | 23.3k | link_url = newbuf(doc, BUFFER_SPAN); |
1298 | 23.3k | HOEDOWN_BUFPUTSL(link_url, "http://"); |
1299 | 23.3k | hoedown_buffer_put(link_url, link->data, link->size); |
1300 | | |
1301 | 23.3k | if (ob->size > rewind) |
1302 | 17.6k | ob->size -= rewind; |
1303 | 5.67k | else |
1304 | 5.67k | ob->size = 0; |
1305 | | |
1306 | 23.3k | if (doc->md.normal_text) { |
1307 | 23.3k | link_text = newbuf(doc, BUFFER_SPAN); |
1308 | 23.3k | doc->md.normal_text(link_text, link, &doc->data); |
1309 | 23.3k | doc->md.link(ob, link_text, link_url, NULL, NULL, &doc->data); |
1310 | 23.3k | popbuf(doc, BUFFER_SPAN); |
1311 | 23.3k | } else { |
1312 | 0 | doc->md.link(ob, link, link_url, NULL, NULL, &doc->data); |
1313 | 0 | } |
1314 | 23.3k | popbuf(doc, BUFFER_SPAN); |
1315 | 23.3k | } |
1316 | | |
1317 | 472k | popbuf(doc, BUFFER_SPAN); |
1318 | 472k | return link_len; |
1319 | 473k | } |
1320 | | |
1321 | | static size_t |
1322 | | char_autolink_email(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1323 | 348k | { |
1324 | 348k | hoedown_buffer *link; |
1325 | 348k | size_t link_len, rewind; |
1326 | | |
1327 | 348k | if (!doc->md.autolink || doc->in_link_body) |
1328 | 459 | return 0; |
1329 | | |
1330 | 347k | link = newbuf(doc, BUFFER_SPAN); |
1331 | | |
1332 | 347k | if ((link_len = hoedown_autolink__email(&rewind, link, data, offset, size, 0)) > 0) { |
1333 | 688 | if (ob->size > rewind) |
1334 | 473 | ob->size -= rewind; |
1335 | 215 | else |
1336 | 215 | ob->size = 0; |
1337 | | |
1338 | 688 | doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_EMAIL, &doc->data); |
1339 | 688 | } |
1340 | | |
1341 | 347k | popbuf(doc, BUFFER_SPAN); |
1342 | 347k | return link_len; |
1343 | 348k | } |
1344 | | |
1345 | | static size_t |
1346 | | char_autolink_url(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1347 | 1.57M | { |
1348 | 1.57M | hoedown_buffer *link; |
1349 | 1.57M | size_t link_len, rewind; |
1350 | | |
1351 | 1.57M | if (!doc->md.autolink || doc->in_link_body) |
1352 | 16.6k | return 0; |
1353 | | |
1354 | 1.55M | link = newbuf(doc, BUFFER_SPAN); |
1355 | | |
1356 | 1.55M | if ((link_len = hoedown_autolink__url(&rewind, link, data, offset, size, 0)) > 0) { |
1357 | 5.28k | if (ob->size > rewind) |
1358 | 4.47k | ob->size -= rewind; |
1359 | 813 | else |
1360 | 813 | ob->size = 0; |
1361 | | |
1362 | 5.28k | doc->md.autolink(ob, link, HOEDOWN_AUTOLINK_NORMAL, &doc->data); |
1363 | 5.28k | } |
1364 | | |
1365 | 1.55M | popbuf(doc, BUFFER_SPAN); |
1366 | 1.55M | return link_len; |
1367 | 1.57M | } |
1368 | | |
1369 | | static size_t |
1370 | 122k | char_image(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) { |
1371 | 122k | size_t ret; |
1372 | | |
1373 | 122k | if (size < 2 || data[1] != '[') return 0; |
1374 | | |
1375 | 45.0k | ret = char_link(ob, doc, data + 1, offset + 1, size - 1); |
1376 | 45.0k | if (!ret) return 0; |
1377 | 3.47k | return ret + 1; |
1378 | 45.0k | } |
1379 | | |
1380 | | /* char_link • '[': parsing a link, a footnote or an image */ |
1381 | | static size_t |
1382 | | char_link(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1383 | 828k | { |
1384 | 828k | int is_img = (offset && data[-1] == '!' && !is_escaped(data - offset, offset - 1)); |
1385 | 828k | int is_footnote = (doc->ext_flags & HOEDOWN_EXT_FOOTNOTES && size > 1 && data[1] == '^'); |
1386 | 828k | size_t i = 1, txt_e, link_b = 0, link_e = 0, title_b = 0, title_e = 0; |
1387 | 828k | hoedown_buffer *content = NULL; |
1388 | 828k | hoedown_buffer *link = NULL; |
1389 | 828k | hoedown_buffer *title = NULL; |
1390 | 828k | hoedown_buffer *u_link = NULL; |
1391 | 828k | hoedown_buffer *inline_attr = NULL; |
1392 | 828k | hoedown_buffer *ref_attr = NULL; |
1393 | 828k | hoedown_buffer *attr = NULL; |
1394 | 828k | hoedown_buffer *id = NULL; |
1395 | 828k | size_t org_work_size = doc->work_bufs[BUFFER_SPAN].size; |
1396 | 828k | int ret = 0, in_title = 0, qtype = 0; |
1397 | 828k | hoedown_link_type link_type = HOEDOWN_LINK_NONE; |
1398 | 828k | int ref_attr_exists = 0, inline_attr_exists = 0; |
1399 | | |
1400 | | /* checking whether the correct renderer exists */ |
1401 | 828k | if ((is_footnote && !doc->md.footnote_ref) || (is_img && !doc->md.image) |
1402 | 828k | || (!is_img && !is_footnote && !doc->md.link)) |
1403 | 0 | goto cleanup; |
1404 | | |
1405 | | /* looking for the matching closing bracket */ |
1406 | 828k | i += find_emph_char(data + i, size - i, ']'); |
1407 | 828k | txt_e = i; |
1408 | | |
1409 | 828k | if (i < size && data[i] == ']') i++; |
1410 | 198k | else goto cleanup; |
1411 | | |
1412 | | /* footnote link */ |
1413 | 630k | if (is_footnote) { |
1414 | 39.7k | hoedown_buffer id = { NULL, 0, 0, 0, NULL, NULL, NULL }; |
1415 | 39.7k | struct footnote_ref *fr; |
1416 | | |
1417 | 39.7k | if (txt_e < 3) |
1418 | 25.1k | goto cleanup; |
1419 | | |
1420 | 14.6k | id.data = data + 2; |
1421 | 14.6k | id.size = txt_e - 2; |
1422 | | |
1423 | 14.6k | fr = find_footnote_ref(&doc->footnotes_found, id.data, id.size); |
1424 | | |
1425 | | /* mark footnote used */ |
1426 | 14.6k | if (fr && !fr->is_used) { |
1427 | 755 | if(!add_footnote_ref(&doc->footnotes_used, fr)) |
1428 | 0 | goto cleanup; |
1429 | 755 | fr->is_used = 1; |
1430 | 755 | fr->num = doc->footnotes_used.count; |
1431 | | |
1432 | | /* render */ |
1433 | 755 | if (doc->md.footnote_ref) { |
1434 | 755 | doc->link_id = &id; |
1435 | 755 | ret = doc->md.footnote_ref(ob, fr->num, &doc->data); |
1436 | 755 | doc->link_id = NULL; |
1437 | 755 | } |
1438 | 755 | } |
1439 | | |
1440 | 14.6k | goto cleanup; |
1441 | 14.6k | } |
1442 | | |
1443 | | /* skip any amount of spacing */ |
1444 | | /* (this is much more laxist than original markdown syntax) */ |
1445 | 1.76M | while (i < size && _isspace(data[i])) |
1446 | 1.17M | i++; |
1447 | | |
1448 | | /* inline style link */ |
1449 | 590k | if (i < size && data[i] == '(') { |
1450 | 32.0k | size_t nb_p; |
1451 | | |
1452 | 32.0k | link_type = HOEDOWN_LINK_INLINE; |
1453 | | |
1454 | | /* skipping initial spacing */ |
1455 | 32.0k | i++; |
1456 | | |
1457 | 405k | while (i < size && _isspace(data[i])) |
1458 | 373k | i++; |
1459 | | |
1460 | 32.0k | link_b = i; |
1461 | | |
1462 | | /* looking for link end: ' " ) */ |
1463 | | /* Count the number of open parenthesis */ |
1464 | 32.0k | nb_p = 0; |
1465 | | |
1466 | 25.8M | while (i < size) { |
1467 | 25.8M | if (data[i] == '\\') i += 2; |
1468 | 25.8M | else if (data[i] == '(' && i != 0) { |
1469 | 142k | nb_p++; i++; |
1470 | 142k | } |
1471 | 25.6M | else if (data[i] == ')') { |
1472 | 10.8k | if (nb_p == 0) break; |
1473 | 5.57k | nb_p--; i++; |
1474 | 25.6M | } else if (i >= 1 && _isspace(data[i-1]) && (data[i] == '\'' || data[i] == '"')) break; |
1475 | 25.6M | else i++; |
1476 | 25.8M | } |
1477 | | |
1478 | 32.0k | if (i >= size) goto cleanup; |
1479 | 22.8k | link_e = i; |
1480 | | |
1481 | | /* looking for title end if present */ |
1482 | 22.8k | if (data[i] == '\'' || data[i] == '"') { |
1483 | 17.5k | qtype = data[i]; |
1484 | 17.5k | in_title = 1; |
1485 | 17.5k | i++; |
1486 | 17.5k | title_b = i; |
1487 | | |
1488 | 556M | while (i < size) { |
1489 | 556M | if (data[i] == '\\') i += 2; |
1490 | 556M | else if (data[i] == qtype) {in_title = 0; i++;} |
1491 | 556M | else if ((data[i] == ')') && !in_title) break; |
1492 | 556M | else i++; |
1493 | 556M | } |
1494 | | |
1495 | 17.5k | if (i >= size) goto cleanup; |
1496 | | |
1497 | | /* skipping spacing after title */ |
1498 | 11.0k | title_e = i - 1; |
1499 | 184k | while (title_e > title_b && _isspace(data[title_e])) |
1500 | 173k | title_e--; |
1501 | | |
1502 | | /* checking for closing quote presence */ |
1503 | 11.0k | if (data[title_e] != '\'' && data[title_e] != '"') { |
1504 | 1.53k | title_b = title_e = 0; |
1505 | 1.53k | link_e = i; |
1506 | 1.53k | } |
1507 | 11.0k | } |
1508 | | |
1509 | | /* remove spacing at the end of the link */ |
1510 | 178k | while (link_e > link_b && _isspace(data[link_e - 1])) |
1511 | 162k | link_e--; |
1512 | | |
1513 | | /* remove optional angle brackets around the link */ |
1514 | 16.2k | if (data[link_b] == '<' && data[link_e - 1] == '>') { |
1515 | 201 | link_b++; |
1516 | 201 | link_e--; |
1517 | 201 | } |
1518 | | |
1519 | | /* building escaped link and title */ |
1520 | 16.2k | if (link_e > link_b) { |
1521 | 2.99k | link = newbuf(doc, BUFFER_SPAN); |
1522 | 2.99k | hoedown_buffer_put(link, data + link_b, link_e - link_b); |
1523 | 2.99k | } |
1524 | | |
1525 | 16.2k | if (title_e > title_b) { |
1526 | 2.40k | title = newbuf(doc, BUFFER_SPAN); |
1527 | 2.40k | hoedown_buffer_put(title, data + title_b, title_e - title_b); |
1528 | 2.40k | } |
1529 | | |
1530 | 16.2k | i++; |
1531 | 16.2k | } |
1532 | | |
1533 | | /* reference style link */ |
1534 | 558k | else if (i < size && data[i] == '[') { |
1535 | 69.2k | struct link_ref *lr; |
1536 | | |
1537 | 69.2k | id = newbuf(doc, BUFFER_SPAN); |
1538 | | |
1539 | | /* looking for the id */ |
1540 | 69.2k | i++; |
1541 | 69.2k | link_b = i; |
1542 | 6.38M | while (i < size && data[i] != ']') i++; |
1543 | 69.2k | if (i >= size) goto cleanup; |
1544 | 66.0k | link_e = i; |
1545 | | |
1546 | | /* finding the link_ref */ |
1547 | 66.0k | if (link_b == link_e) { |
1548 | 36.5k | link_type = HOEDOWN_LINK_EMPTY_REFERENCE; |
1549 | 36.5k | replace_spacing(id, data + 1, txt_e - 1); |
1550 | 36.5k | } else { |
1551 | 29.5k | link_type = HOEDOWN_LINK_REFERENCE; |
1552 | 29.5k | hoedown_buffer_put(id, data + link_b, link_e - link_b); |
1553 | 29.5k | } |
1554 | | |
1555 | 66.0k | lr = find_link_ref(doc->refs, id->data, id->size); |
1556 | 66.0k | if (!lr) |
1557 | 60.9k | goto cleanup; |
1558 | | |
1559 | | /* keeping link and title from link_ref */ |
1560 | 5.08k | link = lr->link; |
1561 | 5.08k | title = lr->title; |
1562 | 5.08k | ref_attr = lr->attr; |
1563 | 5.08k | i++; |
1564 | 5.08k | } |
1565 | | |
1566 | | /* shortcut reference style link */ |
1567 | 489k | else { |
1568 | 489k | struct link_ref *lr; |
1569 | | |
1570 | 489k | id = newbuf(doc, BUFFER_SPAN); |
1571 | | |
1572 | 489k | link_type = HOEDOWN_LINK_SHORTCUT; |
1573 | | |
1574 | | /* crafting the id */ |
1575 | 489k | replace_spacing(id, data + 1, txt_e - 1); |
1576 | | |
1577 | | /* finding the link_ref */ |
1578 | 489k | lr = find_link_ref(doc->refs, id->data, id->size); |
1579 | 489k | if (!lr) |
1580 | 199k | goto cleanup; |
1581 | | |
1582 | | /* keeping link and title from link_ref */ |
1583 | 289k | link = lr->link; |
1584 | 289k | title = lr->title; |
1585 | 289k | ref_attr = lr->attr; |
1586 | | |
1587 | | /* rewinding the spacing */ |
1588 | 289k | i = txt_e + 1; |
1589 | 289k | } |
1590 | | |
1591 | | /* building content: img alt is kept, only link content is parsed */ |
1592 | 310k | if (txt_e > 1) { |
1593 | 22.5k | content = newbuf(doc, BUFFER_SPAN); |
1594 | 22.5k | if (is_img) { |
1595 | 10.7k | hoedown_buffer_put(content, data + 1, txt_e - 1); |
1596 | 11.7k | } else { |
1597 | | /* disable autolinking when parsing inline the |
1598 | | * content of a link */ |
1599 | 11.7k | doc->in_link_body = 1; |
1600 | 11.7k | parse_inline(content, doc, data + 1, txt_e - 1); |
1601 | 11.7k | doc->in_link_body = 0; |
1602 | 11.7k | } |
1603 | 22.5k | } |
1604 | | |
1605 | 310k | if (link) { |
1606 | 297k | u_link = newbuf(doc, BUFFER_SPAN); |
1607 | 297k | unscape_text(u_link, link); |
1608 | 297k | } |
1609 | | |
1610 | | /* if special attributes are enabled, attempt to parse an inline one from |
1611 | | * the link */ |
1612 | 310k | if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) { |
1613 | | /* attr is a span because cleanup code depends on it being span */ |
1614 | 310k | inline_attr = newbuf(doc, BUFFER_SPAN); |
1615 | 310k | i += parse_inline_attributes(data + i, size - i, inline_attr, doc->attr_activation); |
1616 | 310k | } |
1617 | | |
1618 | | /* remove optional < and > around inline and ref special attributes */ |
1619 | 310k | if (ref_attr && ref_attr->size > 0) { |
1620 | 60.8k | if (ref_attr->size > 1) { |
1621 | 4.47k | if (ref_attr->data[0] == '<') { |
1622 | 295 | hoedown_buffer_slurp(ref_attr, 1); |
1623 | 295 | } |
1624 | 4.47k | if (ref_attr->data[ref_attr->size - 1] == '>') { |
1625 | 80 | ref_attr->size--; |
1626 | 80 | } |
1627 | 4.47k | } |
1628 | 60.8k | } |
1629 | 310k | if (inline_attr && inline_attr->size > 0) { |
1630 | 3.77k | if (inline_attr->size > 1) { |
1631 | 1.53k | if (inline_attr->data[0] == '<') { |
1632 | 428 | hoedown_buffer_slurp(inline_attr, 1); |
1633 | 428 | } |
1634 | 1.53k | if (inline_attr->data[inline_attr->size - 1] == '>') { |
1635 | 780 | inline_attr->size--; |
1636 | 780 | } |
1637 | 1.53k | } |
1638 | 3.77k | } |
1639 | | |
1640 | | /* construct the final attr that is actually applied to the link */ |
1641 | 310k | ref_attr_exists = ref_attr && ref_attr->size > 0; |
1642 | 310k | inline_attr_exists = inline_attr && inline_attr->size > 0; |
1643 | 310k | if (ref_attr_exists || inline_attr_exists) { |
1644 | 64.0k | attr = newbuf(doc, BUFFER_SPAN); |
1645 | 64.0k | if (ref_attr_exists) { |
1646 | 60.8k | hoedown_buffer_put(attr, ref_attr->data, ref_attr->size); |
1647 | 60.8k | } |
1648 | | /* if both inline and ref attrs exist, join them with a space to prevent |
1649 | | * conflicts */ |
1650 | 64.0k | if (ref_attr_exists && inline_attr_exists) { |
1651 | 519 | hoedown_buffer_putc(attr, ' '); |
1652 | 519 | } |
1653 | 64.0k | if (inline_attr_exists) { |
1654 | 3.67k | hoedown_buffer_put(attr, inline_attr->data, inline_attr->size); |
1655 | 3.67k | } |
1656 | 64.0k | } |
1657 | | |
1658 | | /* calling the relevant rendering function */ |
1659 | 310k | doc->link_id = id; |
1660 | 310k | doc->link_type = link_type; |
1661 | 310k | doc->link_ref_attr = ref_attr; |
1662 | 310k | doc->link_inline_attr = inline_attr; |
1663 | 310k | if (is_img) { |
1664 | 14.7k | ret = doc->md.image(ob, u_link, title, content, attr, &doc->data); |
1665 | 295k | } else { |
1666 | 295k | ret = doc->md.link(ob, content, u_link, title, attr, &doc->data); |
1667 | 295k | } |
1668 | 310k | doc->link_inline_attr = NULL; |
1669 | 310k | doc->link_ref_attr = NULL; |
1670 | 310k | doc->link_type = HOEDOWN_LINK_NONE; |
1671 | 310k | doc->link_id = NULL; |
1672 | | |
1673 | | /* cleanup */ |
1674 | 828k | cleanup: |
1675 | 828k | doc->work_bufs[BUFFER_SPAN].size = (int)org_work_size; |
1676 | 828k | return ret ? i : 0; |
1677 | 310k | } |
1678 | | |
1679 | | static size_t |
1680 | | char_superscript(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1681 | 291k | { |
1682 | 291k | size_t sup_start, sup_len; |
1683 | 291k | hoedown_buffer *sup; |
1684 | | |
1685 | 291k | if (!doc->md.superscript) |
1686 | 0 | return 0; |
1687 | | |
1688 | 291k | if (size < 2) |
1689 | 7.38k | return 0; |
1690 | | |
1691 | 284k | if (data[1] == '(') { |
1692 | 30.2k | sup_start = 2; |
1693 | 30.2k | sup_len = find_emph_char(data + 2, size - 2, ')') + 2; |
1694 | | |
1695 | 30.2k | if (sup_len == size) |
1696 | 359 | return 0; |
1697 | 254k | } else { |
1698 | 254k | sup_start = sup_len = 1; |
1699 | | |
1700 | 23.0M | while (sup_len < size && !_isspace(data[sup_len])) |
1701 | 22.7M | sup_len++; |
1702 | 254k | } |
1703 | | |
1704 | 284k | if (sup_len - sup_start == 0) |
1705 | 31.7k | return (sup_start == 2) ? 3 : 0; |
1706 | | |
1707 | 252k | sup = newbuf(doc, BUFFER_SPAN); |
1708 | 252k | parse_inline(sup, doc, data + sup_start, sup_len - sup_start); |
1709 | 252k | doc->md.superscript(ob, sup, &doc->data); |
1710 | 252k | popbuf(doc, BUFFER_SPAN); |
1711 | | |
1712 | 252k | return (sup_start == 2) ? sup_len + 1 : sup_len; |
1713 | 284k | } |
1714 | | |
1715 | | static size_t |
1716 | | char_math(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t offset, size_t size) |
1717 | 30.1k | { |
1718 | | /* double dollar */ |
1719 | 30.1k | if (size > 1 && data[1] == '$') |
1720 | 6.35k | return parse_math(ob, doc, data, offset, size, "$$", 2, 1); |
1721 | | |
1722 | | /* single dollar allowed only with MATH_EXPLICIT flag */ |
1723 | 23.7k | if (doc->ext_flags & HOEDOWN_EXT_MATH_EXPLICIT) |
1724 | 23.7k | return parse_math(ob, doc, data, offset, size, "$", 1, 0); |
1725 | | |
1726 | 0 | return 0; |
1727 | 23.7k | } |
1728 | | |
1729 | | /********************************* |
1730 | | * BLOCK-LEVEL PARSING FUNCTIONS * |
1731 | | *********************************/ |
1732 | | |
/* is_empty • returns the line length when it is empty, 0 otherwise */
/* a line is empty when it holds only spaces up to the '\n' or the end of the
 * buffer; the returned length is one more than the number of spaces seen */
static size_t
is_empty(const uint8_t *data, size_t size)
{
	size_t pos = 0;

	while (pos < size && data[pos] != '\n') {
		if (data[pos] != ' ')
			return 0;
		pos++;
	}

	return pos + 1;
}
1745 | | |
/* is_hrule • returns whether a line is a horizontal rule */
/* a rule is up to three leading spaces followed by at least three '*', '-'
 * or '_' (all the same character), optionally separated by spaces */
static int
is_hrule(uint8_t *data, size_t size)
{
	size_t pos = 0, count = 0;
	uint8_t rule;

	if (size < 3)
		return 0;

	/* skipping up to three initial spaces (size >= 3, so these reads are in bounds) */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	/* at least three bytes must remain from the first rule character */
	if (pos + 2 >= size)
		return 0;

	/* looking at the rule character */
	rule = data[pos];
	if (rule != '*' && rule != '-' && rule != '_')
		return 0;

	/* the whole line must be the char or space */
	for (; pos < size && data[pos] != '\n'; pos++) {
		if (data[pos] == rule)
			count++;
		else if (data[pos] != ' ')
			return 0;
	}

	return count >= 3;
}
1776 | | |
/* is_codefence • checks whether a line opens or closes a fenced code
 * block: up to three leading spaces followed by a run of at least three
 * '~' or '`', with no further occurrence of that character on the line.
 * Returns the offset just past the fence run, or 0 if the line is not a
 * fence. When non-NULL, *width receives the run length and *chr the
 * fence character (both are written only on success). */
static size_t
is_codefence(uint8_t *data, size_t size, size_t *width, uint8_t *chr)
{
	size_t pos = 0, run, scan;
	uint8_t fence;

	if (size < 3)
		return 0;

	/* at most three spaces of indentation; size >= 3 guarantees that
	 * data[0..2] can be read */
	while (pos < 3 && data[pos] == ' ')
		pos++;

	if (pos + 2 >= size)
		return 0;

	/* the character the fence is drawn with */
	fence = data[pos];
	if (fence != '~' && fence != '`')
		return 0;

	/* measure the run of identical fence characters */
	run = 1;
	pos++;
	while (pos < size && data[pos] == fence) {
		run++;
		pos++;
	}

	if (run < 3)
		return 0;

	/* a later fence character on the same line means this is an inline
	 * code span rather than a fence */
	for (scan = pos; scan < size && data[scan] != '\n'; scan++) {
		if (data[scan] == fence)
			return 0;
	}

	if (width)
		*width = run;
	if (chr)
		*chr = fence;

	return pos;
}
1819 | | |
1820 | | /* expects single line, checks if it's a codefence and extracts language */ |
1821 | | static int |
1822 | | parse_codefence(hoedown_document *doc, uint8_t *data, size_t size, hoedown_buffer *lang, size_t *width, uint8_t *chr, unsigned int flags, hoedown_buffer *attr) |
1823 | 4.49M | { |
1824 | 4.49M | size_t i, w, lang_start, attr_start = 0; |
1825 | | |
1826 | 4.49M | i = w = is_codefence(data, size, width, chr); |
1827 | 4.49M | if (i == 0) |
1828 | 4.47M | return 0; |
1829 | | |
1830 | 150k | while (i < size && _isspace(data[i])) |
1831 | 132k | i++; |
1832 | | |
1833 | 17.4k | lang_start = i; |
1834 | | |
1835 | 17.4k | if (flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) { |
1836 | 17.4k | attr_start = i + parse_attributes(data + i, size - i, attr, NULL, "", 0, doc->attr_activation); |
1837 | 866k | while (i < attr_start) { |
1838 | 850k | if (_isspace(data[i])) { |
1839 | 1.05k | break; |
1840 | 1.05k | } |
1841 | 849k | i++; |
1842 | 849k | } |
1843 | 17.4k | } else { |
1844 | 0 | while (i < size && !_isspace(data[i])) |
1845 | 0 | i++; |
1846 | 0 | } |
1847 | | |
1848 | 17.4k | lang->data = data + lang_start; |
1849 | 17.4k | lang->size = i - lang_start; |
1850 | | |
1851 | 17.4k | return w; |
1852 | 4.49M | } |
1853 | | |
1854 | | /* is_atxheader • returns whether the line is a hash-prefixed header */ |
1855 | | static int |
1856 | | is_atxheader(hoedown_document *doc, uint8_t *data, size_t size) |
1857 | 5.45M | { |
1858 | 5.45M | size_t level = 0, begin = 0, len; |
1859 | 5.45M | uint8_t *p; |
1860 | | |
1861 | 5.45M | if (data[0] != '#') |
1862 | 5.40M | return 0; |
1863 | | |
1864 | 113k | while (level < size && level < 6 && data[level] == '#') |
1865 | 61.4k | level++; |
1866 | | |
1867 | 51.6k | if (level >= size || data[level] == '\n') { |
1868 | 17.0k | return 0; |
1869 | 17.0k | } |
1870 | | |
1871 | 34.6k | len = size - level; |
1872 | 34.6k | p = memchr(data + level, '\n', len); |
1873 | 34.6k | if (p) { |
1874 | 31.7k | len = p - (data + level) + 1; |
1875 | 31.7k | } |
1876 | | |
1877 | | /* if the header is only whitespace, it is not a header */ |
1878 | 34.6k | if (len && is_empty_all(data + level, len)) { |
1879 | 1.17k | return 0; |
1880 | 1.17k | } |
1881 | | |
1882 | 33.4k | if ((doc->ext_flags & HOEDOWN_EXT_SPACE_HEADERS) && level < size && data[level] != ' ') { |
1883 | 7.54k | return 0; |
1884 | 7.54k | } |
1885 | | |
1886 | | /* if the header is only special attribute, it is not a header */ |
1887 | 25.9k | if (len && (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE)) { |
1888 | 25.9k | p = memchr(data + level, '{', len); |
1889 | 25.9k | if (p) { |
1890 | | /* get number of characters from # to { */ |
1891 | 7.08k | begin = p - (data + level); |
1892 | 7.08k | if (begin > 0 && !is_empty_all(data + level, begin)) { |
1893 | 1.99k | return 1; |
1894 | 1.99k | } |
1895 | | /* check for special attributes after the # */ |
1896 | 5.09k | return !parse_inline_attributes(data + level + begin, len - begin, NULL, doc->attr_activation); |
1897 | 7.08k | } |
1898 | 25.9k | } |
1899 | | |
1900 | 18.8k | return 1; |
1901 | 25.9k | } |
1902 | | |
/* is_headerline • returns whether the line is a setext-style header
 * underline.
 *   data, size  the candidate line; only the part up to the first newline
 *               (or `size`) is examined
 * Returns 1 for a level-1 underline (a run of '='), 2 for a level-2
 * underline (a run of '-'), and 0 otherwise. Only spaces may follow the
 * run. An empty input (size == 0) is never an underline and `data` is
 * not read in that case. */
static int
is_headerline(uint8_t *data, size_t size)
{
	size_t i;
	uint8_t marker;
	int level;

	/* never touch data[0] on an empty buffer */
	if (size == 0)
		return 0;

	marker = data[0];
	if (marker == '=')
		level = 1;
	else if (marker == '-')
		level = 2;
	else
		return 0;

	/* the run of underline characters ... */
	for (i = 1; i < size && data[i] == marker; i++)
		;

	/* ... optionally followed by trailing spaces */
	while (i < size && data[i] == ' ')
		i++;

	/* anything else before the end of the line disqualifies it */
	return (i >= size || data[i] == '\n') ? level : 0;
}
1923 | | |
/* is_next_headerline • returns the setext header level (1 or 2) when the
 * line FOLLOWING the first one is a header underline, 0 otherwise
 * (including when there is no following line) */
static int
is_next_headerline(uint8_t *data, size_t size)
{
	size_t next;
	uint8_t *nl = memchr(data, '\n', size);

	/* no newline at all: there is no second line to look at */
	if (!nl)
		return 0;

	/* the newline is the last byte: the second line would be empty */
	next = (size_t)(nl - data) + 1;
	if (next >= size)
		return 0;

	return is_headerline(data + next, size - next);
}
1937 | | |
/* prefix_quote • returns the blockquote prefix length: up to three
 * spaces, a '>' and one optional space. Returns 0 when the line does not
 * start a blockquote. */
static size_t
prefix_quote(uint8_t *data, size_t size)
{
	size_t at = 0;

	/* up to three spaces of indentation */
	while (at < 3 && at < size && data[at] == ' ')
		at++;

	if (at >= size || data[at] != '>')
		return 0;

	/* step over the '>' and swallow a single space after it, if any */
	at++;
	if (at < size && data[at] == ' ')
		at++;

	return at;
}
1956 | | |
/* prefix_code • returns the prefix length for an indented code block:
 * 4 when the line starts with four spaces, 0 otherwise */
static size_t
prefix_code(uint8_t *data, size_t size)
{
	size_t k;

	if (size <= 3)
		return 0;

	for (k = 0; k < 4; k++) {
		if (data[k] != ' ')
			return 0;
	}

	return 4;
}
1966 | | |
/* prefix_oli • returns the ordered list item prefix length: up to three
 * spaces, one or more digits, a '.' and a space. Returns 0 when the line
 * does not start an ordered item, or when the line after it is a setext
 * header underline. */
static size_t
prefix_oli(uint8_t *data, size_t size)
{
	size_t at = 0;

	/* up to three spaces of indentation */
	while (at < 3 && at < size && data[at] == ' ')
		at++;

	/* at least one digit is required */
	if (at >= size || data[at] < '0' || data[at] > '9')
		return 0;

	do {
		at++;
	} while (at < size && data[at] >= '0' && data[at] <= '9');

	/* the number must be followed by ". " */
	if (at + 1 >= size)
		return 0;
	if (data[at] != '.' || data[at + 1] != ' ')
		return 0;

	/* an underlined line is a header, not a list item */
	if (is_next_headerline(data + at, size - at))
		return 0;

	return at + 2;
}
1991 | | |
/* prefix_uli • returns the unordered list item prefix length: up to three
 * spaces, a '*', '+' or '-' bullet and a space. Returns 0 when the line
 * does not start an unordered item, or when the line after it is a setext
 * header underline. */
static size_t
prefix_uli(uint8_t *data, size_t size)
{
	size_t at = 0;

	/* up to three spaces of indentation */
	while (at < 3 && at < size && data[at] == ' ')
		at++;

	/* both the bullet and the space after it must fit */
	if (at + 1 >= size)
		return 0;

	switch (data[at]) {
	case '*':
	case '+':
	case '-':
		break;
	default:
		return 0;
	}

	if (data[at + 1] != ' ')
		return 0;

	/* an underlined line is a header, not a list item */
	if (is_next_headerline(data + at, size - at))
		return 0;

	return at + 2;
}
2012 | | |
/* prefix_dt • returns the definition prefix length of a definition list
 * entry: up to three spaces (four would make it an indented code block),
 * a ':' and a space. Returns 0 when the line does not match, or when the
 * line after it is a setext header underline. */
static size_t
prefix_dt(uint8_t *data, size_t size)
{
	size_t at = 0;

	/* up to three spaces of indentation */
	while (at < 3 && at < size && data[at] == ' ')
		at++;

	/* both the colon and the space after it must fit */
	if (at + 1 >= size)
		return 0;

	/* the first character after the indentation has to be ": " */
	if (data[at] != ':' || data[at + 1] != ' ')
		return 0;

	/* an underlined line is a header, not a definition */
	if (is_next_headerline(data + at, size - at))
		return 0;

	return at + 2;
}
2036 | | |
2037 | | /* is_paragraph • returns if the next block is a paragraph (doesn't follow any |
2038 | | * other special rules for other types of blocks) */ |
2039 | | static int |
2040 | | is_paragraph(hoedown_document *doc, uint8_t *txt_data, size_t end); |
2041 | | |
2042 | | /* prefix_dli • returns dictionary definition prefix |
2043 | | * a dli looks like a block of text, followed by optional whitespace, followed |
2044 | | * by another block with : as the first non-whitespace character */ |
2045 | | static size_t |
2046 | | prefix_dli(hoedown_document *doc, uint8_t *data, size_t size) |
2047 | 407k | { |
2048 | | /* end is to keep track of the final return value */ |
2049 | 407k | size_t i = 0, j = 0, end = 0; |
2050 | 407k | int empty = 0; |
2051 | | |
2052 | | /* if the first line has a : in front of it, it can't be a definition list |
2053 | | * that starts at this point */ |
2054 | 407k | if (prefix_dt(data, size)) { |
2055 | 2.38k | return 0; |
2056 | 2.38k | } |
2057 | | |
2058 | | /* temporarily toggle definition lists off to prevent infinite loops */ |
2059 | 404k | doc->ext_flags &= ~HOEDOWN_EXT_DEFINITION_LISTS; |
2060 | | |
2061 | | /* check if it is a block of text with no double newlines inside, followed by |
2062 | | * another block of text starting with : */ |
2063 | 4.42M | while (i < size) { |
2064 | | /* if the line we are on is empty, flip the empty flag to indicate that |
2065 | | * the next block of text we see has to start with : to be considered |
2066 | | * a definition list; then skip to the next line */ |
2067 | 4.36M | j = is_empty(data + i, size - i); |
2068 | 4.36M | if(j != 0) { |
2069 | 310k | empty = 1; |
2070 | 310k | i += j; |
2071 | 310k | continue; |
2072 | 310k | } |
2073 | | |
2074 | | /* if anything special is found while parsing the definition term part, |
2075 | | * then return so that the main loop can deal with it */ |
2076 | 4.05M | if (!is_paragraph(doc, data + i, size - i)) { |
2077 | 123k | break; |
2078 | 123k | } |
2079 | | |
2080 | | /* check if the current line starts with :, returning the position of the |
2081 | | * beginning of the line if it does */ |
2082 | 3.93M | j = prefix_dt(data + i, size - i); |
2083 | 3.93M | if (j > 0) { |
2084 | 72.3k | end = i; |
2085 | 72.3k | break; |
2086 | 3.86M | } else if(empty) { |
2087 | | /* if an empty newline has been found, then since : was not the first |
2088 | | * character after whitespaces, it can't be a definition list */ |
2089 | 147k | break; |
2090 | 147k | } |
2091 | | /* scan characters until the next newline */ |
2092 | 221M | for (i = i + 1; i < size && data[i - 1] != '\n'; i++); |
2093 | 3.71M | } |
2094 | | |
2095 | 404k | doc->ext_flags |= HOEDOWN_EXT_DEFINITION_LISTS; |
2096 | 404k | return end; |
2097 | 407k | } |
2098 | | |
/* parse_block • parsing of a run of block-level input, rendering into ob;
 * forward declaration, needed because the block parsers below call it
 * recursively. Declared void: no offset is returned to the caller. */
static void parse_block(hoedown_buffer *ob, hoedown_document *doc,
	uint8_t *data, size_t size);
2102 | | |
2103 | | |
/* parse_blockquote • handles parsing of a blockquote fragment.
 *
 * Strips the quote prefix from every line, compacting the remaining text
 * IN PLACE inside `data` (so the input buffer is modified), then parses
 * the compacted text as blocks and hands the result to the blockquote
 * render callback.
 *
 * doc->blockquote_depth is incremented for the duration of the call.
 * Returns the offset of the end of the last line examined. */
static size_t
parse_blockquote(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
{
	size_t beg, end = 0, pre, work_size = 0;
	uint8_t *work_data = 0;
	hoedown_buffer *out = 0;

	doc->blockquote_depth++;

	out = newbuf(doc, BUFFER_BLOCK);
	beg = 0;
	while (beg < size) {
		/* end = one past the newline terminating the current line (or size) */
		for (end = beg + 1; end < size && data[end - 1] != '\n'; end++);

		pre = prefix_quote(data + beg, end - beg);

		if (pre)
			beg += pre; /* skipping prefix */

		/* empty line finished */
		else if ((doc->ext_flags & HOEDOWN_EXT_BLOCKQUOTE_EMPTY_LINE) &&
				(is_empty(data + beg, end - beg)))
			break;

		/* empty line followed by non-quote line */
		else if (is_empty(data + beg, end - beg) &&
				(end >= size || (prefix_quote(data + end, size - end) == 0 &&
				!is_empty(data + end, size - end))))
			break;

		if (beg < end) { /* copy into the in-place working buffer */
			/* hoedown_buffer_put(work, data + beg, end - beg); */
			if (!work_data)
				/* first kept line: the working buffer starts right here */
				work_data = data + beg;
			else if (data + beg != work_data + work_size)
				/* a prefix was skipped somewhere: slide this line down so
				 * the kept text stays contiguous (regions may overlap) */
				memmove(work_data + work_size, data + beg, end - beg);
			work_size += end - beg;
		}
		beg = end;
	}

	/* work_data is still NULL (with work_size 0) if no line was kept */
	parse_block(out, doc, work_data, work_size);
	if (doc->md.blockquote)
		doc->md.blockquote(ob, out, &doc->data);
	popbuf(doc, BUFFER_BLOCK);

	doc->blockquote_depth--;

	return end;
}
2155 | | |
2156 | | static size_t |
2157 | | parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int do_render); |
2158 | | |
/* parse_paragraph • handles parsing of a regular paragraph.
 *
 * Collects lines until a blank line, a setext underline, an ATX header,
 * a horizontal rule or a blockquote prefix is met. If the collected text
 * ends with a setext underline, the last text line is rendered as a
 * header (any earlier lines are rendered as a paragraph first); otherwise
 * everything is rendered as one paragraph.
 *
 * Returns the offset at which the caller should resume parsing. */
static size_t
parse_paragraph(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size)
{
	/* stack-allocated view into data; nothing is copied */
	hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL };
	size_t i = 0, end = 0;
	int level = 0;

	work.data = data;

	while (i < size) {
		/* end = one past the newline terminating the current line (or size) */
		for (end = i + 1; end < size && data[end - 1] != '\n'; end++) /* empty */;

		if (is_empty(data + i, size - i))
			break;

		if ((level = is_headerline(data + i, size - i)) != 0) {
			if (i == 0) {
				/* an underline with no text above it is not a header:
				 * treat the line as ordinary paragraph text */
				level = 0;
				i = end;
			}
			break;
		}

		if (is_atxheader(doc, data + i, size - i) ||
			is_hrule(data + i, size - i) ||
			prefix_quote(data + i, size - i)) {
			/* the paragraph stops before this line, which is left for
			 * the caller to parse */
			end = i;
			break;
		}

		i = end;
	}

	/* the paragraph text is data[0..i), minus trailing newlines */
	work.size = i;
	while (work.size && data[work.size - 1] == '\n')
		work.size--;

	if (!level) {
		/* plain paragraph */
		hoedown_buffer *attr = newbuf(doc, BUFFER_ATTRIBUTE);
		if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
			parse_attributes(work.data, work.size, NULL, attr, "paragraph", 1, doc->attr_activation);
			if (attr->size > 0) {
				/* remove the length of the attribute from the work size - the 12 comes
				 * from the leading space (1), the paragraph (9), the @ symbol (1), and
				 * the {} (2) (any extra spaces in the attribute are included inside
				 * the attribute) */
				work.size -= attr->size + 12;
			}
		}

		hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK);
		parse_inline(tmp, doc, work.data, work.size);
		if (doc->md.paragraph)
			doc->md.paragraph(ob, tmp, attr, &doc->data);
		popbuf(doc, BUFFER_BLOCK);
		popbuf(doc, BUFFER_ATTRIBUTE);
	} else {
		/* setext header: only the last collected line is the header text */
		hoedown_buffer *header_work;
		hoedown_buffer *attr_work;
		size_t len;

		if (work.size) {
			size_t beg;
			/* remember where the collected text ends */
			i = work.size;
			work.size -= 1;

			/* walk back to the newline preceding the last line */
			while (work.size && data[work.size] != '\n')
				work.size -= 1;

			/* beg = start of the last line (the header text) */
			beg = work.size + 1;
			while (work.size && data[work.size - 1] == '\n')
				work.size -= 1;

			if (work.size > 0) {
				/* lines before the header text form a paragraph of their own */
				hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK);
				parse_inline(tmp, doc, work.data, work.size);

				if (doc->md.paragraph)
					doc->md.paragraph(ob, tmp, NULL, &doc->data);

				popbuf(doc, BUFFER_BLOCK);
				/* narrow the view to the header text only */
				work.data += beg;
				work.size = i - beg;
			}
			/* single line: the whole collected text is the header */
			else work.size = i;
		}

		header_work = newbuf(doc, BUFFER_SPAN);
		attr_work = newbuf(doc, BUFFER_ATTRIBUTE);

		len = work.size;
		if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
			/* the length returned by parse_attributes is what gets rendered
			 * as header text */
			len = parse_attributes(work.data, work.size, attr_work, NULL, "", 1, doc->attr_activation);
		}

		parse_inline(header_work, doc, work.data, len);

		if (doc->md.header) {
			/* header_type is set only for the duration of the callback */
			doc->header_type = HOEDOWN_HEADER_SETEXT;
			doc->md.header(ob, header_work, attr_work, (int)level, &doc->data);
			doc->header_type = HOEDOWN_HEADER_NONE;
		}

		popbuf(doc, BUFFER_SPAN);
		popbuf(doc, BUFFER_ATTRIBUTE);
	}

	return end;
}
2269 | | |
2270 | | /* parse_fencedcode • handles parsing of a block-level code fragment */ |
2271 | | static size_t |
2272 | | parse_fencedcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, unsigned int flags) |
2273 | 4.49M | { |
2274 | 4.49M | hoedown_buffer text = { 0, 0, 0, 0, NULL, NULL, NULL }; |
2275 | 4.49M | hoedown_buffer lang = { 0, 0, 0, 0, NULL, NULL, NULL }; |
2276 | 4.49M | size_t i = 0, text_start, line_start; |
2277 | 4.49M | size_t w, w2; |
2278 | 4.49M | size_t width, width2; |
2279 | 4.49M | uint8_t chr, chr2; |
2280 | | /* needed for parse_attribute functions as buffer functions do not work with |
2281 | | * buffers on the stack */ |
2282 | 4.49M | hoedown_buffer *attr = newbuf(doc, BUFFER_ATTRIBUTE); |
2283 | | |
2284 | | |
2285 | | /* parse codefence line */ |
2286 | 325M | while (i < size && data[i] != '\n') |
2287 | 321M | i++; |
2288 | | |
2289 | 4.49M | w = parse_codefence(doc, data, i, &lang, &width, &chr, flags, attr); |
2290 | 4.49M | if (!w) { |
2291 | 4.47M | popbuf(doc, BUFFER_ATTRIBUTE); |
2292 | 4.47M | return 0; |
2293 | 4.47M | } |
2294 | | |
2295 | | /* search for end */ |
2296 | 17.4k | i++; |
2297 | 17.4k | text_start = i; |
2298 | 3.77M | while ((line_start = i) < size) { |
2299 | 169M | while (i < size && data[i] != '\n') |
2300 | 165M | i++; |
2301 | | |
2302 | 3.76M | w2 = is_codefence(data + line_start, i - line_start, &width2, &chr2); |
2303 | 3.76M | if (w == w2 && width == width2 && chr == chr2 && |
2304 | 3.76M | is_empty(data + (line_start+w), i - (line_start+w))) |
2305 | 4.85k | break; |
2306 | | |
2307 | 3.75M | if (i < size) i++; |
2308 | 3.75M | } |
2309 | | |
2310 | 17.4k | text.data = data + text_start; |
2311 | 17.4k | text.size = line_start - text_start; |
2312 | | |
2313 | 17.4k | if (doc->md.blockcode) { |
2314 | 9.77k | doc->fencedcode_char = chr; |
2315 | 9.77k | doc->md.blockcode(ob, text.size ? &text : NULL, lang.size ? &lang : NULL, attr->size ? attr : NULL, &doc->data); |
2316 | 9.77k | doc->fencedcode_char = 0; |
2317 | 9.77k | } |
2318 | | |
2319 | 17.4k | popbuf(doc, BUFFER_ATTRIBUTE); |
2320 | | |
2321 | 17.4k | return i; |
2322 | 4.49M | } |
2323 | | |
2324 | | static size_t |
2325 | | parse_blockcode(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size) |
2326 | 0 | { |
2327 | 0 | size_t beg, end, pre; |
2328 | 0 | hoedown_buffer *work = 0; |
2329 | 0 | hoedown_buffer *attr = 0; |
2330 | |
|
2331 | 0 | work = newbuf(doc, BUFFER_BLOCK); |
2332 | 0 | attr = newbuf(doc, BUFFER_ATTRIBUTE); |
2333 | |
|
2334 | 0 | beg = 0; |
2335 | 0 | while (beg < size) { |
2336 | 0 | for (end = beg + 1; end < size && data[end - 1] != '\n'; end++) {}; |
2337 | 0 | pre = prefix_code(data + beg, end - beg); |
2338 | |
|
2339 | 0 | if (pre) |
2340 | 0 | beg += pre; /* skipping prefix */ |
2341 | 0 | else if (!is_empty(data + beg, end - beg)) |
2342 | | /* non-empty non-prefixed line breaks the pre */ |
2343 | 0 | break; |
2344 | | |
2345 | 0 | if (beg < end) { |
2346 | | /* verbatim copy to the working buffer, |
2347 | | escaping entities */ |
2348 | 0 | if (is_empty(data + beg, end - beg)) |
2349 | 0 | hoedown_buffer_putc(work, '\n'); |
2350 | 0 | else hoedown_buffer_put(work, data + beg, end - beg); |
2351 | 0 | } |
2352 | 0 | beg = end; |
2353 | 0 | } |
2354 | |
|
2355 | 0 | while (work->size && work->data[work->size - 1] == '\n') |
2356 | 0 | work->size -= 1; |
2357 | |
|
2358 | 0 | if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) { |
2359 | 0 | work->size = parse_attributes(work->data, work->size, NULL, attr, "", 0, doc->attr_activation); |
2360 | 0 | } |
2361 | |
|
2362 | 0 | hoedown_buffer_putc(work, '\n'); |
2363 | |
|
2364 | 0 | if (doc->md.blockcode) |
2365 | 0 | doc->md.blockcode(ob, work, NULL, attr, &doc->data); |
2366 | |
|
2367 | 0 | popbuf(doc, BUFFER_BLOCK); |
2368 | 0 | popbuf(doc, BUFFER_ATTRIBUTE); |
2369 | 0 | return beg; |
2370 | 0 | } |
2371 | | |
/* parse_listitem • parsing of a single list item */
/* assuming initial prefix is already removed */
/*
 *   ob         output buffer handed to the listitem render callback
 *   data,size  input starting at the item's own prefix (bullet, number or,
 *              when HOEDOWN_LIST_DEFINITION is set in *flags, a ':')
 *   flags      in/out: HOEDOWN_LI_BLOCK is set when the item contains an
 *              inner blank line, HOEDOWN_LI_END when the item is found to
 *              terminate its list
 *   attribute  passed through to parse_attributes
 *
 * Returns the number of bytes consumed, or 0 when data does not start
 * with a list item prefix. */
static size_t
parse_listitem(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags *flags, hoedown_buffer *attribute)
{
	hoedown_buffer *work = 0, *inter = 0;
	hoedown_buffer *attr = 0;
	size_t beg = 0, end, pre, sublist = 0, orgpre = 0, i, len, fence_pre = 0;
	int in_empty = 0, has_inside_empty = 0, in_fence = 0;
	uint8_t ul_item_char = '*';
	hoedown_buffer *ol_numeral = NULL;

	/* keeping track of the first indentation prefix */
	while (orgpre < 3 && orgpre < size && data[orgpre] == ' ')
		orgpre++;

	/* try the prefixes in turn: bullet, then number, then (for definition
	 * lists) colon; beg ends up as the length of the matching prefix */
	beg = prefix_uli(data, size);
	if (beg) ul_item_char = data[beg - 2];
	if (!beg) {
		beg = prefix_oli(data, size);
		if (beg) {
			/* heap buffer owned by this function; freed on every exit path */
			ol_numeral = hoedown_buffer_new(1024);
			/* -2 to eliminate the trailing ". " */
			hoedown_buffer_put(ol_numeral, data, beg - 2);
		}
		if (*flags & HOEDOWN_LIST_DEFINITION) {
			/* in a definition list only the ':' prefix counts; this
			 * overrides whatever prefix_oli found */
			beg = prefix_dt(data, size);
			if (beg) ul_item_char = data[beg - 2];
		}
	}

	if (!beg) {
		if (ol_numeral) hoedown_buffer_free(ol_numeral);
		return 0;
	}

	/* skipping to the beginning of the following line */
	end = beg;
	while (end < size && data[end - 1] != '\n')
		end++;

	if (doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) {
		/* does the item's first line open a fenced code block?
		 * (len is written by is_codefence only when it succeeds) */
		fence_pre = is_codefence(data + beg, end - beg, &len, NULL);
		if (fence_pre) {
			in_fence = 1;
			/* fence end offset + prefix length - fence width */
			fence_pre = fence_pre + beg - len;
		}
	}

	/* getting working buffers */
	work = newbuf(doc, BUFFER_SPAN);
	inter = newbuf(doc, BUFFER_SPAN);

	/* calculating the indentation */
	i = 0;
	while (i < 4 && beg + i < end && data[beg + i] == ' ')
		i++;

	beg += i;

	/* putting the first line into the working buffer */
	hoedown_buffer_put(work, data + beg, end - beg);
	beg = end;

	attr = newbuf(doc, BUFFER_ATTRIBUTE);

	/* process the following lines */
	while (beg < size) {
		size_t has_next_uli = 0, has_next_oli = 0, has_next_dli = 0;

		/* advance end to one past the newline of the line starting at beg */
		end++;

		while (end < size && data[end - 1] != '\n')
			end++;

		/* process an empty line */
		if (is_empty(data + beg, end - beg)) {
			in_empty = 1;
			beg = end;
			continue;
		}

		/* calculating the indentation */
		i = 0;
		while (i < 4 && beg + i < end && data[beg + i] == ' ')
			i++;

		/* inside a fence, strip no more indentation than fence_pre */
		if (in_fence && i > fence_pre) {
			i = fence_pre;
		}

		pre = i;

		if (doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) {
			/* every fence line toggles the in-fence state */
			if (is_codefence(data + beg + i, end - beg - i, NULL, NULL))
				in_fence = !in_fence;
			if (in_fence && fence_pre == 0) {
				fence_pre = pre;
			}
		}

		/* Only check for new list items if we are **not** inside
		 * a fenced code block */
		if (!in_fence) {
			has_next_uli = prefix_uli(data + beg + i, end - beg - i);
			has_next_oli = prefix_oli(data + beg + i, end - beg - i);

			/* only check for the next definition if it is same indentation or less
			 * since embedded definition lists need terms, so finding just a
			 * colon by itself does not mean anything */
			if (pre <= orgpre)
				has_next_dli = prefix_dt(data + beg + i, end - beg - i);
		}

		/* checking for a new item */
		if ((has_next_uli && !is_hrule(data + beg + i, end - beg - i)) ||
			has_next_oli || (*flags & HOEDOWN_LI_DD && has_next_dli)) {
			if (in_empty)
				has_inside_empty = 1;

			/* the following item must have the same (or less) indentation */
			if (pre <= orgpre) {
				/* if the following item has different list type, we end this list */
				if (in_empty && (
					((*flags & HOEDOWN_LIST_ORDERED) && has_next_uli) ||
					(!(*flags & HOEDOWN_LIST_ORDERED) && has_next_oli))) {
					*flags |= HOEDOWN_LI_END;
					has_inside_empty = 0;
				}
				break;
			}

			/* a more deeply indented item starts a nested list: remember
			 * where it begins inside the working buffer */
			if (!sublist)
				sublist = work->size;
		}
		/* joining only indented stuff after empty lines;
		 * note that now we only require 1 space of indentation
		 * to continue a list */
		else if (in_empty && pre == 0) {
			*flags |= HOEDOWN_LI_END;
			break;
		}

		if (in_empty) {
			/* the blank line(s) skipped earlier belong to this item */
			hoedown_buffer_putc(work, '\n');
			has_inside_empty = 1;
			in_empty = 0;
		}

		/* adding the line without prefix into the working buffer */
		hoedown_buffer_put(work, data + beg + i, end - beg - i);
		beg = end;
	}

	/* render of li contents */
	if (has_inside_empty)
		*flags |= HOEDOWN_LI_BLOCK;

	if (*flags & HOEDOWN_LI_BLOCK) {
		/* intermediate render of block li */
		pre = 0;
		/* end = length of the item's own text, excluding a nested list */
		if (sublist && sublist < work->size) {
			end = sublist;
		} else {
			end = work->size;
		}

		/* single-pass block used only so that `break` can skip the
		 * attribute handling */
		do {
			if (!(doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE)) {
				break;
			}

			/* only the first line of the item is searched for attributes */
			i = 0;
			while (i < end && work->data[i] != '\n') {
				i++;
			}

			len = parse_attributes(work->data, i, attr, attribute, "list", 0, doc->attr_activation);
			if (i == len) {
				/* parse_attributes returned the full line length:
				 * nothing to cut from the first line */
				break;
			}

			/* render the first line up to len on its own and continue
			 * with the remainder from the end of that line */
			pre = i;
			parse_block(inter, doc, work->data, len);
		} while (0);

		parse_block(inter, doc, work->data + pre, end - pre);
		if (end == sublist) {
			parse_block(inter, doc, work->data + sublist, work->size - sublist);
		}
	} else {
		/* intermediate render of inline li */
		if (sublist && sublist < work->size) {
			if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
				len = parse_attributes(work->data, sublist, attr, attribute, "list", 0, doc->attr_activation);
			} else {
				len = sublist;
			}
			/* the item's own text is inline, the nested list is block-level */
			parse_inline(inter, doc, work->data, len);
			parse_block(inter, doc, work->data + sublist, work->size - sublist);
		} else {
			if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) {
				len = parse_attributes(work->data, work->size, attr, attribute, "list", 0, doc->attr_activation);
			} else {
				len = work->size;
			}
			parse_inline(inter, doc, work->data, len);
		}
	}

	/* render of li itself */
	if (doc->md.listitem) {
		/* ul_item_char / ol_numeral are set only for the duration of the
		 * callback */
		doc->ul_item_char = ul_item_char;
		doc->ol_numeral = ol_numeral;
		doc->md.listitem(ob, inter, attr, flags, &doc->data);
		doc->ol_numeral = NULL;
		doc->ul_item_char = 0;
	}

	if (ol_numeral) hoedown_buffer_free(ol_numeral);

	/* one pop per newbuf above: work, inter (SPAN) and attr (ATTRIBUTE) */
	popbuf(doc, BUFFER_SPAN);
	popbuf(doc, BUFFER_SPAN);
	popbuf(doc, BUFFER_ATTRIBUTE);
	return beg;
}
2598 | | |
2599 | | /* parse_definition • parsing of a term/definition pair, assuming starting |
2600 | | * at start of line */ |
2601 | | static size_t |
2602 | | parse_definition(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags *flags, hoedown_buffer *attribute) |
2603 | 63.1k | { |
2604 | | /* end represents the position of the first line where definitions start */ |
2605 | 63.1k | size_t j = 0, k = 0, len = 0, end = prefix_dli(doc, data, size); |
2606 | 63.1k | if (end <= 0) { |
2607 | 19.6k | return 0; |
2608 | 19.6k | } |
2609 | 43.4k | hoedown_buffer *work = 0, *attr_work; |
2610 | | |
2611 | | |
2612 | | /* scan all the definition terms, rendering them to the output buffer |
2613 | | * the +1 is to account for the trailing newline on each term |
2614 | | * j is a counter keeping track of the beginning of each new term */ |
2615 | 43.4k | *flags |= HOEDOWN_LI_DT; |
2616 | 157k | while (j + 1 < end) { |
2617 | | /* find the end of the term (where the newline is) */ |
2618 | 9.14M | for(k = j + 1; k - 1 < end && data[k - 1] != '\n'; k++); |
2619 | | |
2620 | 113k | len = k - j; |
2621 | | |
2622 | 113k | if (is_empty(data + j, len)) { |
2623 | 3.05k | j = k; |
2624 | 3.05k | continue; |
2625 | 3.05k | } |
2626 | | |
2627 | 110k | work = newbuf(doc, BUFFER_BLOCK); |
2628 | 110k | attr_work = newbuf(doc, BUFFER_ATTRIBUTE); |
2629 | | |
2630 | 110k | if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) { |
2631 | 110k | len = parse_attributes(data + j, len, attr_work, NULL, "", 1, doc->attr_activation); |
2632 | 110k | } |
2633 | | |
2634 | 110k | parse_inline(work, doc, data + j, len); |
2635 | | |
2636 | 110k | if (doc->md.listitem) { |
2637 | 110k | doc->md.listitem(ob, work, attr_work, flags, &doc->data); |
2638 | 110k | } |
2639 | | |
2640 | 110k | j = k; |
2641 | | |
2642 | 110k | popbuf(doc, BUFFER_BLOCK); |
2643 | 110k | popbuf(doc, BUFFER_ATTRIBUTE); |
2644 | 110k | } |
2645 | 43.4k | *flags &= ~HOEDOWN_LI_DT; |
2646 | | |
2647 | | /* scan all the definitions, rendering it to the output buffer */ |
2648 | 43.4k | *flags |= HOEDOWN_LI_DD; |
2649 | 147k | while (end < size) { |
2650 | 137k | j = parse_listitem(ob, doc, data + end, size - end, flags, attribute); |
2651 | 137k | if (j <= 0) { |
2652 | 34.1k | break; |
2653 | 34.1k | } |
2654 | 103k | end += j; |
2655 | 103k | } |
2656 | | |
2657 | 43.4k | *flags &= ~HOEDOWN_LI_DD; |
2658 | 43.4k | *flags &= ~HOEDOWN_LI_END; |
2659 | | |
2660 | 43.4k | return end; |
2661 | 63.1k | } |
2662 | | |
2663 | | /* parse_list • parsing ordered or unordered list block */ |
2664 | | static size_t |
2665 | | parse_list(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, hoedown_list_flags flags) |
2666 | 94.8k | { |
2667 | 94.8k | hoedown_buffer *work = 0; |
2668 | 94.8k | hoedown_buffer *attr = 0; |
2669 | 94.8k | size_t i = 0, j; |
2670 | | |
2671 | 94.8k | doc->list_depth++; |
2672 | | |
2673 | 94.8k | work = newbuf(doc, BUFFER_BLOCK); |
2674 | 94.8k | attr = newbuf(doc, BUFFER_ATTRIBUTE); |
2675 | | |
2676 | 183k | while (i < size) { |
2677 | 161k | if (flags & HOEDOWN_LIST_DEFINITION) { |
2678 | 63.1k | j = parse_definition(work, doc, data + i, size - i, &flags, attr); |
2679 | 98.6k | } else { |
2680 | 98.6k | j = parse_listitem(work, doc, data + i, size - i, &flags, attr); |
2681 | 98.6k | } |
2682 | 161k | i += j; |
2683 | | |
2684 | 161k | if (!j || (flags & HOEDOWN_LI_END)) |
2685 | 73.0k | break; |
2686 | 161k | } |
2687 | | |
2688 | 94.8k | if (doc->md.list) |
2689 | 94.8k | doc->md.list(ob, work, attr, flags, &doc->data); |
2690 | 94.8k | popbuf(doc, BUFFER_BLOCK); |
2691 | 94.8k | popbuf(doc, BUFFER_ATTRIBUTE); |
2692 | | |
2693 | 94.8k | doc->list_depth--; |
2694 | | |
2695 | 94.8k | return i; |
2696 | 94.8k | } |
2697 | | |
2698 | | /* parse_atxheader • parsing of atx-style headers */ |
2699 | | static size_t |
2700 | | parse_atxheader(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size) |
2701 | 10.0k | { |
2702 | 10.0k | size_t level = 0; |
2703 | 10.0k | size_t i, end, skip; |
2704 | | |
2705 | 22.3k | while (level < size && level < 6 && data[level] == '#') |
2706 | 12.3k | level++; |
2707 | | |
2708 | 46.2k | for (i = level; i < size && data[i] == ' '; i++); |
2709 | | |
2710 | 10.0M | for (end = i; end < size && data[end] != '\n'; end++); |
2711 | 10.0k | skip = end; |
2712 | | |
2713 | 11.4k | while (end && data[end - 1] == '#') |
2714 | 1.36k | end--; |
2715 | | |
2716 | 39.0k | while (end && data[end - 1] == ' ') |
2717 | 29.0k | end--; |
2718 | | |
2719 | 10.0k | if (end > i) { |
2720 | 9.02k | hoedown_buffer *work = newbuf(doc, BUFFER_SPAN); |
2721 | 9.02k | hoedown_buffer *attr = newbuf(doc, BUFFER_ATTRIBUTE); |
2722 | 9.02k | size_t len; |
2723 | | |
2724 | 9.02k | len = end - i; |
2725 | 9.02k | if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) { |
2726 | 9.02k | len = parse_attributes(data + i, end - i, attr, NULL, "", 1, doc->attr_activation); |
2727 | 9.02k | } |
2728 | | |
2729 | 9.02k | parse_inline(work, doc, data + i, len); |
2730 | | |
2731 | 9.02k | if (doc->md.header) { |
2732 | 9.02k | doc->header_type = HOEDOWN_HEADER_ATX; |
2733 | 9.02k | doc->md.header(ob, work, attr, (int)level, &doc->data); |
2734 | 9.02k | doc->header_type = HOEDOWN_HEADER_NONE; |
2735 | 9.02k | } |
2736 | | |
2737 | 9.02k | popbuf(doc, BUFFER_SPAN); |
2738 | 9.02k | popbuf(doc, BUFFER_ATTRIBUTE); |
2739 | 9.02k | } else { |
2740 | 1.01k | doc->md.header(ob, NULL, NULL, (int)level, &doc->data); |
2741 | 1.01k | } |
2742 | | |
2743 | 10.0k | return skip; |
2744 | 10.0k | } |
2745 | | |
2746 | | /* parse_footnote_def • parse a single footnote definition */ |
2747 | | static void |
2748 | | parse_footnote_def(hoedown_buffer *ob, hoedown_document *doc, unsigned int num, const hoedown_buffer *name, uint8_t *data, size_t size) |
2749 | 755 | { |
2750 | 755 | hoedown_buffer *work = 0; |
2751 | 755 | work = newbuf(doc, BUFFER_SPAN); |
2752 | 755 | doc->footnote_id = name; |
2753 | | |
2754 | 755 | parse_block(work, doc, data, size); |
2755 | | |
2756 | 755 | if (doc->md.footnote_def) |
2757 | 755 | doc->md.footnote_def(ob, work, num, &doc->data); |
2758 | | |
2759 | 755 | doc->footnote_id = NULL; |
2760 | 755 | popbuf(doc, BUFFER_SPAN); |
2761 | 755 | } |
2762 | | |
2763 | | /* parse_footnote_list • render the contents of the footnotes */ |
2764 | | static void |
2765 | | parse_footnote_list(hoedown_buffer *ob, hoedown_document *doc, struct footnote_list *footnotes) |
2766 | 10.1k | { |
2767 | 10.1k | hoedown_buffer *work = 0; |
2768 | 10.1k | struct footnote_item *item; |
2769 | 10.1k | struct footnote_ref *ref; |
2770 | | |
2771 | 10.1k | if (footnotes->count == 0) |
2772 | 9.61k | return; |
2773 | | |
2774 | 582 | work = newbuf(doc, BUFFER_BLOCK); |
2775 | | |
2776 | 582 | item = footnotes->head; |
2777 | 1.33k | while (item) { |
2778 | 755 | ref = item->ref; |
2779 | 755 | parse_footnote_def(work, doc, ref->num, ref->name, ref->contents->data, ref->contents->size); |
2780 | 755 | item = item->next; |
2781 | 755 | } |
2782 | | |
2783 | 582 | if (doc->md.footnotes) |
2784 | 582 | doc->md.footnotes(ob, work, &doc->data); |
2785 | 582 | popbuf(doc, BUFFER_BLOCK); |
2786 | 582 | } |
2787 | | |
2788 | | /* htmlblock_is_end • check for end of HTML block : </tag>( *)\n */ |
2789 | | /* returns tag length on match, 0 otherwise */ |
2790 | | /* assumes data starts with "<" */ |
2791 | | static size_t |
2792 | | htmlblock_is_end( |
2793 | | const char *tag, |
2794 | | size_t tag_len, |
2795 | | hoedown_document *doc, |
2796 | | uint8_t *data, |
2797 | | size_t size) |
2798 | 2.80M | { |
2799 | 2.80M | size_t i = tag_len + 3, w; |
2800 | | |
2801 | | /* try to match the end tag */ |
2802 | | /* note: we're not considering tags like "</tag >" which are still valid */ |
2803 | 2.80M | if (i > size || |
2804 | 2.80M | data[1] != '/' || |
2805 | 2.80M | strncasecmp((char *)data + 2, tag, tag_len) != 0 || |
2806 | 2.80M | data[tag_len + 2] != '>') |
2807 | 2.25M | return 0; |
2808 | | |
2809 | | /* rest of the line must be empty */ |
2810 | 543k | if ((w = is_empty(data + i, size - i)) == 0 && i < size) |
2811 | 68.9k | return 0; |
2812 | | |
2813 | 474k | return i + w; |
2814 | 543k | } |
2815 | | |
2816 | | /* htmlblock_find_end • try to find HTML block ending tag */ |
2817 | | /* returns the length on match, 0 otherwise */ |
2818 | | static size_t |
2819 | | htmlblock_find_end( |
2820 | | const char *tag, |
2821 | | size_t tag_len, |
2822 | | hoedown_document *doc, |
2823 | | uint8_t *data, |
2824 | | size_t size) |
2825 | 8.03M | { |
2826 | 8.03M | size_t i = 0, w; |
2827 | | |
2828 | 10.3M | while (1) { |
2829 | 1.32G | while (i < size && data[i] != '<') i++; |
2830 | 10.3M | if (i >= size) return 0; |
2831 | | |
2832 | 2.80M | w = htmlblock_is_end(tag, tag_len, doc, data + i, size - i); |
2833 | 2.80M | if (w) return i + w; |
2834 | 2.32M | i++; |
2835 | 2.32M | } |
2836 | 8.03M | } |
2837 | | |
2838 | | /* htmlblock_find_end_strict • try to find end of HTML block in strict mode */ |
2839 | | /* (it must have a blank line or a new HTML tag afterwards) */ |
2840 | | /* returns the length on match, 0 otherwise */ |
2841 | | static size_t |
2842 | | htmlblock_find_end_strict( |
2843 | | const char *tag, |
2844 | | size_t tag_len, |
2845 | | hoedown_document *doc, |
2846 | | uint8_t *data, |
2847 | | size_t size) |
2848 | 10.5k | { |
2849 | 10.5k | size_t i = 0, mark; |
2850 | | |
2851 | 8.03M | while (1) { |
2852 | 8.03M | mark = i; |
2853 | 783M | while (i < size && data[i] != '\n') i++; |
2854 | 8.03M | if (i < size) i++; |
2855 | 8.03M | if (i == mark) return 0; |
2856 | | |
2857 | 8.02M | mark += htmlblock_find_end(tag, tag_len, doc, data + mark, i - mark); |
2858 | 8.02M | if (mark == i && (is_empty(data + i, size - i) || (i + 1 < size && data[i] == '<' && data[i + 1] != '/') || i >= size)) break; |
2859 | 8.02M | } |
2860 | | |
2861 | 1.25k | return i; |
2862 | 10.5k | } |
2863 | | |
2864 | | /* parse_htmlblock • parsing of inline HTML block */ |
2865 | | static size_t |
2866 | | parse_htmlblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size, int do_render) |
2867 | 201k | { |
2868 | 201k | hoedown_buffer work = { NULL, 0, 0, 0, NULL, NULL, NULL }; |
2869 | 201k | size_t i, j = 0, tag_len, tag_end; |
2870 | 201k | const char *curtag = NULL; |
2871 | 201k | int meta = 0; |
2872 | | |
2873 | 201k | work.data = data; |
2874 | | |
2875 | | /* identification of the opening tag */ |
2876 | 201k | if (size < 2 || data[0] != '<') |
2877 | 196 | return 0; |
2878 | | |
2879 | 200k | i = 1; |
2880 | 20.2M | while (i < size && data[i] != '>' && data[i] != ' ') |
2881 | 20.0M | i++; |
2882 | | |
2883 | 200k | if (i < size) { |
2884 | 194k | if (doc->ext_flags & HOEDOWN_EXT_HTML5_BLOCKS) |
2885 | 194k | curtag = hoedown_find_html5_block_tag((char *)data + 1, (int)i - 1); |
2886 | 0 | else |
2887 | 0 | curtag = hoedown_find_block_tag((char *)data + 1, (int)i - 1); |
2888 | 194k | } |
2889 | | |
2890 | | /* handling of special cases */ |
2891 | 200k | if (!curtag) { |
2892 | | |
2893 | | /* HTML comment, laxist form */ |
2894 | 190k | if (size > 5 && data[1] == '!' && data[2] == '-' && data[3] == '-') { |
2895 | 27.2k | i = 5; |
2896 | | |
2897 | 27.2k | if (data[4] == '*') { |
2898 | 334 | meta++; |
2899 | 334 | } |
2900 | | |
2901 | 102M | while (i < size && !(data[i - 2] == '-' && data[i - 1] == '-' && data[i] == '>')) |
2902 | 102M | i++; |
2903 | | |
2904 | 27.2k | if (data[i - 3] == '*') { |
2905 | 827 | meta++; |
2906 | 827 | } |
2907 | | |
2908 | 27.2k | i++; |
2909 | | |
2910 | 27.2k | if (i < size) |
2911 | 24.7k | j = is_empty(data + i, size - i); |
2912 | | |
2913 | 27.2k | if (j) { |
2914 | 1.13k | work.size = i + j; |
2915 | | |
2916 | 1.13k | if (do_render && doc->ext_flags & HOEDOWN_EXT_META_BLOCK && |
2917 | 1.13k | meta == 2 && doc->meta) { |
2918 | 0 | size_t org, sz; |
2919 | |
|
2920 | 0 | sz = work.size - 5; |
2921 | 0 | while (sz > 0 && work.data[sz - 1] == '\n') { |
2922 | 0 | sz--; |
2923 | 0 | } |
2924 | |
|
2925 | 0 | org = 5; |
2926 | 0 | while (org < sz && work.data[org] == '\n') { |
2927 | 0 | org++; |
2928 | 0 | } |
2929 | |
|
2930 | 0 | if (org < sz) { |
2931 | 0 | hoedown_buffer_put(doc->meta, work.data + org, sz - org); |
2932 | 0 | hoedown_buffer_putc(doc->meta, '\n'); |
2933 | 0 | } |
2934 | 1.13k | } else if (do_render && doc->md.blockhtml) { |
2935 | 0 | doc->md.blockhtml(ob, &work, &doc->data); |
2936 | 0 | } |
2937 | 1.13k | return work.size; |
2938 | 1.13k | } |
2939 | 27.2k | } |
2940 | | |
2941 | | /* HR, which is the only self-closing block tag considered */ |
2942 | 189k | if (size > 4 && (data[1] == 'h' || data[1] == 'H') && (data[2] == 'r' || data[2] == 'R')) { |
2943 | 8.89k | i = 3; |
2944 | 33.9M | while (i < size && data[i] != '>') |
2945 | 33.8M | i++; |
2946 | | |
2947 | 8.89k | if (i + 1 < size) { |
2948 | 8.09k | i++; |
2949 | 8.09k | j = is_empty(data + i, size - i); |
2950 | 8.09k | if (j) { |
2951 | 791 | work.size = i + j; |
2952 | 791 | if (do_render && doc->md.blockhtml) |
2953 | 0 | doc->md.blockhtml(ob, &work, &doc->data); |
2954 | 791 | return work.size; |
2955 | 791 | } |
2956 | 8.09k | } |
2957 | 8.89k | } |
2958 | | |
2959 | | /* Extension script tags */ |
2960 | 188k | if (doc->ext_flags & HOEDOWN_EXT_SCRIPT_TAGS) { |
2961 | 188k | i = script_tag_length(data, size); |
2962 | 188k | if (i) { |
2963 | 9.26k | if (i < size) { |
2964 | 6.41k | j = is_empty(data + i, size - i); |
2965 | 6.41k | } |
2966 | | |
2967 | 9.26k | if (j) { |
2968 | 579 | work.size = i + j; |
2969 | 579 | if (do_render && doc->md.blockhtml) { |
2970 | 0 | doc->md.blockhtml(ob, &work, &doc->data); |
2971 | 0 | } |
2972 | 579 | return work.size; |
2973 | 579 | } |
2974 | 9.26k | } |
2975 | | |
2976 | 188k | } |
2977 | | |
2978 | | /* no special case recognised */ |
2979 | 187k | return 0; |
2980 | 188k | } |
2981 | | |
2982 | | /* looking for a matching closing tag in strict mode */ |
2983 | 10.5k | tag_len = strlen(curtag); |
2984 | 10.5k | tag_end = htmlblock_find_end_strict(curtag, tag_len, doc, data, size); |
2985 | | |
2986 | | /* if not found, trying a second pass looking for indented match */ |
2987 | | /* but not if tag is "ins" or "del" (following original Markdown.pl) */ |
2988 | 10.5k | if (!tag_end && strcmp(curtag, "ins") != 0 && strcmp(curtag, "del") != 0) |
2989 | 8.29k | tag_end = htmlblock_find_end(curtag, tag_len, doc, data, size); |
2990 | | |
2991 | 10.5k | if (!tag_end) |
2992 | 8.18k | return 0; |
2993 | | |
2994 | | /* the end of the block has been found */ |
2995 | 2.38k | work.size = tag_end; |
2996 | 2.38k | if (do_render && doc->md.blockhtml) |
2997 | 0 | doc->md.blockhtml(ob, &work, &doc->data); |
2998 | | |
2999 | 2.38k | return tag_end; |
3000 | 10.5k | } |
3001 | | |
3002 | | /* Common function to parse table main rows and continued rows. */ |
3003 | | static size_t |
3004 | | parse_table_cell_line( |
3005 | | hoedown_buffer *ob, |
3006 | | uint8_t *data, |
3007 | | size_t size, |
3008 | | size_t offset, |
3009 | | char separator, |
3010 | 57.5k | int is_continuation) { |
3011 | 57.5k | size_t pos, line_end, cell_start, cell_end, len, copy_start, copy_end; |
3012 | | |
3013 | 57.5k | pos = offset; |
3014 | | |
3015 | 113k | while (pos < size && _isspace(data[pos])) pos++; |
3016 | | |
3017 | 57.5k | cell_start = pos; |
3018 | | |
3019 | 57.5k | line_end = pos; |
3020 | 15.6M | while (line_end < size && data[line_end] != '\n') line_end++; |
3021 | 57.5k | len = find_separator_char(data + pos, line_end - pos, separator); |
3022 | | |
3023 | | /* Two possibilities for len == 0: |
3024 | | 1) No more separator char found in the current line. |
3025 | | 2) The next separator is right after the current one, i.e. empty cell. |
3026 | | For case 1, we skip to the end of line; for case 2 we just continue. |
3027 | | */ |
3028 | 57.5k | if (len == 0 && pos < size && data[pos] != separator) { |
3029 | 1.68M | while (pos + len < size && data[pos + len] != '\n') len++; |
3030 | 23.5k | } |
3031 | 57.5k | pos += len; |
3032 | | |
3033 | 57.5k | cell_end = pos - 1; |
3034 | | |
3035 | 397k | while (cell_end > cell_start && _isspace(data[cell_end])) |
3036 | 339k | cell_end--; |
3037 | | |
3038 | | /* If this isn't the first line of the cell, add a new line before the |
3039 | | extra cell contents, to separate them (and make backslash linebreaks |
3040 | | work). |
3041 | | */ |
3042 | 57.5k | if (is_continuation) hoedown_buffer_putc(ob, '\n'); |
3043 | | |
3044 | | /* Remove escaping from pipes */ |
3045 | 57.5k | copy_start = copy_end = cell_start; |
3046 | 13.4M | while (copy_end < cell_end + 1) { |
3047 | 13.3M | if (data[copy_end] == separator && copy_end > copy_start && data[copy_end - 1] == '\\') { |
3048 | 3.01k | hoedown_buffer_put(ob, data + copy_start, copy_end - copy_start - 1); |
3049 | 3.01k | copy_start = copy_end; |
3050 | 3.01k | } |
3051 | 13.3M | copy_end++; |
3052 | 13.3M | } |
3053 | 57.5k | hoedown_buffer_put(ob, data + copy_start, copy_end - copy_start); |
3054 | | |
3055 | 57.5k | return pos - offset; |
3056 | 57.5k | } |
3057 | | |
3058 | | static void |
3059 | | parse_table_row( |
3060 | | hoedown_buffer *ob, |
3061 | | hoedown_document *doc, |
3062 | | uint8_t *data, |
3063 | | size_t size, |
3064 | | size_t columns, |
3065 | | size_t rows, |
3066 | | hoedown_table_flags *col_data, |
3067 | | hoedown_table_flags header_flag) |
3068 | 110k | { |
3069 | 110k | size_t i = 0, col; |
3070 | 110k | hoedown_buffer *row_work = 0; |
3071 | | |
3072 | 110k | if (!doc->md.table_cell || !doc->md.table_row) |
3073 | 84.9k | return; |
3074 | | |
3075 | 25.8k | row_work = newbuf(doc, BUFFER_SPAN); |
3076 | | |
3077 | | /* skip optional first pipe */ |
3078 | 25.8k | if (i < size && data[i] == '|') |
3079 | 13.1k | i++; |
3080 | | |
3081 | 59.8k | for (col = 0; col < columns && i < size; ++col) { |
3082 | 33.9k | size_t pos, extra_rows_in_cell; |
3083 | 33.9k | hoedown_buffer *cell_content; |
3084 | 33.9k | hoedown_buffer *cell_work; |
3085 | | |
3086 | | /* cell_content is the text that is inline parsed into cell_work. It |
3087 | | consists of the values of this cell from each row, concatenated and |
3088 | | separated by new lines. |
3089 | | */ |
3090 | 33.9k | cell_content = newbuf(doc, BUFFER_SPAN); |
3091 | 33.9k | cell_work = newbuf(doc, BUFFER_SPAN); |
3092 | | |
3093 | 33.9k | i += parse_table_cell_line(cell_content, data, size, i, '|', 0 /* is_contination */); |
3094 | | |
3095 | | /* Add extra rows of the cell. This only occurs if rows is greater than 0, |
3096 | | which only happens when multiline tables are enabled. |
3097 | | |
3098 | | Each extra row is a colon, followed by cell contents for the continued |
3099 | | row, separated by colons. |
3100 | | */ |
3101 | 33.9k | extra_rows_in_cell = rows - 1; |
3102 | 33.9k | pos = i; |
3103 | 57.5k | while (extra_rows_in_cell > 0 && pos < size) { |
3104 | 23.6k | size_t c; |
3105 | | |
3106 | | /* seek to the end of the current row */ |
3107 | 2.47M | while (pos < size && data[pos] != '\n') { |
3108 | 2.45M | pos++; |
3109 | 2.45M | } |
3110 | | |
3111 | | /* skip new line and leading colon */ |
3112 | 23.6k | if (pos < size) pos++; |
3113 | 23.6k | if (pos < size) pos++; |
3114 | | |
3115 | | /* Seek to the beginning of the correct column on the continuation line. |
3116 | | * The continuation line should have the expected number of columns, and |
3117 | | * so we never expect pos >= size or data[pos] == '\n'. These checks serve |
3118 | | * as defense in depth against wrong preconditions. */ |
3119 | 42.9k | for (c = 0; c < col; c++) { |
3120 | 3.71M | while (pos < size && data[pos] != '\n' && (is_backslashed(data, pos) || data[pos] != ':')) |
3121 | 3.69M | pos++; |
3122 | 19.3k | if (pos < size && data[pos] == ':') pos++; /* skip colon */ |
3123 | 19.3k | } |
3124 | | |
3125 | 23.6k | parse_table_cell_line(cell_content, data, size, pos, ':', 1 /* is_contination */); |
3126 | | |
3127 | 23.6k | extra_rows_in_cell--; |
3128 | 23.6k | } |
3129 | | |
3130 | 33.9k | parse_inline(cell_work, doc, cell_content->data, cell_content->size); |
3131 | | |
3132 | 33.9k | doc->md.table_cell(row_work, cell_work, col_data[col] | header_flag, &doc->data); |
3133 | | |
3134 | 33.9k | popbuf(doc, BUFFER_SPAN); |
3135 | 33.9k | popbuf(doc, BUFFER_SPAN); |
3136 | 33.9k | i++; |
3137 | 33.9k | } |
3138 | | |
3139 | 29.3k | for (; col < columns; ++col) { |
3140 | 3.51k | hoedown_buffer empty_cell = { 0, 0, 0, 0, NULL, NULL, NULL }; |
3141 | 3.51k | doc->md.table_cell(row_work, &empty_cell, col_data[col] | header_flag, &doc->data); |
3142 | 3.51k | } |
3143 | | |
3144 | 25.8k | doc->md.table_row(ob, row_work, &doc->data); |
3145 | | |
3146 | 25.8k | popbuf(doc, BUFFER_SPAN); |
3147 | 25.8k | } |
3148 | | |
3149 | | static size_t |
3150 | | parse_table_header( |
3151 | | hoedown_buffer *ob, |
3152 | | hoedown_buffer *attr, |
3153 | | hoedown_document *doc, |
3154 | | uint8_t *data, |
3155 | | size_t size, |
3156 | | size_t *columns, |
3157 | | hoedown_table_flags **column_data) |
3158 | 4.47M | { |
3159 | 4.47M | int pipes, rows; |
3160 | 4.47M | size_t i = 0, col, header_end, under_end; |
3161 | 4.47M | hoedown_buffer *header_contents = 0; |
3162 | | |
3163 | 4.47M | pipes = 0; |
3164 | 321M | while (i < size && data[i] != '\n') { |
3165 | 316M | if (!is_backslashed(data, i) && data[i] == '|') { |
3166 | 9.06M | pipes++; |
3167 | 9.06M | } |
3168 | 316M | i++; |
3169 | 316M | } |
3170 | | |
3171 | 4.47M | if (i == size || pipes == 0) |
3172 | 4.24M | return 0; |
3173 | | |
3174 | 229k | header_end = i; |
3175 | | |
3176 | 730k | while (header_end > 0 && _isspace(data[header_end - 1])) |
3177 | 500k | header_end--; |
3178 | | |
3179 | 229k | if (data[0] == '|') |
3180 | 96.9k | pipes--; |
3181 | | |
3182 | 229k | if (header_end && data[header_end - 1] == '|' && !is_backslashed(data, header_end - 1)) |
3183 | 86.0k | pipes--; |
3184 | | |
3185 | 229k | if (doc->ext_flags & HOEDOWN_EXT_SPECIAL_ATTRIBUTE) { |
3186 | 229k | size_t n = parse_attributes(data, header_end, attr, NULL, "", 1, doc->attr_activation); |
3187 | | /* n == header_end when no attribute is found */ |
3188 | 229k | if (n != header_end) { |
3189 | 19.5k | while (n > 0 && _isspace(data[n - 1])) |
3190 | 0 | n--; |
3191 | 19.5k | if (attr->size && n && data[n - 1] == '|' && !is_backslashed(data, n - 1)) |
3192 | 2.45k | pipes--; |
3193 | | |
3194 | 19.5k | header_end = n + 1; |
3195 | 19.5k | } |
3196 | 229k | } |
3197 | | |
3198 | 229k | if (pipes < 0) |
3199 | 44.1k | return 0; |
3200 | | |
3201 | | /* header_contents will have the lines of the header copied into it, and then |
3202 | | is passed to parse_table_row. We need a separate buffer to avoid passing |
3203 | | the attribute to parse_table_row. |
3204 | | */ |
3205 | 185k | header_contents = newbuf(doc, BUFFER_SPAN); |
3206 | 185k | hoedown_buffer_put(header_contents, data, header_end); |
3207 | | |
3208 | 185k | *columns = pipes + 1; |
3209 | 185k | *column_data = hoedown_calloc(*columns, sizeof(hoedown_table_flags)); |
3210 | | |
3211 | | /* If the multiline table extension is enabled, check the next lines for |
3212 | | continuation markers, to find the number of text rows that make up this |
3213 | | logical row, and copy the contents of each row to header_contents, |
3214 | | separated by new lines. |
3215 | | */ |
3216 | 185k | rows = 1; |
3217 | 185k | if ((doc->ext_flags & HOEDOWN_EXT_MULTILINE_TABLES) != 0) { |
3218 | 192k | while (i < size) { |
3219 | 192k | size_t j = i + 1; |
3220 | 192k | int colons = 0; |
3221 | | |
3222 | | /* Require that the continuation line starts with a colon */ |
3223 | 192k | if (j >= size || data[j] != ':') break; |
3224 | | /* Skip the leading colon to match the pipe counting behavior above */ |
3225 | 28.3k | j++; |
3226 | | |
3227 | | /* Require that the continuation line start with ": ", to |
3228 | | distinguish from ":-" which could start a left-aligned header |
3229 | | bar. |
3230 | | */ |
3231 | 28.3k | if (j >= size || data[j] != ' ') break; |
3232 | | |
3233 | 14.1M | while (j < size && data[j] != '\n') { |
3234 | 14.1M | j++; |
3235 | 14.1M | if (j < size && !is_backslashed(data, j) && data[j] == ':') |
3236 | 447k | colons++; |
3237 | 14.1M | } |
3238 | | |
3239 | | /* Allow a trailing colon to match the pipe counting behavior above */ |
3240 | 11.6k | if (!is_backslashed(data, j - 1) && data[j - 1] == ':') |
3241 | 766 | colons--; |
3242 | | |
3243 | 11.6k | if (colons != pipes) break; |
3244 | | |
3245 | 7.60k | hoedown_buffer_putc(header_contents, '\n'); |
3246 | | /* data[i] is the previous new line, and data[j] is the next new |
3247 | | line. This copies all the text between the new lines. |
3248 | | */ |
3249 | 7.60k | hoedown_buffer_put(header_contents, data + i + 1, j - i - 1); |
3250 | | |
3251 | 7.60k | rows++; |
3252 | 7.60k | i = j; |
3253 | 7.60k | header_end = j; |
3254 | 7.60k | } |
3255 | 185k | } |
3256 | | |
3257 | | /* Parse the header underline */ |
3258 | 185k | i++; |
3259 | 185k | if (i < size && data[i] == '|') |
3260 | 20.8k | i++; |
3261 | | |
3262 | 185k | under_end = i; |
3263 | 41.4M | while (under_end < size && data[under_end] != '\n') |
3264 | 41.2M | under_end++; |
3265 | | |
3266 | 235k | for (col = 0; col < *columns && i < under_end; ++col) { |
3267 | 189k | size_t dashes = 0; |
3268 | | |
3269 | 394k | while (i < under_end && data[i] == ' ') |
3270 | 204k | i++; |
3271 | | |
3272 | 189k | if (i < under_end && data[i] == ':') { |
3273 | 35.6k | i++; (*column_data)[col] |= HOEDOWN_TABLE_ALIGN_LEFT; |
3274 | 35.6k | dashes++; |
3275 | 35.6k | } |
3276 | | |
3277 | 337k | while (i < under_end && data[i] == '-') { |
3278 | 148k | i++; dashes++; |
3279 | 148k | } |
3280 | | |
3281 | 189k | if (i < under_end && data[i] == ':') { |
3282 | 31.3k | i++; (*column_data)[col] |= HOEDOWN_TABLE_ALIGN_RIGHT; |
3283 | 31.3k | dashes++; |
3284 | 31.3k | } |
3285 | | |
3286 | 287k | while (i < under_end && data[i] == ' ') |
3287 | 98.6k | i++; |
3288 | | |
3289 | 189k | if (i < under_end && data[i] != '|' && data[i] != '+') |
3290 | 102k | break; |
3291 | | |
3292 | 86.9k | if (dashes < 3) |
3293 | 36.7k | break; |
3294 | | |
3295 | 50.2k | i++; |
3296 | 50.2k | } |
3297 | | |
3298 | 185k | if (col < *columns) { |
3299 | | /* clean up header_contents */ |
3300 | 155k | popbuf(doc, BUFFER_SPAN); |
3301 | 155k | return 0; |
3302 | 155k | } |
3303 | | |
3304 | 29.5k | parse_table_row( |
3305 | 29.5k | ob, doc, header_contents->data, |
3306 | 29.5k | header_contents->size, |
3307 | 29.5k | *columns, |
3308 | 29.5k | rows, |
3309 | 29.5k | *column_data, |
3310 | 29.5k | HOEDOWN_TABLE_HEADER |
3311 | 29.5k | ); |
3312 | | |
3313 | | /* clean up header_contents */ |
3314 | 29.5k | popbuf(doc, BUFFER_SPAN); |
3315 | | |
3316 | 29.5k | return under_end + 1; |
3317 | 185k | } |
3318 | | |
3319 | | static size_t |
3320 | | parse_table( |
3321 | | hoedown_buffer *ob, |
3322 | | hoedown_document *doc, |
3323 | | uint8_t *data, |
3324 | | size_t size) |
3325 | 4.47M | { |
3326 | 4.47M | size_t i; |
3327 | | |
3328 | 4.47M | hoedown_buffer *work = 0; |
3329 | 4.47M | hoedown_buffer *header_work = 0; |
3330 | 4.47M | hoedown_buffer *body_work = 0; |
3331 | 4.47M | hoedown_buffer *attr_work = 0; |
3332 | | |
3333 | 4.47M | size_t columns; |
3334 | 4.47M | hoedown_table_flags *col_data = NULL; |
3335 | | |
3336 | 4.47M | work = newbuf(doc, BUFFER_BLOCK); |
3337 | 4.47M | header_work = newbuf(doc, BUFFER_SPAN); |
3338 | 4.47M | body_work = newbuf(doc, BUFFER_BLOCK); |
3339 | 4.47M | attr_work = newbuf(doc, BUFFER_ATTRIBUTE); |
3340 | 4.47M | i = parse_table_header(header_work, attr_work, doc, data, size, &columns, &col_data); |
3341 | 4.47M | if (i > 0) { |
3342 | | |
3343 | 110k | while (i < size) { |
3344 | 100k | size_t row_start; |
3345 | 100k | size_t pipes = 0; |
3346 | 100k | size_t rows = 1; |
3347 | | |
3348 | 100k | row_start = i; |
3349 | | |
3350 | 61.4M | while (i < size && data[i] != '\n') { |
3351 | 61.3M | if (data[i] == '|' && !is_backslashed(data, i)) pipes++; |
3352 | 61.3M | i++; |
3353 | 61.3M | } |
3354 | | |
3355 | 100k | if (pipes == 0 || i == size) { |
3356 | 19.1k | i = row_start; |
3357 | 19.1k | break; |
3358 | 19.1k | } |
3359 | | |
3360 | | /* Don't count a leading pipe. */ |
3361 | 81.1k | if (data[row_start] == '|') |
3362 | 30.3k | pipes--; |
3363 | | |
3364 | | /* Don't count a trailing pipe. */ |
3365 | 81.1k | if (data[i - 1] == '|' && !is_backslashed(data, i - 1)) |
3366 | 9.47k | pipes--; |
3367 | | |
3368 | | /* If the multiline table extension is enabled, check the next |
3369 | | lines for continuation markers, to find the number of text rows |
3370 | | that make up this logical row. |
3371 | | */ |
3372 | 81.1k | if ((doc->ext_flags & HOEDOWN_EXT_MULTILINE_TABLES) != 0) { |
3373 | 98.1k | while (i < size) { |
3374 | 97.9k | size_t j = i + 1; |
3375 | 97.9k | size_t colons = 0; |
3376 | | |
3377 | | /* Require that a continued row starts with a colon. */ |
3378 | 97.9k | if (j >= size || data[j] != ':') break; |
3379 | | |
3380 | | /* Don't count leading colon for comparison to pipes. */ |
3381 | 22.3k | j++; |
3382 | | |
3383 | 31.2M | while (j < size && data[j] != '\n') { |
3384 | 31.2M | if (!is_backslashed(data, j) && data[j] == ':') |
3385 | 1.69M | colons++; |
3386 | 31.2M | j++; |
3387 | 31.2M | } |
3388 | | |
3389 | | /* Don't count a trailing colon for comparison to pipes. */ |
3390 | 22.3k | if (!is_backslashed(data, j - 1) && data[j - 1] == ':') |
3391 | 1.98k | colons--; |
3392 | | |
3393 | | /* Hoedown allows table rows where the number of cells is different |
3394 | | * from `columns`. In this case, `parse_table_row` will add empty |
3395 | | * cells. However, the code does not work in the multi-line case, so |
3396 | | * we require the right number of columns. */ |
3397 | 22.3k | if (colons != pipes || colons + 1 != columns) break; |
3398 | | |
3399 | 16.9k | rows++; |
3400 | 16.9k | i = j; |
3401 | 16.9k | } |
3402 | 81.1k | } |
3403 | | |
3404 | 81.1k | parse_table_row( |
3405 | 81.1k | body_work, |
3406 | 81.1k | doc, |
3407 | 81.1k | data + row_start, |
3408 | 81.1k | i - row_start, |
3409 | 81.1k | columns, |
3410 | 81.1k | rows, |
3411 | 81.1k | col_data, 0 |
3412 | 81.1k | ); |
3413 | | |
3414 | 81.1k | i++; |
3415 | | |
3416 | | /* Skip an optional row separator, if it's there. */ |
3417 | 81.1k | if ((doc->ext_flags & HOEDOWN_EXT_MULTILINE_TABLES) != 0) { |
3418 | | /* Use j instead of i, and set i to j only if this is actually a row separator. */ |
3419 | 81.1k | size_t j = i, next_line_end = i, col; |
3420 | | |
3421 | | /* Seek next_line_end to the position of the terminating new line. */ |
3422 | 60.3M | while (next_line_end < size && data[next_line_end] != '\n') |
3423 | 60.2M | next_line_end++; |
3424 | | |
3425 | | /* Skip leading pipe, if any. */ |
3426 | 81.1k | if (j < next_line_end && data[j] == '|') |
3427 | 18.2k | j++; |
3428 | | |
3429 | | /* Ensure that there are at least columns pipe/plus separated |
3430 | | runs of dashes, each at least 3 long. The pipes may be |
3431 | | padded with spaces, and the line may end in a pipe. |
3432 | | */ |
3433 | 84.2k | for (col = 0; col < columns && j < next_line_end; col++) { |
3434 | 70.4k | size_t dashes = 0; |
3435 | | |
3436 | 140k | while (j < next_line_end && data[j] == ' ') |
3437 | 70.3k | j++; |
3438 | | |
3439 | 114k | while (j < next_line_end && data[j] == '-') { |
3440 | 44.0k | j++; |
3441 | 44.0k | dashes++; |
3442 | 44.0k | } |
3443 | | |
3444 | 91.0k | while (j < next_line_end && data[j] == ' ') |
3445 | 20.6k | j++; |
3446 | | |
3447 | 70.4k | if (j < next_line_end && data[j] != '|' && data[j] != '+') |
3448 | 42.3k | break; |
3449 | | |
3450 | 28.1k | if (dashes < 3) |
3451 | 25.0k | break; |
3452 | | |
3453 | 3.08k | j++; |
3454 | 3.08k | } |
3455 | | |
3456 | | /* Skip i past the row separator, if it was valid. */ |
3457 | 81.1k | if (col == columns) |
3458 | 2.90k | i = next_line_end + 1; |
3459 | 81.1k | } |
3460 | 81.1k | } |
3461 | | |
3462 | 29.5k | if (doc->md.table_header) |
3463 | 11.7k | doc->md.table_header(work, header_work, &doc->data); |
3464 | | |
3465 | 29.5k | if (doc->md.table_body) |
3466 | 11.7k | doc->md.table_body(work, body_work, &doc->data); |
3467 | | |
3468 | 29.5k | if (doc->md.table) |
3469 | 11.7k | doc->md.table(ob, work, attr_work, &doc->data); |
3470 | 29.5k | } |
3471 | | |
3472 | 4.47M | free(col_data); |
3473 | 4.47M | popbuf(doc, BUFFER_SPAN); |
3474 | 4.47M | popbuf(doc, BUFFER_BLOCK); |
3475 | 4.47M | popbuf(doc, BUFFER_BLOCK); |
3476 | 4.47M | popbuf(doc, BUFFER_ATTRIBUTE); |
3477 | 4.47M | return i; |
3478 | 4.47M | } |
3479 | | |
3480 | | /* parse_userblock • parsing of user block */ |
3481 | | static size_t |
3482 | | parse_userblock(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size) |
3483 | 0 | { |
3484 | 0 | hoedown_buffer work = { 0, 0, 0, 0, NULL, NULL, NULL }; |
3485 | 0 | size_t len = doc->user_block(data, size, &doc->data); |
3486 | |
|
3487 | 0 | if (!len) { |
3488 | 0 | return 0; |
3489 | 0 | } |
3490 | | |
3491 | 0 | work.data = data; |
3492 | 0 | work.size = len; |
3493 | |
|
3494 | 0 | if (doc->md.user_block) { |
3495 | 0 | doc->md.user_block(ob, &work, &doc->data); |
3496 | 0 | } else { |
3497 | 0 | hoedown_buffer_put(ob, work.data, work.size); |
3498 | 0 | } |
3499 | 0 | return len; |
3500 | 0 | } |
3501 | | |
3502 | | /* is_paragraph • returns if the next block is a paragraph (doesn't follow any |
3503 | | * other special rules for other types of blocks) */ |
3504 | | static int |
3505 | | is_paragraph(hoedown_document *doc, uint8_t *txt_data, size_t end) |
3506 | 4.05M | { |
3507 | | /* temporary buffer for results of checking special blocks */ |
3508 | 4.05M | hoedown_buffer *tmp = newbuf(doc, BUFFER_BLOCK); |
3509 | | /* temporary renderer that has no rendering function */ |
3510 | 4.05M | hoedown_renderer temp_renderer; |
3511 | | /* ensure all callbacks are NULL */ |
3512 | 4.05M | memset(&temp_renderer, 0, sizeof(hoedown_renderer)); |
3513 | | /* store the old renderer */ |
3514 | 4.05M | hoedown_renderer old_renderer; |
3515 | 4.05M | memcpy(&old_renderer, &doc->md, sizeof(hoedown_renderer)); |
3516 | | /* copy the new renderer over to the document */ |
3517 | 4.05M | memcpy(&doc->md, &temp_renderer, sizeof(hoedown_renderer)); |
3518 | | /* these are all the if branches inside parse_block, wrapped into one bool, |
3519 | | * with minimal parsing, and completely idempotent */ |
3520 | 4.05M | int result = !(is_atxheader(doc, txt_data, end) || |
3521 | 4.05M | (doc->user_block && parse_userblock(tmp, doc, txt_data, end)) || |
3522 | 4.05M | (txt_data[0] == '<' && |
3523 | 4.04M | parse_htmlblock(tmp, doc, txt_data, end, 0)) || |
3524 | 4.05M | is_hrule(txt_data, end) || |
3525 | 4.05M | ((doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) && |
3526 | 4.02M | parse_fencedcode(tmp, doc, txt_data, end, doc->ext_flags)) || |
3527 | 4.05M | ((doc->ext_flags & HOEDOWN_EXT_TABLES) && |
3528 | 4.02M | parse_table(tmp, doc, txt_data, end)) || |
3529 | 4.05M | prefix_quote(txt_data, end) || |
3530 | 4.05M | (!(doc->ext_flags & HOEDOWN_EXT_DISABLE_INDENTED_CODE) && |
3531 | 3.98M | prefix_code(txt_data, end)) || |
3532 | 4.05M | prefix_uli(txt_data, end) || |
3533 | 4.05M | prefix_oli(txt_data, end) || |
3534 | 4.05M | ((doc->ext_flags & HOEDOWN_EXT_DEFINITION_LISTS) && |
3535 | 3.93M | prefix_dli(doc, txt_data, end))); |
3536 | 4.05M | popbuf(doc, BUFFER_BLOCK); |
3537 | 4.05M | memcpy(&doc->md, &old_renderer, sizeof(hoedown_renderer)); |
3538 | 4.05M | return result; |
3539 | 4.05M | } |
3540 | | |
3541 | | /* parse_block • parsing of one block, returning next uint8_t to parse */ |
3542 | | static void |
3543 | | parse_block(hoedown_buffer *ob, hoedown_document *doc, uint8_t *data, size_t size) |
3544 | 138k | { |
3545 | 138k | size_t beg, end, i; |
3546 | 138k | uint8_t *txt_data; |
3547 | 138k | beg = 0; |
3548 | | |
3549 | 138k | if (doc->work_bufs[BUFFER_SPAN].size + |
3550 | 138k | doc->work_bufs[BUFFER_BLOCK].size > doc->max_nesting) |
3551 | 448 | return; |
3552 | | |
3553 | 796k | while (beg < size) { |
3554 | 658k | txt_data = data + beg; |
3555 | 658k | end = size - beg; |
3556 | | |
3557 | 658k | if (is_atxheader(doc, txt_data, end)) |
3558 | 10.0k | beg += parse_atxheader(ob, doc, txt_data, end); |
3559 | | |
3560 | 648k | else if (doc->user_block && |
3561 | 648k | (i = parse_userblock(ob, doc, txt_data, end)) != 0) |
3562 | 0 | beg += i; |
3563 | | |
3564 | 648k | else if (data[beg] == '<' && doc->md.blockhtml && |
3565 | 648k | (i = parse_htmlblock(ob, doc, txt_data, end, 1)) != 0) |
3566 | 0 | beg += i; |
3567 | | |
3568 | 648k | else if ((i = is_empty(txt_data, end)) != 0) |
3569 | 165k | beg += i; |
3570 | | |
3571 | 482k | else if (is_hrule(txt_data, end)) { |
3572 | 121k | while (beg < size && data[beg] != '\n') |
3573 | 104k | beg++; |
3574 | | |
3575 | 17.3k | if (doc->md.hrule) { |
3576 | 17.3k | doc->hrule_char = data[beg - 1]; |
3577 | 17.3k | doc->md.hrule(ob, &doc->data); |
3578 | 17.3k | doc->hrule_char = 0; |
3579 | 17.3k | } |
3580 | | |
3581 | 17.3k | beg++; |
3582 | 17.3k | } |
3583 | | |
3584 | 465k | else if ((doc->ext_flags & HOEDOWN_EXT_FENCED_CODE) != 0 && |
3585 | 465k | (i = parse_fencedcode(ob, doc, txt_data, end, doc->ext_flags)) != 0) |
3586 | 9.77k | beg += i; |
3587 | | |
3588 | 455k | else if ((doc->ext_flags & HOEDOWN_EXT_TABLES) != 0 && |
3589 | 455k | (i = parse_table(ob, doc, txt_data, end)) != 0) |
3590 | 11.7k | beg += i; |
3591 | | |
3592 | 444k | else if (prefix_quote(txt_data, end)) |
3593 | 34.1k | beg += parse_blockquote(ob, doc, txt_data, end); |
3594 | | |
3595 | 409k | else if (!(doc->ext_flags & HOEDOWN_EXT_DISABLE_INDENTED_CODE) && prefix_code(txt_data, end)) |
3596 | 0 | beg += parse_blockcode(ob, doc, txt_data, end); |
3597 | | |
3598 | 409k | else if (prefix_uli(txt_data, end)) |
3599 | 35.1k | beg += parse_list(ob, doc, txt_data, end, 0); |
3600 | | |
3601 | 374k | else if (prefix_oli(txt_data, end)) |
3602 | 30.8k | beg += parse_list(ob, doc, txt_data, end, HOEDOWN_LIST_ORDERED); |
3603 | | |
3604 | 344k | else if ((doc->ext_flags & HOEDOWN_EXT_DEFINITION_LISTS) && prefix_dli(doc, txt_data, end)) |
3605 | 28.9k | beg += parse_list(ob, doc, txt_data, end, HOEDOWN_LIST_DEFINITION); |
3606 | | |
3607 | 315k | else |
3608 | 315k | beg += parse_paragraph(ob, doc, txt_data, end); |
3609 | 658k | } |
3610 | 138k | } |
3611 | | |
3612 | | |
3613 | | |
3614 | | /********************* |
3615 | | * REFERENCE PARSING * |
3616 | | *********************/ |
3617 | | |
3618 | | /* is_footnote • returns whether a line is a footnote definition or not */ |
3619 | | static int |
3620 | | is_footnote(const uint8_t *data, size_t beg, size_t end, size_t *last, struct footnote_list *list) |
3621 | 2.02M | { |
3622 | 2.02M | size_t i = 0; |
3623 | 2.02M | hoedown_buffer *contents = NULL; |
3624 | 2.02M | hoedown_buffer *name = NULL; |
3625 | 2.02M | size_t ind = 0; |
3626 | 2.02M | int in_empty = 0; |
3627 | 2.02M | size_t start = 0; |
3628 | | |
3629 | 2.02M | size_t id_offset, id_end; |
3630 | 2.02M | size_t id_indent = 0, content_line = 0, content_indent = 0; |
3631 | | |
3632 | | /* up to 3 optional leading spaces */ |
3633 | 2.02M | if (beg + 3 >= end) return 0; |
3634 | 2.02M | if (data[beg] == ' ') { i = 1; |
3635 | 135k | if (data[beg + 1] == ' ') { i = 2; |
3636 | 17.7k | if (data[beg + 2] == ' ') { i = 3; |
3637 | 10.0k | if (data[beg + 3] == ' ') return 0; } } } |
3638 | 2.01M | i += beg; |
3639 | | |
3640 | | /* id part: caret followed by anything between brackets */ |
3641 | 2.01M | if (data[i] != '[') return 0; |
3642 | 710k | i++; |
3643 | 710k | if (i >= end || data[i] != '^') return 0; |
3644 | 515k | i++; |
3645 | 515k | id_offset = i; |
3646 | 1.62M | while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']') |
3647 | 1.11M | i++; |
3648 | 515k | if (i >= end || data[i] != ']') return 0; |
3649 | 508k | id_end = i; |
3650 | | |
3651 | | /* spacer: colon (space | tab)* newline? (space | tab)* */ |
3652 | 508k | i++; |
3653 | 508k | if (i >= end || data[i] != ':') return 0; |
3654 | 497k | i++; |
3655 | 497k | if (i >= end) return 0; |
3656 | | |
3657 | | /* getting content and name buffers */ |
3658 | 497k | contents = hoedown_buffer_new(64); |
3659 | 497k | name = hoedown_buffer_new(64); |
3660 | | |
3661 | 497k | start = i; |
3662 | | |
3663 | | /* getting item indent size */ |
3664 | 1.59M | while (id_indent != start && data[start - id_indent] != '\n' && data[start - id_indent] != '\r') { |
3665 | 1.09M | id_indent++; |
3666 | 1.09M | } |
3667 | | |
3668 | | /* process lines similar to a list item */ |
3669 | 1.05M | while (i < end) { |
3670 | 10.2M | while (i < end && data[i] != '\n' && data[i] != '\r') i++; |
3671 | | |
3672 | | /* process an empty line */ |
3673 | 1.05M | if (is_empty(data + start, i - start)) { |
3674 | 453k | in_empty = 1; |
3675 | 453k | if (i < end && (data[i] == '\n' || data[i] == '\r')) { |
3676 | 453k | i++; |
3677 | 453k | if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++; |
3678 | 453k | } |
3679 | 453k | start = i; |
3680 | 453k | continue; |
3681 | 453k | } |
3682 | | |
3683 | | /* calculating the indentation */ |
3684 | 601k | ind = 0; |
3685 | 779k | while (ind < 4 && start + ind < end && data[start + ind] == ' ') |
3686 | 177k | ind++; |
3687 | | |
3688 | 601k | content_line++; |
3689 | | |
3690 | | /* joining only indented stuff after empty lines; |
3691 | | * note that now we only require 1 space of indentation |
3692 | | * to continue, just like lists */ |
3693 | 601k | if (ind == 0) { |
3694 | 528k | if (start == id_end + 2 && data[start] == '\t') {} |
3695 | 496k | else break; |
3696 | 528k | } |
3697 | 73.5k | else if (in_empty) { |
3698 | 3.58k | hoedown_buffer_putc(contents, '\n'); |
3699 | 3.58k | } |
3700 | | |
3701 | 104k | in_empty = 0; |
3702 | | |
3703 | | /* re-calculating the indentation */ |
3704 | 104k | if (content_line == 2 && data[start + ind] == ' ') { |
3705 | 46.1k | while (ind < id_indent && data[start + ind] == ' ') { |
3706 | 23.4k | ind++; |
3707 | 23.4k | } |
3708 | 22.6k | content_indent = ind; |
3709 | 22.6k | } |
3710 | 104k | if (content_indent > ind) { |
3711 | 21.8k | while (ind < content_indent && data[start + ind] == ' ') { |
3712 | 7.77k | ind++; |
3713 | 7.77k | } |
3714 | 14.0k | } |
3715 | | |
3716 | | /* adding the line into the content buffer */ |
3717 | 104k | hoedown_buffer_put(contents, data + start + ind, i - start - ind); |
3718 | | /* add carriage return */ |
3719 | 104k | if (i < end) { |
3720 | 104k | hoedown_buffer_putc(contents, '\n'); |
3721 | 104k | if (i < end && (data[i] == '\n' || data[i] == '\r')) { |
3722 | 104k | i++; |
3723 | 104k | if (i < end && data[i] == '\n' && data[i - 1] == '\r') i++; |
3724 | 104k | } |
3725 | 104k | } |
3726 | 104k | start = i; |
3727 | 104k | } |
3728 | | |
3729 | 497k | if (last) |
3730 | 497k | *last = start; |
3731 | | |
3732 | 497k | if (list) { |
3733 | 497k | struct footnote_ref *ref; |
3734 | 497k | ref = create_footnote_ref(list, data + id_offset, id_end - id_offset); |
3735 | 497k | if (!ref) |
3736 | 0 | return 0; |
3737 | 497k | if (!add_footnote_ref(list, ref)) { |
3738 | 0 | free_footnote_ref(ref); |
3739 | 0 | return 0; |
3740 | 0 | } |
3741 | 497k | ref->contents = contents; |
3742 | 497k | hoedown_buffer_put(name, data + id_offset, id_end - id_offset); |
3743 | 497k | ref->name = name; |
3744 | 497k | } |
3745 | | |
3746 | 497k | return 1; |
3747 | 497k | } |
3748 | | |
/* is_html_comment • returns whether data[beg..end) starts with a complete
 * HTML comment.
 *
 * The text must open with "<!--" at beg and contain "-->" before end, with
 * the closing '>' no earlier than offset beg + 5. One following line break
 * ('\n', '\r' or the pair "\n\r") is included in the match. When `last` is
 * non-NULL it receives the offset just past the match.
 * Returns 1 on a match, 0 otherwise. */
static int
is_html_comment(const uint8_t *data, size_t beg, size_t end, size_t *last)
{
	size_t pos;

	if (beg + 5 >= end)
		return 0;

	if (data[beg] != '<' || data[beg + 1] != '!' ||
		data[beg + 2] != '-' || data[beg + 3] != '-')
		return 0;

	/* find the '>' that terminates a "-->" sequence */
	for (pos = beg + 5; pos < end; pos++) {
		if (data[pos - 2] == '-' && data[pos - 1] == '-' && data[pos] == '>')
			break;
	}

	/* running off the end means the comment is never closed */
	if (pos >= end)
		return 0;
	pos++;

	/* take one trailing line break along with the comment */
	if (pos < end && (data[pos] == '\n' || data[pos] == '\r')) {
		pos++;
		if (pos < end && data[pos] == '\r' && data[pos - 1] == '\n')
			pos++;
	}

	if (last != NULL)
		*last = pos;

	return 1;
}
3774 | | |
/* is_ref • returns whether a line is a reference or not
 *
 * Tries to read a link reference definition of the form
 *     [id]: link "title" {attr}
 * from data[beg..end), where the title and the attribute part are optional
 * and the link may be wrapped in angle brackets.
 *
 *   last  if non-NULL, receives line_end, the offset at which the accepted
 *         definition's final line ends.
 *   refs  if non-NULL, the definition is registered through add_link_ref and
 *         freshly allocated link/title/attr buffers are attached to it.
 *
 * Returns 1 when a definition was accepted, 0 otherwise (including when
 * add_link_ref yields NULL). */
static int
is_ref(const uint8_t *data, size_t beg, size_t end, size_t *last, struct link_ref **refs)
{
	size_t i = 0;
	size_t id_offset, id_end;
	size_t link_offset, link_end;
	size_t title_offset, title_end;
	size_t line_end;
	size_t attr_offset = 0, attr_end = 0;

	/* up to 3 optional leading spaces */
	if (beg + 3 >= end) return 0;
	if (data[beg] == ' ') { i = 1;
	if (data[beg + 1] == ' ') { i = 2;
	if (data[beg + 2] == ' ') { i = 3;
	if (data[beg + 3] == ' ') return 0; } } }
	i += beg;

	/* id part: anything but a newline between brackets */
	if (data[i] != '[') return 0;
	i++;
	id_offset = i;
	while (i < end && data[i] != '\n' && data[i] != '\r' && data[i] != ']')
		i++;
	if (i >= end || data[i] != ']') return 0;
	id_end = i;

	/* spacer: colon space* newline? space*
	 * (only ' ' is skipped here; tabs are not treated as spacing) */
	i++;
	if (i >= end || data[i] != ':') return 0;
	i++;
	while (i < end && data[i] == ' ') i++;
	if (i < end && (data[i] == '\n' || data[i] == '\r')) {
		i++;
		if (i < end && data[i] == '\r' && data[i - 1] == '\n') i++; }
	while (i < end && data[i] == ' ') i++;
	if (i >= end) return 0;

	/* link: spacing-free sequence, optionally between angle brackets */
	if (data[i] == '<')
		i++;

	link_offset = i;

	while (i < end && data[i] != ' ' && data[i] != '\n' && data[i] != '\r')
		i++;

	/* a trailing '>' is dropped from the link whether or not a '<' opened it */
	if (data[i - 1] == '>') link_end = i - 1;
	else link_end = i;

	/* optional spacer: space* (newline | '\'' | '"' | '(' );
	 * anything else after the link disqualifies the line */
	while (i < end && data[i] == ' ') i++;
	if (i < end && data[i] != '\n' && data[i] != '\r'
			&& data[i] != '\'' && data[i] != '"' && data[i] != '(')
		return 0;
	/* line_end == 0 doubles as "no valid end of definition found yet" */
	line_end = 0;
	/* computing end-of-line */
	if (i >= end || data[i] == '\r' || data[i] == '\n') line_end = i;
	if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r')
		line_end = i + 1;

	/* optional space* spacer after a newline */
	if (line_end) {
		i = line_end + 1;
		while (i < end && data[i] == ' ') i++; }

	/* optional title: any non-newline sequence enclosed in '"()
			alone on its line */
	title_offset = title_end = 0;
	if (i + 1 < end
	&& (data[i] == '\'' || data[i] == '"' || data[i] == '(')) {
		/* d is the opening delimiter; the scan below stops at the next
		 * occurrence of that same byte, so a '(' title is only closed by
		 * the step-back logic in the else branch */
		char d = data[i++];
		title_offset = i;

		/* looking for end of title */
		while (i < end && data[i] != d && data[i] != '\n' && data[i] != '\r') {
			++i;
		}

		if (i + 1 < end && data[i] == d) {
			/* delimiter repeated before the end of the line */
			title_end = i++;
			attr_offset = i;

			/* looking for EOL */
			while (i < end && data[i] != '\n' && data[i] != '\r') {
				i++;
			}

			/* looking for attribute: the rest of the line must end in '}'
			 * and contain a '{'; the attribute is the text between them */
			if (data[i-1] == '}' &&
				memchr(&data[attr_offset], '{', i - attr_offset)) {
				while (attr_offset < i && data[attr_offset] != '{') {
					++attr_offset;
				}
				++attr_offset;
				attr_end = i - 1;
			} else {
				/* no attribute: the title is stretched to the end of the
				 * line, minus a final delimiter if the line ends with one */
				if (data[i-1] == d) {
					title_end = i - 1;
				} else {
					title_end = i;
				}
				attr_offset = 0;
				attr_end = 0;
			}
			if (i + 1 < end && data[i] == '\r' && data[i + 1] == '\n') {
				++i;
			}

			line_end = i;
		} else {
			/* looking for EOL */
			while (i < end && data[i] != '\n' && data[i] != '\r') {
				i++;
			}
			if (i + 1 < end && data[i] == '\n' && data[i + 1] == '\r') {
				title_end = i + 1;
			} else {
				title_end = i;
			}
			/* stepping back over trailing spaces to find a closing
			 * quote or parenthesis */
			i -= 1;
			while (i > title_offset && data[i] == ' ') {
				i -= 1;
			}
			if (i > title_offset &&
				(data[i] == '\'' || data[i] == '"' || data[i] == ')')) {
				line_end = title_end;
				title_end = i;
			}
		}
	}

	/* reject when no end of definition was established (garbage after the
	 * link) or when the link itself is empty */
	if (!line_end || link_end == link_offset)
		return 0;

	/* a valid ref has been found, filling-in return structures */
	if (last)
		*last = line_end;

	if (refs) {
		struct link_ref *ref;

		ref = add_link_ref(refs, data + id_offset, id_end - id_offset);
		if (!ref)
			return 0;

		ref->link = hoedown_buffer_new(link_end - link_offset);
		hoedown_buffer_put(ref->link, data + link_offset, link_end - link_offset);

		/* title and attr buffers are only created when non-empty */
		if (title_end > title_offset) {
			ref->title = hoedown_buffer_new(title_end - title_offset);
			hoedown_buffer_put(ref->title, data + title_offset, title_end - title_offset);
		}
		if (attr_end > attr_offset) {
			ref->attr = hoedown_buffer_new(attr_end - attr_offset);
			hoedown_buffer_put(ref->attr, data + attr_offset, attr_end - attr_offset);
		}
	}

	return 1;
}
3939 | | |
3940 | | static void expand_tabs(hoedown_buffer *ob, const uint8_t *line, size_t size) |
3941 | 1.24M | { |
3942 | | /* This code makes two assumptions: |
3943 | | * - Input is valid UTF-8. (Any byte with top two bits 10 is skipped, |
3944 | | * whether or not it is a valid UTF-8 continuation byte.) |
3945 | | * - Input contains no combining characters. (Combining characters |
3946 | | * should be skipped but are not.) |
3947 | | */ |
3948 | 1.24M | size_t i = 0, tab = 0; |
3949 | | |
3950 | 41.0M | while (i < size) { |
3951 | 40.9M | size_t org = i; |
3952 | | |
3953 | 90.4M | while (i < size && line[i] != '\t') { |
3954 | | /* ignore UTF-8 continuation bytes */ |
3955 | 49.5M | if ((line[i] & 0xc0) != 0x80) |
3956 | 46.3M | tab++; |
3957 | 49.5M | i++; |
3958 | 49.5M | } |
3959 | | |
3960 | 40.9M | if (i > org) |
3961 | 1.69M | hoedown_buffer_put(ob, line + org, i - org); |
3962 | | |
3963 | 40.9M | if (i >= size) |
3964 | 1.09M | break; |
3965 | | |
3966 | 158M | do { |
3967 | 158M | hoedown_buffer_putc(ob, ' '); tab++; |
3968 | 158M | } while (tab % 4); |
3969 | | |
3970 | 39.8M | i++; |
3971 | 39.8M | } |
3972 | 1.24M | } |
3973 | | |
3974 | | /********************** |
3975 | | * EXPORTED FUNCTIONS * |
3976 | | **********************/ |
3977 | | |
3978 | | hoedown_document * |
3979 | | hoedown_document_new( |
3980 | | const hoedown_renderer *renderer, |
3981 | | hoedown_extensions extensions, |
3982 | | size_t max_nesting, |
3983 | | uint8_t attr_activation, |
3984 | | hoedown_user_block user_block, |
3985 | | hoedown_buffer *meta) |
3986 | 10.1k | { |
3987 | 10.1k | hoedown_document *doc = NULL; |
3988 | | |
3989 | 10.1k | assert(max_nesting > 0 && renderer); |
3990 | | |
3991 | 10.1k | doc = hoedown_malloc(sizeof(hoedown_document)); |
3992 | 10.1k | memcpy(&doc->md, renderer, sizeof(hoedown_renderer)); |
3993 | | |
3994 | 10.1k | doc->data.opaque = renderer->opaque; |
3995 | | |
3996 | 10.1k | hoedown_stack_init(&doc->work_bufs[BUFFER_BLOCK], 4); |
3997 | 10.1k | hoedown_stack_init(&doc->work_bufs[BUFFER_SPAN], 8); |
3998 | 10.1k | hoedown_stack_init(&doc->work_bufs[BUFFER_ATTRIBUTE], 8); |
3999 | | |
4000 | 10.1k | memset(doc->active_char, 0x0, 256); |
4001 | | |
4002 | 10.1k | if (extensions & HOEDOWN_EXT_UNDERLINE && doc->md.underline) { |
4003 | 10.1k | doc->active_char['_'] = MD_CHAR_EMPHASIS; |
4004 | 10.1k | } |
4005 | | |
4006 | 10.1k | if (doc->md.emphasis || doc->md.double_emphasis || doc->md.triple_emphasis) { |
4007 | 10.1k | doc->active_char['*'] = MD_CHAR_EMPHASIS; |
4008 | 10.1k | doc->active_char['_'] = MD_CHAR_EMPHASIS; |
4009 | 10.1k | if (extensions & HOEDOWN_EXT_STRIKETHROUGH) |
4010 | 10.1k | doc->active_char['~'] = MD_CHAR_EMPHASIS; |
4011 | 10.1k | if (extensions & HOEDOWN_EXT_HIGHLIGHT) |
4012 | 10.1k | doc->active_char['='] = MD_CHAR_EMPHASIS; |
4013 | 10.1k | } |
4014 | | |
4015 | 10.1k | if (doc->md.codespan) |
4016 | 10.1k | doc->active_char['`'] = MD_CHAR_CODESPAN; |
4017 | | |
4018 | 10.1k | if (doc->md.linebreak) |
4019 | 10.1k | doc->active_char['\n'] = MD_CHAR_LINEBREAK; |
4020 | | |
4021 | 10.1k | if (doc->md.image || doc->md.link || doc->md.footnotes || doc->md.footnote_ref) { |
4022 | 10.1k | doc->active_char['['] = MD_CHAR_LINK; |
4023 | 10.1k | doc->active_char['!'] = MD_CHAR_IMAGE; |
4024 | 10.1k | } |
4025 | | |
4026 | 10.1k | doc->active_char['<'] = MD_CHAR_LANGLE; |
4027 | 10.1k | doc->active_char['\\'] = MD_CHAR_ESCAPE; |
4028 | 10.1k | doc->active_char['&'] = MD_CHAR_ENTITY; |
4029 | | |
4030 | 10.1k | if (extensions & HOEDOWN_EXT_AUTOLINK) { |
4031 | 10.1k | doc->active_char[':'] = MD_CHAR_AUTOLINK_URL; |
4032 | 10.1k | doc->active_char['@'] = MD_CHAR_AUTOLINK_EMAIL; |
4033 | 10.1k | doc->active_char['w'] = MD_CHAR_AUTOLINK_WWW; |
4034 | 10.1k | } |
4035 | | |
4036 | 10.1k | if (extensions & HOEDOWN_EXT_SUPERSCRIPT) |
4037 | 10.1k | doc->active_char['^'] = MD_CHAR_SUPERSCRIPT; |
4038 | | |
4039 | 10.1k | if (extensions & HOEDOWN_EXT_QUOTE) |
4040 | 10.1k | doc->active_char['"'] = MD_CHAR_QUOTE; |
4041 | | |
4042 | 10.1k | if (extensions & HOEDOWN_EXT_MATH) |
4043 | 10.1k | doc->active_char['$'] = MD_CHAR_MATH; |
4044 | | |
4045 | | /* Extension data */ |
4046 | 10.1k | doc->ext_flags = extensions; |
4047 | 10.1k | doc->max_nesting = max_nesting; |
4048 | 10.1k | doc->attr_activation = attr_activation; |
4049 | 10.1k | doc->in_link_body = 0; |
4050 | 10.1k | doc->link_id = NULL; |
4051 | 10.1k | doc->link_ref_attr = NULL; |
4052 | 10.1k | doc->link_inline_attr = NULL; |
4053 | 10.1k | doc->is_escape_char = 0; |
4054 | 10.1k | doc->header_type = HOEDOWN_HEADER_NONE; |
4055 | 10.1k | doc->link_type = HOEDOWN_LINK_NONE; |
4056 | 10.1k | doc->footnote_id = NULL; |
4057 | 10.1k | doc->list_depth = 0; |
4058 | 10.1k | doc->blockquote_depth = 0; |
4059 | 10.1k | doc->ul_item_char = 0; |
4060 | 10.1k | doc->hrule_char = 0; |
4061 | 10.1k | doc->fencedcode_char = 0; |
4062 | 10.1k | doc->ol_numeral = NULL; |
4063 | 10.1k | doc->user_block = user_block; |
4064 | 10.1k | doc->meta = meta; |
4065 | | |
4066 | 10.1k | return doc; |
4067 | 10.1k | } |
4068 | | |
4069 | | void |
4070 | | hoedown_document_render(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size) |
4071 | 10.1k | { |
4072 | 10.1k | static const uint8_t UTF8_BOM[] = {0xEF, 0xBB, 0xBF}; |
4073 | | |
4074 | 10.1k | hoedown_buffer *text; |
4075 | 10.1k | size_t beg, end; |
4076 | | |
4077 | 10.1k | int footnotes_enabled; |
4078 | | |
4079 | 10.1k | text = hoedown_buffer_new(64); |
4080 | | |
4081 | | /* Preallocate enough space for our buffer to avoid expanding while copying */ |
4082 | 10.1k | hoedown_buffer_grow(text, size); |
4083 | | |
4084 | | /* reset the references table */ |
4085 | 10.1k | memset(&doc->refs, 0x0, REF_TABLE_SIZE * sizeof(void *)); |
4086 | | |
4087 | 10.1k | footnotes_enabled = doc->ext_flags & HOEDOWN_EXT_FOOTNOTES; |
4088 | | |
4089 | | /* reset the footnotes lists */ |
4090 | 10.1k | if (footnotes_enabled) { |
4091 | 10.1k | memset(&doc->footnotes_found, 0x0, sizeof(doc->footnotes_found)); |
4092 | 10.1k | memset(&doc->footnotes_used, 0x0, sizeof(doc->footnotes_used)); |
4093 | 10.1k | } |
4094 | | |
4095 | | /* first pass: looking for references, copying everything else */ |
4096 | 10.1k | beg = 0; |
4097 | | |
4098 | | /* Skip a possible UTF-8 BOM, even though the Unicode standard |
4099 | | * discourages having these in UTF-8 documents */ |
4100 | 10.1k | if (size >= 3 && memcmp(data, UTF8_BOM, 3) == 0) |
4101 | 31 | beg += 3; |
4102 | | |
4103 | 2.03M | while (beg < size) /* iterating over lines */ |
4104 | 2.02M | if (footnotes_enabled && is_footnote(data, beg, size, &end, &doc->footnotes_found)) { |
4105 | 497k | if (doc->md.footnote_ref_def) { |
4106 | 0 | hoedown_buffer original = { NULL, 0, 0, 0, NULL, NULL, NULL }; |
4107 | 0 | original.data = (uint8_t*) (data + beg); |
4108 | 0 | original.size = end - beg; |
4109 | 0 | doc->md.footnote_ref_def(&original, &doc->data); |
4110 | 0 | } |
4111 | 497k | beg = end; |
4112 | 1.52M | } else if (is_html_comment(data, beg, size, &end)) { |
4113 | 22.0k | size_t i = 0; |
4114 | 6.29M | while (i < (end - beg) && beg + i < size) { |
4115 | 6.27M | if (data[beg + i] == '\t' && (data[beg + i] & 0xc0) != 0x80) { |
4116 | 1.52M | hoedown_buffer_put(text, (uint8_t*)" ", 4); |
4117 | 4.74M | } else { |
4118 | 4.74M | hoedown_buffer_putc(text, data[beg + i]); |
4119 | 4.74M | } |
4120 | 6.27M | i++; |
4121 | 6.27M | } |
4122 | 22.0k | beg = end; |
4123 | 1.50M | } else if (is_ref(data, beg, size, &end, doc->refs)) { |
4124 | 132k | if (doc->md.ref) { |
4125 | 0 | hoedown_buffer original = { NULL, 0, 0, 0, NULL, NULL, NULL }; |
4126 | 0 | original.data = (uint8_t*) (data + beg); |
4127 | 0 | original.size = end - beg; |
4128 | 0 | doc->md.ref(&original, &doc->data); |
4129 | 0 | } |
4130 | 132k | beg = end; |
4131 | 1.37M | } else { /* skipping to the next line */ |
4132 | 1.37M | end = beg; |
4133 | 90.7M | while (end < size && data[end] != '\n' && data[end] != '\r') |
4134 | 89.3M | end++; |
4135 | | |
4136 | | /* adding the line body if present */ |
4137 | 1.37M | if (end > beg) |
4138 | 1.24M | expand_tabs(text, data + beg, end - beg); |
4139 | | |
4140 | 2.94M | while (end < size && (data[end] == '\n' || data[end] == '\r')) { |
4141 | | /* add one \n per newline */ |
4142 | 1.57M | if (data[end] == '\n' || (end + 1 < size && data[end + 1] != '\n')) |
4143 | 1.56M | hoedown_buffer_putc(text, '\n'); |
4144 | 1.57M | end++; |
4145 | 1.57M | } |
4146 | | |
4147 | 1.37M | beg = end; |
4148 | 1.37M | } |
4149 | | |
4150 | | /* pre-grow the output buffer to minimize allocations */ |
4151 | 10.1k | hoedown_buffer_grow(ob, text->size + (text->size >> 1)); |
4152 | | |
4153 | | /* second pass: actual rendering */ |
4154 | 10.1k | if (doc->md.doc_header) |
4155 | 0 | doc->md.doc_header(ob, 0, &doc->data); |
4156 | | |
4157 | 10.1k | if (text->size) { |
4158 | | /* adding a final newline if not already present */ |
4159 | 10.0k | if (text->data[text->size - 1] != '\n') |
4160 | 9.06k | hoedown_buffer_putc(text, '\n'); |
4161 | | |
4162 | 10.0k | parse_block(ob, doc, text->data, text->size); |
4163 | 10.0k | } |
4164 | | |
4165 | | /* footnotes */ |
4166 | 10.1k | if (footnotes_enabled) |
4167 | 10.1k | parse_footnote_list(ob, doc, &doc->footnotes_used); |
4168 | | |
4169 | 10.1k | if (doc->md.doc_footer) |
4170 | 0 | doc->md.doc_footer(ob, 0, &doc->data); |
4171 | | |
4172 | | /* clean-up */ |
4173 | 10.1k | hoedown_buffer_free(text); |
4174 | 10.1k | free_link_refs(doc->refs); |
4175 | 10.1k | if (footnotes_enabled) { |
4176 | 10.1k | free_footnote_list(&doc->footnotes_found, 1); |
4177 | 10.1k | free_footnote_list(&doc->footnotes_used, 0); |
4178 | 10.1k | } |
4179 | | |
4180 | 10.1k | assert(doc->work_bufs[BUFFER_SPAN].size == 0); |
4181 | 10.1k | assert(doc->work_bufs[BUFFER_BLOCK].size == 0); |
4182 | 10.1k | assert(doc->work_bufs[BUFFER_ATTRIBUTE].size == 0); |
4183 | 10.1k | } |
4184 | | |
4185 | | void |
4186 | | hoedown_document_render_inline(hoedown_document *doc, hoedown_buffer *ob, const uint8_t *data, size_t size) |
4187 | 0 | { |
4188 | 0 | size_t i = 0, mark; |
4189 | 0 | hoedown_buffer *text = hoedown_buffer_new(64); |
4190 | | |
4191 | | /* reset the references table */ |
4192 | 0 | memset(&doc->refs, 0x0, REF_TABLE_SIZE * sizeof(void *)); |
4193 | | |
4194 | | /* first pass: expand tabs and process newlines */ |
4195 | 0 | hoedown_buffer_grow(text, size); |
4196 | 0 | while (1) { |
4197 | 0 | mark = i; |
4198 | 0 | while (i < size && data[i] != '\n' && data[i] != '\r') |
4199 | 0 | i++; |
4200 | |
|
4201 | 0 | expand_tabs(text, data + mark, i - mark); |
4202 | |
|
4203 | 0 | if (i >= size) |
4204 | 0 | break; |
4205 | | |
4206 | 0 | while (i < size && (data[i] == '\n' || data[i] == '\r')) { |
4207 | | /* add one \n per newline */ |
4208 | 0 | if (data[i] == '\n' || (i + 1 < size && data[i + 1] != '\n')) |
4209 | 0 | hoedown_buffer_putc(text, '\n'); |
4210 | 0 | i++; |
4211 | 0 | } |
4212 | 0 | } |
4213 | | |
4214 | | /* second pass: actual rendering */ |
4215 | 0 | hoedown_buffer_grow(ob, text->size + (text->size >> 1)); |
4216 | |
|
4217 | 0 | if (doc->md.doc_header) |
4218 | 0 | doc->md.doc_header(ob, 1, &doc->data); |
4219 | |
|
4220 | 0 | parse_inline(ob, doc, text->data, text->size); |
4221 | |
|
4222 | 0 | if (doc->md.doc_footer) |
4223 | 0 | doc->md.doc_footer(ob, 1, &doc->data); |
4224 | | |
4225 | | /* clean-up */ |
4226 | 0 | hoedown_buffer_free(text); |
4227 | |
|
4228 | 0 | assert(doc->work_bufs[BUFFER_SPAN].size == 0); |
4229 | 0 | assert(doc->work_bufs[BUFFER_BLOCK].size == 0); |
4230 | 0 | } |
4231 | | |
4232 | | void |
4233 | | hoedown_document_free(hoedown_document *doc) |
4234 | 10.1k | { |
4235 | 10.1k | size_t i; |
4236 | | |
4237 | 97.2k | for (i = 0; i < (size_t)doc->work_bufs[BUFFER_SPAN].asize; ++i) |
4238 | 87.0k | hoedown_buffer_free(doc->work_bufs[BUFFER_SPAN].item[i]); |
4239 | | |
4240 | 56.0k | for (i = 0; i < (size_t)doc->work_bufs[BUFFER_BLOCK].asize; ++i) |
4241 | 45.8k | hoedown_buffer_free(doc->work_bufs[BUFFER_BLOCK].item[i]); |
4242 | | |
4243 | 92.4k | for (i = 0; i < (size_t)doc->work_bufs[BUFFER_ATTRIBUTE].asize; ++i) |
4244 | 82.2k | hoedown_buffer_free(doc->work_bufs[BUFFER_ATTRIBUTE].item[i]); |
4245 | | |
4246 | 10.1k | hoedown_stack_uninit(&doc->work_bufs[BUFFER_SPAN]); |
4247 | 10.1k | hoedown_stack_uninit(&doc->work_bufs[BUFFER_BLOCK]); |
4248 | 10.1k | hoedown_stack_uninit(&doc->work_bufs[BUFFER_ATTRIBUTE]); |
4249 | | |
4250 | 10.1k | free(doc); |
4251 | 10.1k | } |
4252 | | |
/* hoedown_document_link_id • returns the document's current link_id buffer.
 * hoedown_document_new initialises it to NULL.
 * NOTE(review): presumably only set while a link is being rendered — confirm
 * against the link parsing code. */
const hoedown_buffer*
hoedown_document_link_id(hoedown_document* document)
{
	return document->link_id;
}
4258 | | |
/* hoedown_document_link_ref_attr • returns the document's current
 * link_ref_attr buffer. hoedown_document_new initialises it to NULL.
 * NOTE(review): presumably the attribute text of a reference-style link
 * being rendered — confirm against the link parsing code. */
const hoedown_buffer*
hoedown_document_link_ref_attr(hoedown_document* document)
{
	return document->link_ref_attr;
}
4264 | | |
/* hoedown_document_link_inline_attr • returns the document's current
 * link_inline_attr buffer. hoedown_document_new initialises it to NULL.
 * NOTE(review): presumably the attribute text of an inline link being
 * rendered — confirm against the link parsing code. */
const hoedown_buffer*
hoedown_document_link_inline_attr(hoedown_document* document)
{
	return document->link_inline_attr;
}
4270 | | |
/* hoedown_document_is_escaped • returns the document's is_escape_char flag.
 * hoedown_document_new initialises it to 0.
 * NOTE(review): presumably non-zero while an escaped character is being
 * rendered — confirm against the escape handling code. */
int
hoedown_document_is_escaped(hoedown_document* document)
{
	return document->is_escape_char;
}
4276 | | |
/* hoedown_document_header_type • returns the document's current header_type.
 * hoedown_document_new initialises it to HOEDOWN_HEADER_NONE. */
hoedown_header_type
hoedown_document_header_type(hoedown_document* document)
{
	return document->header_type;
}
4282 | | |
/* hoedown_document_link_type • returns the document's current link_type.
 * hoedown_document_new initialises it to HOEDOWN_LINK_NONE. */
hoedown_link_type
hoedown_document_link_type(hoedown_document* document)
{
	return document->link_type;
}
4288 | | |
/* hoedown_document_footnote_id • returns the document's current footnote_id
 * buffer. hoedown_document_new initialises it to NULL.
 * NOTE(review): presumably only set while a footnote is being rendered —
 * confirm against the footnote rendering code. */
const hoedown_buffer*
hoedown_document_footnote_id(hoedown_document* document)
{
	return document->footnote_id;
}
4294 | | |
/* hoedown_document_list_depth • returns the document's list_depth counter.
 * hoedown_document_new initialises it to 0.
 * NOTE(review): presumably the nesting level of the list being parsed —
 * confirm against the list parsing code. */
int
hoedown_document_list_depth(hoedown_document* document)
{
	return document->list_depth;
}
4300 | | |
/* hoedown_document_blockquote_depth • returns the document's
 * blockquote_depth counter. hoedown_document_new initialises it to 0.
 * NOTE(review): presumably the nesting level of the blockquote being
 * parsed — confirm against the blockquote parsing code. */
int
hoedown_document_blockquote_depth(hoedown_document* document)
{
	return document->blockquote_depth;
}
4306 | | |
/* hoedown_document_ul_item_char • returns the document's ul_item_char byte.
 * hoedown_document_new initialises it to 0.
 * NOTE(review): presumably the bullet character of the unordered list item
 * being rendered — confirm against the list parsing code. */
uint8_t
hoedown_document_ul_item_char(hoedown_document* document)
{
	return document->ul_item_char;
}
4312 | | |
/* hoedown_document_hrule_char • returns the document's hrule_char byte.
 * parse_block sets it to the last byte of the horizontal-rule line just
 * before invoking the hrule render callback and resets it to 0 right after,
 * so it is only meaningful from inside that callback; otherwise it is 0. */
uint8_t
hoedown_document_hrule_char(hoedown_document* document)
{
	return document->hrule_char;
}
4318 | | |
/* hoedown_document_fencedcode_char • returns the document's fencedcode_char
 * byte. hoedown_document_new initialises it to 0.
 * NOTE(review): presumably the fence character of the code block being
 * rendered — confirm against the fenced code parsing code. */
uint8_t
hoedown_document_fencedcode_char(hoedown_document* document)
{
	return document->fencedcode_char;
}
4324 | | |
/* hoedown_document_ol_numeral • returns the document's current ol_numeral
 * buffer. hoedown_document_new initialises it to NULL.
 * NOTE(review): presumably the numeral text of the ordered list item being
 * rendered — confirm against the list parsing code. */
const hoedown_buffer*
hoedown_document_ol_numeral(hoedown_document* document)
{
	return document->ol_numeral;
}