Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | #include <stdlib.h>  | 
2  |  | #include <stdio.h>  | 
3  |  | #include <string.h>  | 
4  |  | #include <assert.h>  | 
5  |  |  | 
6  |  | #include "config.h"  | 
7  |  | #include "cmark.h"  | 
8  |  | #include "node.h"  | 
9  |  | #include "buffer.h"  | 
10  |  |  | 
// Size in bytes of the scratch buffer used to format attribute strings.
#define BUFFER_SIZE 100
// Upper bound on the number of indentation spaces written per output line.
#define MAX_INDENT 40
13  |  |  | 
14  |  | // Functions to convert cmark_nodes to XML strings.  | 
15  |  |  | 
// C0 control characters (other than TAB, LF and CR), U+FFFE and U+FFFF
// aren't allowed in XML.
//
// Per-byte escape codes; every byte not listed below maps to 0:
//   0      copy the byte through unchanged
//   1..5   emit XML_ESCAPES[code] instead of the byte
//   9      0xBE or 0xBF, which may be the last byte of a UTF-8 encoded
//          U+FFFE or U+FFFF; escape_xml inspects the preceding bytes
static const char XML_ESCAPE_TABLE[256] = {
    /* C0 controls 0x00-0x08 */
    [0x00] = 1, [0x01] = 1, [0x02] = 1, [0x03] = 1, [0x04] = 1,
    [0x05] = 1, [0x06] = 1, [0x07] = 1, [0x08] = 1,
    /* 0x09 (TAB) and 0x0A (LF) pass through */
    [0x0B] = 1, [0x0C] = 1,
    /* 0x0D (CR) passes through */
    [0x0E] = 1, [0x0F] = 1,
    /* C0 controls 0x10-0x1F */
    [0x10] = 1, [0x11] = 1, [0x12] = 1, [0x13] = 1,
    [0x14] = 1, [0x15] = 1, [0x16] = 1, [0x17] = 1,
    [0x18] = 1, [0x19] = 1, [0x1A] = 1, [0x1B] = 1,
    [0x1C] = 1, [0x1D] = 1, [0x1E] = 1, [0x1F] = 1,
    /* Markup-significant characters */
    [0x22] = 2, /* '"' */
    [0x26] = 3, /* '&' */
    [0x3C] = 4, /* '<' */
    [0x3E] = 5, /* '>' */
    /* Candidate final bytes of U+FFFE / U+FFFF */
    [0xBE] = 9, [0xBF] = 9,
};
35  |  |  | 
// U+FFFD Replacement Character encoded in UTF-8
#define UTF8_REPL "\xEF\xBF\xBD"

// Replacement text, indexed by the escape codes 0..5 stored in
// XML_ESCAPE_TABLE:
//   0 -> nothing, 1 -> U+FFFD, 2 -> '"', 3 -> '&', 4 -> '<', 5 -> '>'
// The markup characters must be written as entity references; emitting them
// verbatim would leave the output unescaped.
static const char *XML_ESCAPES[] = {
  "", UTF8_REPL, "&quot;", "&amp;", "&lt;", "&gt;"
};
42  |  |  | 
43  |  | static void escape_xml(cmark_strbuf *ob, const unsigned char *src,  | 
44  | 5.08M  |                        bufsize_t size) { | 
45  | 5.08M  |   bufsize_t i = 0, org, esc = 0;  | 
46  |  |  | 
47  | 127M  |   while (i < size) { | 
48  | 126M  |     org = i;  | 
49  | 501M  |     while (i < size && (esc = XML_ESCAPE_TABLE[src[i]]) == 0)  | 
50  | 374M  |       i++;  | 
51  |  |  | 
52  | 126M  |     if (i > org)  | 
53  | 121M  |       cmark_strbuf_put(ob, src + org, i - org);  | 
54  |  |  | 
55  | 126M  |     if (i >= size)  | 
56  | 4.28M  |       break;  | 
57  |  |  | 
58  | 122M  |     if (esc == 9) { | 
59  |  |       // To replace U+FFFE and U+FFFF with U+FFFD, only the last byte has to  | 
60  |  |       // be changed.  | 
61  |  |       // We know that src[i] is 0xBE or 0xBF.  | 
62  | 108M  |       if (i >= 2 && src[i-2] == 0xEF && src[i-1] == 0xBF) { | 
63  | 4.43k  |         cmark_strbuf_putc(ob, 0xBD);  | 
64  | 108M  |       } else { | 
65  | 108M  |         cmark_strbuf_putc(ob, src[i]);  | 
66  | 108M  |       }  | 
67  | 108M  |     } else { | 
68  | 14.3M  |       cmark_strbuf_puts(ob, XML_ESCAPES[esc]);  | 
69  | 14.3M  |     }  | 
70  |  |  | 
71  | 122M  |     i++;  | 
72  | 122M  |   }  | 
73  | 5.08M  | }  | 
74  |  |  | 
75  | 317k  | static void escape_xml_str(cmark_strbuf *dest, const unsigned char *source) { | 
76  | 317k  |   if (source)  | 
77  | 317k  |     escape_xml(dest, source, strlen((char *)source));  | 
78  | 317k  | }  | 
79  |  |  | 
80  |  | struct render_state { | 
81  |  |   cmark_strbuf *xml;  | 
82  |  |   int indent;  | 
83  |  | };  | 
84  |  |  | 
85  | 17.9M  | static CMARK_INLINE void indent(struct render_state *state) { | 
86  | 17.9M  |   int i;  | 
87  | 297M  |   for (i = 0; i < state->indent && i < MAX_INDENT; i++) { | 
88  | 279M  |     cmark_strbuf_putc(state->xml, ' ');  | 
89  | 279M  |   }  | 
90  | 17.9M  | }  | 
91  |  |  | 
92  |  | static int S_render_node(cmark_node *node, cmark_event_type ev_type,  | 
93  | 18.3M  |                          struct render_state *state, int options) { | 
94  | 18.3M  |   cmark_strbuf *xml = state->xml;  | 
95  | 18.3M  |   bool literal = false;  | 
96  | 18.3M  |   cmark_delim_type delim;  | 
97  | 18.3M  |   bool entering = (ev_type == CMARK_EVENT_ENTER);  | 
98  | 18.3M  |   char buffer[BUFFER_SIZE];  | 
99  |  |  | 
100  | 18.3M  |   if (entering) { | 
101  | 12.6M  |     indent(state);  | 
102  | 12.6M  |     cmark_strbuf_putc(xml, '<');  | 
103  | 12.6M  |     cmark_strbuf_puts(xml, cmark_node_get_type_string(node));  | 
104  |  |  | 
105  | 12.6M  |     if (options & CMARK_OPT_SOURCEPOS && node->start_line != 0) { | 
106  | 7.44M  |       snprintf(buffer, BUFFER_SIZE, " sourcepos=\"%d:%d-%d:%d\"",  | 
107  | 7.44M  |                node->start_line, node->start_column, node->end_line,  | 
108  | 7.44M  |                node->end_column);  | 
109  | 7.44M  |       cmark_strbuf_puts(xml, buffer);  | 
110  | 7.44M  |     }  | 
111  |  |  | 
112  | 12.6M  |     literal = false;  | 
113  |  |  | 
114  | 12.6M  |     switch (node->type) { | 
115  | 42.5k  |     case CMARK_NODE_DOCUMENT:  | 
116  | 42.5k  |       cmark_strbuf_puts(xml, " xmlns=\"http://commonmark.org/xml/1.0\"");  | 
117  | 42.5k  |       break;  | 
118  | 4.16M  |     case CMARK_NODE_TEXT:  | 
119  | 4.31M  |     case CMARK_NODE_CODE:  | 
120  | 4.40M  |     case CMARK_NODE_HTML_BLOCK:  | 
121  | 4.70M  |     case CMARK_NODE_HTML_INLINE:  | 
122  | 4.70M  |       cmark_strbuf_puts(xml, " xml:space=\"preserve\">");  | 
123  | 4.70M  |       escape_xml(xml, node->data, node->len);  | 
124  | 4.70M  |       cmark_strbuf_puts(xml, "</");  | 
125  | 4.70M  |       cmark_strbuf_puts(xml, cmark_node_get_type_string(node));  | 
126  | 4.70M  |       literal = true;  | 
127  | 4.70M  |       break;  | 
128  | 976k  |     case CMARK_NODE_LIST:  | 
129  | 976k  |       switch (cmark_node_get_list_type(node)) { | 
130  | 12.2k  |       case CMARK_ORDERED_LIST:  | 
131  | 12.2k  |         cmark_strbuf_puts(xml, " type=\"ordered\"");  | 
132  | 12.2k  |         snprintf(buffer, BUFFER_SIZE, " start=\"%d\"",  | 
133  | 12.2k  |                  cmark_node_get_list_start(node));  | 
134  | 12.2k  |         cmark_strbuf_puts(xml, buffer);  | 
135  | 12.2k  |         delim = cmark_node_get_list_delim(node);  | 
136  | 12.2k  |         if (delim == CMARK_PAREN_DELIM) { | 
137  | 1.89k  |           cmark_strbuf_puts(xml, " delim=\"paren\"");  | 
138  | 10.3k  |         } else if (delim == CMARK_PERIOD_DELIM) { | 
139  | 10.3k  |           cmark_strbuf_puts(xml, " delim=\"period\"");  | 
140  | 10.3k  |         }  | 
141  | 12.2k  |         break;  | 
142  | 964k  |       case CMARK_BULLET_LIST:  | 
143  | 964k  |         cmark_strbuf_puts(xml, " type=\"bullet\"");  | 
144  | 964k  |         break;  | 
145  | 0  |       default:  | 
146  | 0  |         break;  | 
147  | 976k  |       }  | 
148  | 976k  |       snprintf(buffer, BUFFER_SIZE, " tight=\"%s\"",  | 
149  | 976k  |                (cmark_node_get_list_tight(node) ? "true" : "false"));  | 
150  | 976k  |       cmark_strbuf_puts(xml, buffer);  | 
151  | 976k  |       break;  | 
152  | 83.8k  |     case CMARK_NODE_HEADING:  | 
153  | 83.8k  |       snprintf(buffer, BUFFER_SIZE, " level=\"%d\"", node->as.heading.level);  | 
154  | 83.8k  |       cmark_strbuf_puts(xml, buffer);  | 
155  | 83.8k  |       break;  | 
156  | 66.4k  |     case CMARK_NODE_CODE_BLOCK:  | 
157  | 66.4k  |       if (node->as.code.info) { | 
158  | 4.94k  |         cmark_strbuf_puts(xml, " info=\"");  | 
159  | 4.94k  |         escape_xml_str(xml, node->as.code.info);  | 
160  | 4.94k  |         cmark_strbuf_putc(xml, '"');  | 
161  | 4.94k  |       }  | 
162  | 66.4k  |       cmark_strbuf_puts(xml, " xml:space=\"preserve\">");  | 
163  | 66.4k  |       escape_xml(xml, node->data, node->len);  | 
164  | 66.4k  |       cmark_strbuf_puts(xml, "</");  | 
165  | 66.4k  |       cmark_strbuf_puts(xml, cmark_node_get_type_string(node));  | 
166  | 66.4k  |       literal = true;  | 
167  | 66.4k  |       break;  | 
168  | 0  |     case CMARK_NODE_CUSTOM_BLOCK:  | 
169  | 0  |     case CMARK_NODE_CUSTOM_INLINE:  | 
170  | 0  |       cmark_strbuf_puts(xml, " on_enter=\"");  | 
171  | 0  |       escape_xml_str(xml, node->as.custom.on_enter);  | 
172  | 0  |       cmark_strbuf_putc(xml, '"');  | 
173  | 0  |       cmark_strbuf_puts(xml, " on_exit=\"");  | 
174  | 0  |       escape_xml_str(xml, node->as.custom.on_exit);  | 
175  | 0  |       cmark_strbuf_putc(xml, '"');  | 
176  | 0  |       break;  | 
177  | 295k  |     case CMARK_NODE_LINK:  | 
178  | 304k  |     case CMARK_NODE_IMAGE:  | 
179  | 304k  |       cmark_strbuf_puts(xml, " destination=\"");  | 
180  | 304k  |       escape_xml_str(xml, node->as.link.url);  | 
181  | 304k  |       cmark_strbuf_putc(xml, '"');  | 
182  | 304k  |       if (node->as.link.title) { | 
183  | 8.45k  |         cmark_strbuf_puts(xml, " title=\"");  | 
184  | 8.45k  |         escape_xml_str(xml, node->as.link.title);  | 
185  | 8.45k  |         cmark_strbuf_putc(xml, '"');  | 
186  | 8.45k  |       }  | 
187  | 304k  |       break;  | 
188  | 6.50M  |     default:  | 
189  | 6.50M  |       break;  | 
190  | 12.6M  |     }  | 
191  | 12.6M  |     if (node->first_child) { | 
192  | 5.25M  |       state->indent += 2;  | 
193  | 7.43M  |     } else if (!literal) { | 
194  | 2.66M  |       cmark_strbuf_puts(xml, " /");  | 
195  | 2.66M  |     }  | 
196  | 12.6M  |     cmark_strbuf_puts(xml, ">\n");  | 
197  |  |  | 
198  | 12.6M  |   } else if (node->first_child) { | 
199  | 5.25M  |     state->indent -= 2;  | 
200  | 5.25M  |     indent(state);  | 
201  | 5.25M  |     cmark_strbuf_puts(xml, "</");  | 
202  | 5.25M  |     cmark_strbuf_puts(xml, cmark_node_get_type_string(node));  | 
203  | 5.25M  |     cmark_strbuf_puts(xml, ">\n");  | 
204  | 5.25M  |   }  | 
205  |  |  | 
206  | 18.3M  |   return 1;  | 
207  | 18.3M  | }  | 
208  |  |  | 
209  | 42.5k  | char *cmark_render_xml(cmark_node *root, int options) { | 
210  | 42.5k  |   char *result;  | 
211  | 42.5k  |   cmark_strbuf xml = CMARK_BUF_INIT(root->mem);  | 
212  | 42.5k  |   cmark_event_type ev_type;  | 
213  | 42.5k  |   cmark_node *cur;  | 
214  | 42.5k  |   struct render_state state = {&xml, 0}; | 
215  |  |  | 
216  | 42.5k  |   cmark_iter *iter = cmark_iter_new(root);  | 
217  |  |  | 
218  | 42.5k  |   cmark_strbuf_puts(state.xml, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");  | 
219  | 42.5k  |   cmark_strbuf_puts(state.xml,  | 
220  | 42.5k  |                     "<!DOCTYPE document SYSTEM \"CommonMark.dtd\">\n");  | 
221  | 18.3M  |   while ((ev_type = cmark_iter_next(iter)) != CMARK_EVENT_DONE) { | 
222  | 18.3M  |     cur = cmark_iter_get_node(iter);  | 
223  | 18.3M  |     S_render_node(cur, ev_type, &state, options);  | 
224  | 18.3M  |   }  | 
225  | 42.5k  |   result = (char *)cmark_strbuf_detach(&xml);  | 
226  |  |  | 
227  | 42.5k  |   cmark_iter_free(iter);  | 
228  | 42.5k  |   return result;  | 
229  | 42.5k  | }  |