Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * html.c: a libFuzzer target to test several HTML parser interfaces. |
3 | | * |
4 | | * See Copyright for the status of this software. |
5 | | */ |
6 | | |
7 | | #include <stdio.h> |
8 | | #include <stdlib.h> |
9 | | #include <string.h> |
10 | | |
11 | | #include <libxml/HTMLparser.h> |
12 | | #include <libxml/HTMLtree.h> |
13 | | #include <libxml/catalog.h> |
14 | | #include "fuzz.h" |
15 | | |
16 | | int |
17 | | LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED, |
18 | 2 | char ***argv ATTRIBUTE_UNUSED) { |
19 | 2 | xmlFuzzMemSetup(); |
20 | 2 | xmlInitParser(); |
21 | 2 | #ifdef LIBXML_CATALOG_ENABLED |
22 | 2 | xmlInitializeCatalog(); |
23 | 2 | xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE); |
24 | 2 | #endif |
25 | | |
26 | 2 | return 0; |
27 | 2 | } |
28 | | |
29 | | int |
30 | 9.88k | LLVMFuzzerTestOneInput(const char *data, size_t size) { |
31 | 9.88k | xmlParserCtxtPtr ctxt; |
32 | 9.88k | htmlDocPtr doc; |
33 | 9.88k | const char *docBuffer; |
34 | 9.88k | size_t failurePos, docSize, maxChunkSize; |
35 | 9.88k | int opts, errorCode; |
36 | 9.88k | #ifdef LIBXML_OUTPUT_ENABLED |
37 | 9.88k | xmlOutputBufferPtr out = NULL; |
38 | 9.88k | #endif |
39 | | |
40 | 9.88k | xmlFuzzDataInit(data, size); |
41 | 9.88k | opts = (int) xmlFuzzReadInt(4); |
42 | 9.88k | failurePos = xmlFuzzReadInt(4) % (size + 100); |
43 | | |
44 | 9.88k | maxChunkSize = xmlFuzzReadInt(4) % (size + size / 8 + 1); |
45 | 9.88k | if (maxChunkSize == 0) |
46 | 1.06k | maxChunkSize = 1; |
47 | | |
48 | 9.88k | docBuffer = xmlFuzzReadRemaining(&docSize); |
49 | 9.88k | if (docBuffer == NULL) { |
50 | 0 | xmlFuzzDataCleanup(); |
51 | 0 | return(0); |
52 | 0 | } |
53 | | |
54 | | /* Pull parser */ |
55 | | |
56 | 9.88k | xmlFuzzInjectFailure(failurePos); |
57 | 9.88k | ctxt = htmlNewParserCtxt(); |
58 | 9.88k | if (ctxt == NULL) { |
59 | 6 | errorCode = XML_ERR_NO_MEMORY; |
60 | 9.87k | } else { |
61 | 9.87k | xmlCtxtSetErrorHandler(ctxt, xmlFuzzSErrorFunc, NULL); |
62 | 9.87k | doc = htmlCtxtReadMemory(ctxt, docBuffer, docSize, NULL, NULL, opts); |
63 | 9.87k | errorCode = ctxt->errNo; |
64 | 9.87k | xmlFuzzCheckFailureReport("htmlCtxtReadMemory", |
65 | 9.87k | errorCode == XML_ERR_NO_MEMORY, |
66 | 9.87k | errorCode == XML_IO_EIO); |
67 | | |
68 | 9.87k | if (doc != NULL) { |
69 | 8.23k | xmlDocPtr copy; |
70 | | |
71 | 8.23k | #ifdef LIBXML_OUTPUT_ENABLED |
72 | 8.23k | const xmlChar *content; |
73 | | |
74 | | /* |
75 | | * Also test the serializer. Call htmlDocContentDumpOutput with our |
76 | | * own buffer to avoid encoding the output. The HTML encoding is |
77 | | * excruciatingly slow (see htmlEntityValueLookup). |
78 | | */ |
79 | 8.23k | out = xmlAllocOutputBuffer(NULL); |
80 | 8.23k | htmlDocContentDumpOutput(out, doc, NULL); |
81 | 8.23k | content = xmlOutputBufferGetContent(out); |
82 | 8.23k | xmlFuzzCheckFailureReport("htmlDocContentDumpOutput", |
83 | 8.23k | content == NULL, 0); |
84 | 8.23k | if (content == NULL) { |
85 | 162 | xmlOutputBufferClose(out); |
86 | 162 | out = NULL; |
87 | 162 | } |
88 | 8.23k | #endif |
89 | | |
90 | 8.23k | copy = xmlCopyDoc(doc, 1); |
91 | 8.23k | xmlFuzzCheckFailureReport("xmlCopyNode", copy == NULL, 0); |
92 | 8.23k | xmlFreeDoc(copy); |
93 | | |
94 | 8.23k | xmlFreeDoc(doc); |
95 | 8.23k | } |
96 | | |
97 | 9.87k | htmlFreeParserCtxt(ctxt); |
98 | 9.87k | } |
99 | | |
100 | | |
101 | | /* Push parser */ |
102 | | |
103 | 9.88k | #ifdef LIBXML_PUSH_ENABLED |
104 | 9.88k | xmlFuzzInjectFailure(failurePos); |
105 | 9.88k | ctxt = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL, |
106 | 9.88k | XML_CHAR_ENCODING_NONE); |
107 | | |
108 | 9.88k | if (ctxt != NULL) { |
109 | 9.86k | size_t consumed; |
110 | 9.86k | int errorCodePush, numChunks, maxChunks; |
111 | | |
112 | 9.86k | xmlCtxtSetErrorHandler(ctxt, xmlFuzzSErrorFunc, NULL); |
113 | 9.86k | htmlCtxtUseOptions(ctxt, opts); |
114 | | |
115 | 9.86k | consumed = 0; |
116 | 9.86k | numChunks = 0; |
117 | 9.86k | maxChunks = 50 + docSize / 100; |
118 | 486k | while (numChunks == 0 || |
119 | 486k | (consumed < docSize && numChunks < maxChunks)) { |
120 | 476k | size_t chunkSize; |
121 | 476k | int terminate; |
122 | | |
123 | 476k | numChunks += 1; |
124 | 476k | chunkSize = docSize - consumed; |
125 | | |
126 | 476k | if (numChunks < maxChunks && chunkSize > maxChunkSize) { |
127 | 466k | chunkSize = maxChunkSize; |
128 | 466k | terminate = 0; |
129 | 466k | } else { |
130 | 9.86k | terminate = 1; |
131 | 9.86k | } |
132 | | |
133 | 476k | htmlParseChunk(ctxt, docBuffer + consumed, chunkSize, terminate); |
134 | 476k | consumed += chunkSize; |
135 | 476k | } |
136 | | |
137 | 9.86k | errorCodePush = ctxt->errNo; |
138 | 9.86k | xmlFuzzCheckFailureReport("htmlParseChunk", |
139 | 9.86k | errorCodePush == XML_ERR_NO_MEMORY, |
140 | 9.86k | errorCodePush == XML_IO_EIO); |
141 | 9.86k | doc = ctxt->myDoc; |
142 | | |
143 | | /* |
144 | | * Push and pull parser differ in when exactly they |
145 | | * stop parsing, and the error code is the *last* error |
146 | | * reported, so we can't check whether the codes match. |
147 | | */ |
148 | 9.86k | if (errorCode != XML_ERR_NO_MEMORY && |
149 | 9.86k | errorCode != XML_IO_EIO && |
150 | 9.86k | errorCodePush != XML_ERR_NO_MEMORY && |
151 | 9.86k | errorCodePush != XML_IO_EIO && |
152 | 9.86k | (errorCode == XML_ERR_OK) != (errorCodePush == XML_ERR_OK)) { |
153 | 0 | fprintf(stderr, "pull/push parser error mismatch: %d != %d\n", |
154 | 0 | errorCode, errorCodePush); |
155 | | #if 0 |
156 | | FILE *f = fopen("c.html", "wb"); |
157 | | fwrite(docBuffer, docSize, 1, f); |
158 | | fclose(f); |
159 | | fprintf(stderr, "opts: %X\n", opts); |
160 | | #endif |
161 | 0 | abort(); |
162 | 0 | } |
163 | | |
164 | 9.86k | #ifdef LIBXML_OUTPUT_ENABLED |
165 | | /* |
166 | | * Verify that pull and push parser produce the same result. |
167 | | * |
168 | | * The NOBLANKS option doesn't work reliably in push mode. |
169 | | */ |
170 | 9.86k | if ((opts & XML_PARSE_NOBLANKS) == 0 && |
171 | 9.86k | errorCode == XML_ERR_OK && |
172 | 9.86k | errorCodePush == XML_ERR_OK && |
173 | 9.86k | out != NULL) { |
174 | 4.20k | xmlOutputBufferPtr outPush; |
175 | 4.20k | const xmlChar *content, *contentPush; |
176 | | |
177 | 4.20k | outPush = xmlAllocOutputBuffer(NULL); |
178 | 4.20k | htmlDocContentDumpOutput(outPush, doc, NULL); |
179 | 4.20k | content = xmlOutputBufferGetContent(out); |
180 | 4.20k | contentPush = xmlOutputBufferGetContent(outPush); |
181 | | |
182 | 4.20k | if (content != NULL && contentPush != NULL) { |
183 | 4.20k | size_t outSize = xmlOutputBufferGetSize(out); |
184 | | |
185 | 4.20k | if (outSize != xmlOutputBufferGetSize(outPush) || |
186 | 4.20k | memcmp(content, contentPush, outSize) != 0) { |
187 | 0 | fprintf(stderr, "pull/push parser roundtrip " |
188 | 0 | "mismatch\n"); |
189 | | #if 0 |
190 | | FILE *f = fopen("c.html", "wb"); |
191 | | fwrite(docBuffer, docSize, 1, f); |
192 | | fclose(f); |
193 | | fprintf(stderr, "opts: %X\n", opts); |
194 | | fprintf(stderr, "---\n%s\n---\n%s\n---\n", |
195 | | xmlOutputBufferGetContent(out), |
196 | | xmlOutputBufferGetContent(outPush)); |
197 | | #endif |
198 | 0 | abort(); |
199 | 0 | } |
200 | 4.20k | } |
201 | | |
202 | 4.20k | xmlOutputBufferClose(outPush); |
203 | 4.20k | } |
204 | 9.86k | #endif |
205 | | |
206 | 9.86k | xmlFreeDoc(doc); |
207 | 9.86k | htmlFreeParserCtxt(ctxt); |
208 | 9.86k | } |
209 | 9.88k | #endif |
210 | | |
211 | | /* Cleanup */ |
212 | | |
213 | 9.88k | #ifdef LIBXML_OUTPUT_ENABLED |
214 | 9.88k | xmlOutputBufferClose(out); |
215 | 9.88k | #endif |
216 | | |
217 | 9.88k | xmlFuzzInjectFailure(0); |
218 | 9.88k | xmlFuzzDataCleanup(); |
219 | 9.88k | xmlResetLastError(); |
220 | | |
221 | 9.88k | return(0); |
222 | 9.88k | } |
223 | | |
224 | | size_t |
225 | | LLVMFuzzerCustomMutator(char *data, size_t size, size_t maxSize, |
226 | 0 | unsigned seed) { |
227 | 0 | static const xmlFuzzChunkDesc chunks[] = { |
228 | 0 | { 4, XML_FUZZ_PROB_ONE / 10 }, /* opts */ |
229 | 0 | { 4, XML_FUZZ_PROB_ONE / 10 }, /* failurePos */ |
230 | 0 | { 0, 0 } |
231 | 0 | }; |
232 | |
|
233 | 0 | return xmlFuzzMutateChunks(chunks, data, size, maxSize, seed, |
234 | 0 | LLVMFuzzerMutate); |
235 | 0 | } |
236 | | |