/src/libxslt/tests/fuzz/fuzz.c
Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * fuzz.c: Fuzz targets for libxslt |
3 | | * |
4 | | * See Copyright for the status of this software. |
5 | | */ |
6 | | |
7 | | #include <stdlib.h> |
8 | | #include <stdio.h> |
9 | | #include <string.h> |
10 | | |
11 | | #include "fuzz.h" |
12 | | |
13 | | #include <libxml/tree.h> |
14 | | #include <libxml/parser.h> |
15 | | #include <libxml/xpath.h> |
16 | | #include <libxml/xpathInternals.h> |
17 | | #include <libxslt/extensions.h> |
18 | | #include <libxslt/functions.h> |
19 | | #include <libxslt/security.h> |
20 | | #include <libxslt/transform.h> |
21 | | #include <libxslt/xslt.h> |
22 | | #include <libxslt/xsltInternals.h> |
23 | | #include <libxslt/xsltutils.h> |
24 | | #include <libexslt/exslt.h> |
25 | | |
26 | | #if defined(_WIN32) |
27 | | #define DIR_SEP '\\' |
28 | | #else |
29 | 12 | #define DIR_SEP '/' |
30 | | #endif |
31 | | |
32 | | static xmlDocPtr doc; |
33 | | static xsltSecurityPrefsPtr sec; |
34 | | static xsltTransformContextPtr tctxt; |
35 | | static xmlHashTablePtr saxonExtHash; |
36 | | |
37 | | static void |
38 | 332k | xsltFuzzXmlErrorFunc(void *vctxt, const char *msg ATTRIBUTE_UNUSED, ...) { |
39 | 332k | xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) vctxt; |
40 | | /* |
41 | | * Stopping the parser should be slightly faster and might catch some |
42 | | * issues related to recent libxml2 changes. |
43 | | */ |
44 | 332k | xmlStopParser(ctxt); |
45 | 332k | } |
46 | | |
47 | | static void |
48 | | xsltFuzzXsltErrorFunc(void *vctxt ATTRIBUTE_UNUSED, |
49 | 340k | const char *msg ATTRIBUTE_UNUSED, ...) { |
50 | 340k | } |
51 | | |
52 | | static void |
53 | 12 | xsltFuzzInit(void) { |
54 | | /* Init libxml2, libxslt and libexslt */ |
55 | 12 | xmlInitParser(); |
56 | 12 | xsltInit(); |
57 | 12 | exsltRegisterAll(); |
58 | | |
59 | | /* Suppress error messages */ |
60 | 12 | xmlSetGenericErrorFunc(NULL, xsltFuzzXmlErrorFunc); |
61 | 12 | xsltSetGenericErrorFunc(NULL, xsltFuzzXsltErrorFunc); |
62 | | |
63 | | /* Disallow I/O */ |
64 | 12 | sec = xsltNewSecurityPrefs(); |
65 | 12 | xsltSetSecurityPrefs(sec, XSLT_SECPREF_READ_FILE, xsltSecurityForbid); |
66 | 12 | xsltSetSecurityPrefs(sec, XSLT_SECPREF_WRITE_FILE, xsltSecurityForbid); |
67 | 12 | xsltSetSecurityPrefs(sec, XSLT_SECPREF_CREATE_DIRECTORY, xsltSecurityForbid); |
68 | 12 | xsltSetSecurityPrefs(sec, XSLT_SECPREF_READ_NETWORK, xsltSecurityForbid); |
69 | 12 | xsltSetSecurityPrefs(sec, XSLT_SECPREF_WRITE_NETWORK, xsltSecurityForbid); |
70 | 12 | } |
71 | | |
72 | | static xmlDocPtr |
73 | 12 | xsltFuzzLoadDoc(const char *argv0, const char *dir, const char *filename) { |
74 | 12 | char *path; |
75 | | |
76 | 12 | if (dir != NULL) { |
77 | 0 | path = malloc(strlen(dir) + 1 + strlen(filename) + 1); |
78 | 0 | sprintf(path, "%s/%s", dir, filename); |
79 | 0 | doc = xmlReadFile(path, NULL, 0); |
80 | 0 | if (doc == NULL) |
81 | 0 | fprintf(stderr, "Error: unable to parse file '%s' in '%s'\n", |
82 | 0 | filename, dir); |
83 | 12 | } else { |
84 | 12 | const char *end; |
85 | 12 | size_t dirLen; |
86 | | |
87 | 12 | end = strrchr(argv0, DIR_SEP); |
88 | 12 | dirLen = (end == NULL) ? 0 : end - argv0 + 1; |
89 | 12 | path = malloc(dirLen + strlen(filename) + 1); |
90 | 12 | memcpy(path, argv0, dirLen); |
91 | 12 | path[dirLen] = '\0'; |
92 | 12 | strcat(path, filename); |
93 | 12 | doc = xmlReadFile(path, NULL, 0); |
94 | | |
95 | 12 | if (doc == NULL && dirLen > 0) { |
96 | | /* Binary might be in .libs, try parent directory */ |
97 | 0 | path[dirLen-1] = 0; |
98 | 0 | end = strrchr(path, DIR_SEP); |
99 | 0 | dirLen = (end == NULL) ? 0 : end - path + 1; |
100 | 0 | path[dirLen] = '\0'; |
101 | 0 | strcat(path, filename); |
102 | 0 | doc = xmlReadFile(path, NULL, 0); |
103 | 0 | } |
104 | | |
105 | 12 | if (doc == NULL) |
106 | 0 | fprintf(stderr, "Error: unable to parse file '%s'\n", filename); |
107 | 12 | } |
108 | | |
109 | 12 | free(path); |
110 | | |
111 | 12 | return doc; |
112 | 12 | } |
113 | | |
114 | | /* XPath fuzzer |
115 | | * |
116 | | * This fuzz target parses and evaluates XPath expressions in an (E)XSLT |
117 | | * context using a static XML document. It heavily exercises the libxml2 |
118 | | * XPath engine (xpath.c), a few other parts of libxml2, and most of |
119 | | * libexslt. |
120 | | * |
121 | | * Some EXSLT functions need the transform context to create RVTs for |
122 | | * node-sets. A couple of functions also access the stylesheet. The |
123 | | * XPath context from the transform context is used to parse and |
124 | | * evaluate expressions. |
125 | | * |
126 | | * All these objects are created once at startup. After fuzzing each input, |
127 | | * they're reset as cheaply as possible. |
128 | | * |
129 | | * TODO |
130 | | * |
131 | | * - Some expressions can create lots of temporary node sets (RVTs) which |
132 | | * aren't freed until the whole expression was evaluated, leading to |
133 | | * extensive memory usage. Cleaning them up earlier would require |
134 | | * callbacks from the XPath engine, for example after evaluating a |
135 | | * predicate expression, which doesn't seem feasible. Terminating the |
136 | | * evaluation after creating a certain number of RVTs is a simple |
137 | | * workaround. |
138 | | * - Register a custom xsl:decimal-format declaration for format-number(). |
139 | | * - Some functions add strings to the stylesheet or transform context |
140 | | * dictionary, for example via xsltGetQName, requiring a clean up of the |
141 | | * dicts after fuzzing each input. This behavior seems questionable. |
142 | | * Extension functions shouldn't needlessly modify the transform context |
143 | | * or stylesheet. |
144 | | * - Register xsl:keys and fuzz the key() function. |
145 | | * - Add a few custom func:functions. |
146 | | * - Fuzz the document() function with external documents. |
147 | | */ |
148 | | |
149 | | int |
150 | | xsltFuzzXPathInit(int *argc_p ATTRIBUTE_UNUSED, char ***argv_p, |
151 | 12 | const char *dir) { |
152 | 12 | const char *xmlFilename = "xpath.xml"; |
153 | 12 | xsltStylesheetPtr style; |
154 | 12 | xmlXPathContextPtr xpctxt; |
155 | | |
156 | 12 | xsltFuzzInit(); |
157 | | |
158 | | /* Load XML document */ |
159 | 12 | doc = xsltFuzzLoadDoc((*argv_p)[0], dir, xmlFilename); |
160 | 12 | if (doc == NULL) |
161 | 0 | return -1; |
162 | | |
163 | 12 | style = xsltNewStylesheet(); |
164 | 12 | tctxt = xsltNewTransformContext(style, doc); |
165 | 12 | xsltSetCtxtSecurityPrefs(sec, tctxt); |
166 | | |
167 | | /* |
168 | | * Some extension functions need the current instruction. |
169 | | * |
170 | | * - format-number() for namespaces. |
171 | | * - document() for the base URL. |
172 | | * - maybe others? |
173 | | * |
174 | | * For fuzzing, it's enough to use the source document's root element. |
175 | | */ |
176 | 12 | tctxt->inst = xmlDocGetRootElement(doc); |
177 | | |
178 | 12 | saxonExtHash = (xmlHashTablePtr) |
179 | 12 | xsltStyleGetExtData(style, SAXON_NAMESPACE); |
180 | | |
181 | | /* Set up XPath context */ |
182 | 12 | xpctxt = tctxt->xpathCtxt; |
183 | | |
184 | | /* Resource limits to avoid timeouts and call stack overflows */ |
185 | 12 | xpctxt->opLimit = 500000; |
186 | | |
187 | | /* Test namespaces used in xpath.xml */ |
188 | 12 | xmlXPathRegisterNs(xpctxt, BAD_CAST "a", BAD_CAST "a"); |
189 | 12 | xmlXPathRegisterNs(xpctxt, BAD_CAST "b", BAD_CAST "b"); |
190 | 12 | xmlXPathRegisterNs(xpctxt, BAD_CAST "c", BAD_CAST "c"); |
191 | | |
192 | | /* EXSLT namespaces */ |
193 | 12 | xmlXPathRegisterNs(xpctxt, BAD_CAST "crypto", EXSLT_CRYPTO_NAMESPACE); |
194 | 12 | xmlXPathRegisterNs(xpctxt, BAD_CAST "date", EXSLT_DATE_NAMESPACE); |
195 | 12 | xmlXPathRegisterNs(xpctxt, BAD_CAST "dyn", EXSLT_DYNAMIC_NAMESPACE); |
196 | 12 | xmlXPathRegisterNs(xpctxt, BAD_CAST "exsl", EXSLT_COMMON_NAMESPACE); |
197 | 12 | xmlXPathRegisterNs(xpctxt, BAD_CAST "math", EXSLT_MATH_NAMESPACE); |
198 | 12 | xmlXPathRegisterNs(xpctxt, BAD_CAST "saxon", SAXON_NAMESPACE); |
199 | 12 | xmlXPathRegisterNs(xpctxt, BAD_CAST "set", EXSLT_SETS_NAMESPACE); |
200 | 12 | xmlXPathRegisterNs(xpctxt, BAD_CAST "str", EXSLT_STRINGS_NAMESPACE); |
201 | | |
202 | | /* Register variables */ |
203 | 12 | xmlXPathRegisterVariable(xpctxt, BAD_CAST "f", xmlXPathNewFloat(-1.5)); |
204 | 12 | xmlXPathRegisterVariable(xpctxt, BAD_CAST "b", xmlXPathNewBoolean(1)); |
205 | 12 | xmlXPathRegisterVariable(xpctxt, BAD_CAST "s", |
206 | 12 | xmlXPathNewString(BAD_CAST "var")); |
207 | 12 | xmlXPathRegisterVariable( |
208 | 12 | xpctxt, BAD_CAST "n", |
209 | 12 | xmlXPathEval(BAD_CAST "//node() | /*/*/namespace::*", xpctxt)); |
210 | | |
211 | 12 | return 0; |
212 | 12 | } |
213 | | |
214 | | xmlXPathObjectPtr |
215 | 8.61k | xsltFuzzXPath(const char *data, size_t size) { |
216 | 8.61k | xmlXPathContextPtr xpctxt = tctxt->xpathCtxt; |
217 | 8.61k | xmlChar *xpathExpr; |
218 | | |
219 | | /* Null-terminate */ |
220 | 8.61k | xpathExpr = malloc(size + 1); |
221 | 8.61k | memcpy(xpathExpr, data, size); |
222 | 8.61k | xpathExpr[size] = 0; |
223 | | |
224 | | /* Compile and return early if the expression is invalid */ |
225 | 8.61k | xmlXPathCompExprPtr compExpr = xmlXPathCtxtCompile(xpctxt, xpathExpr); |
226 | 8.61k | free(xpathExpr); |
227 | 8.61k | if (compExpr == NULL) |
228 | 2.95k | return NULL; |
229 | | |
230 | | /* Initialize XPath evaluation context and evaluate */ |
231 | 5.65k | xpctxt->node = (xmlNodePtr) doc; /* Maybe test different context nodes? */ |
232 | 5.65k | xpctxt->contextSize = 1; |
233 | 5.65k | xpctxt->proximityPosition = 1; |
234 | 5.65k | xpctxt->opCount = 0; |
235 | 5.65k | xmlXPathObjectPtr xpathObj = xmlXPathCompiledEval(compExpr, xpctxt); |
236 | 5.65k | xmlXPathFreeCompExpr(compExpr); |
237 | | |
238 | | /* Clean object cache */ |
239 | 5.65k | xmlXPathContextSetCache(xpctxt, 0, 0, 0); |
240 | 5.65k | xmlXPathContextSetCache(xpctxt, 1, -1, 0); |
241 | | |
242 | | /* Clean dictionaries */ |
243 | 5.65k | if (xmlDictSize(tctxt->dict) > 0) { |
244 | 915 | xmlDictFree(tctxt->dict); |
245 | 915 | xmlDictFree(tctxt->style->dict); |
246 | 915 | tctxt->style->dict = xmlDictCreate(); |
247 | 915 | tctxt->dict = xmlDictCreateSub(tctxt->style->dict); |
248 | 915 | } |
249 | | |
250 | | /* Clean saxon:expression cache */ |
251 | 5.65k | if (xmlHashSize(saxonExtHash) > 0) { |
252 | | /* There doesn't seem to be a cheaper way with the public API. */ |
253 | 0 | xsltShutdownCtxtExts(tctxt); |
254 | 0 | xsltInitCtxtExts(tctxt); |
255 | 0 | saxonExtHash = (xmlHashTablePtr) |
256 | 0 | xsltStyleGetExtData(tctxt->style, SAXON_NAMESPACE); |
257 | 0 | } |
258 | | |
259 | 5.65k | return xpathObj; |
260 | 8.61k | } |
261 | | |
262 | | void |
263 | 8.61k | xsltFuzzXPathFreeObject(xmlXPathObjectPtr obj) { |
264 | 8.61k | xmlXPathFreeObject(obj); |
265 | | |
266 | | /* Some XSLT extension functions create RVTs. */ |
267 | 8.61k | xsltFreeRVTs(tctxt); |
268 | 8.61k | } |
269 | | |
270 | | void |
271 | 0 | xsltFuzzXPathCleanup(void) { |
272 | 0 | xsltStylesheetPtr style = tctxt->style; |
273 | |
|
274 | 0 | xmlXPathRegisteredNsCleanup(tctxt->xpathCtxt); |
275 | 0 | xsltFreeSecurityPrefs(sec); |
276 | 0 | sec = NULL; |
277 | 0 | xsltFreeTransformContext(tctxt); |
278 | 0 | tctxt = NULL; |
279 | 0 | xsltFreeStylesheet(style); |
280 | 0 | style = NULL; |
281 | 0 | xmlFreeDoc(doc); |
282 | 0 | doc = NULL; |
283 | 0 | } |
284 | | |
285 | | /* |
286 | | * XSLT fuzzer |
287 | | * |
288 | | * This is a rather naive fuzz target using a static XML document. |
289 | | * |
290 | | * TODO |
291 | | * |
292 | | * - Improve seed corpus |
293 | | * - Mutate multiple input documents: source, xsl:import, xsl:include |
294 | | * - format-number() with xsl:decimal-format |
295 | | * - Better coverage for xsl:key and key() function |
296 | | * - EXSLT func:function |
297 | | * - xsl:document |
298 | | */ |
299 | | |
300 | | int |
301 | | xsltFuzzXsltInit(int *argc_p ATTRIBUTE_UNUSED, char ***argv_p, |
302 | 0 | const char *dir) { |
303 | 0 | const char *xmlFilename = "xslt.xml"; |
304 | |
|
305 | 0 | xsltFuzzInit(); |
306 | | |
307 | | /* Load XML document */ |
308 | 0 | doc = xsltFuzzLoadDoc((*argv_p)[0], dir, xmlFilename); |
309 | 0 | if (doc == NULL) |
310 | 0 | return -1; |
311 | | |
312 | 0 | return 0; |
313 | 0 | } |
314 | | |
315 | | xmlChar * |
316 | 0 | xsltFuzzXslt(const char *data, size_t size) { |
317 | 0 | xmlDocPtr xsltDoc; |
318 | 0 | xmlDocPtr result; |
319 | 0 | xmlNodePtr xsltRoot; |
320 | 0 | xsltStylesheetPtr sheet; |
321 | 0 | xsltTransformContextPtr ctxt; |
322 | 0 | xmlChar *ret = NULL; |
323 | 0 | int retLen; |
324 | |
|
325 | 0 | xsltDoc = xmlReadMemory(data, size, NULL, NULL, 0); |
326 | 0 | if (xsltDoc == NULL) |
327 | 0 | return NULL; |
328 | 0 | xsltRoot = xmlDocGetRootElement(xsltDoc); |
329 | 0 | xmlNewNs(xsltRoot, EXSLT_COMMON_NAMESPACE, BAD_CAST "exsl"); |
330 | 0 | xmlNewNs(xsltRoot, EXSLT_COMMON_NAMESPACE, BAD_CAST "exslt"); |
331 | 0 | xmlNewNs(xsltRoot, EXSLT_CRYPTO_NAMESPACE, BAD_CAST "crypto"); |
332 | 0 | xmlNewNs(xsltRoot, EXSLT_DATE_NAMESPACE, BAD_CAST "date"); |
333 | 0 | xmlNewNs(xsltRoot, EXSLT_DYNAMIC_NAMESPACE, BAD_CAST "dyn"); |
334 | 0 | xmlNewNs(xsltRoot, EXSLT_MATH_NAMESPACE, BAD_CAST "math"); |
335 | 0 | xmlNewNs(xsltRoot, EXSLT_SETS_NAMESPACE, BAD_CAST "set"); |
336 | 0 | xmlNewNs(xsltRoot, EXSLT_STRINGS_NAMESPACE, BAD_CAST "str"); |
337 | 0 | xmlNewNs(xsltRoot, SAXON_NAMESPACE, BAD_CAST "saxon"); |
338 | |
|
339 | 0 | sheet = xsltNewStylesheet(); |
340 | 0 | if (sheet == NULL) { |
341 | 0 | xmlFreeDoc(xsltDoc); |
342 | 0 | return NULL; |
343 | 0 | } |
344 | 0 | sheet->xpathCtxt->opLimit = 100000; |
345 | 0 | sheet->xpathCtxt->opCount = 0; |
346 | 0 | if (xsltParseStylesheetUser(sheet, xsltDoc) != 0) { |
347 | 0 | xsltFreeStylesheet(sheet); |
348 | 0 | xmlFreeDoc(xsltDoc); |
349 | 0 | return NULL; |
350 | 0 | } |
351 | | |
352 | 0 | ctxt = xsltNewTransformContext(sheet, doc); |
353 | 0 | xsltSetCtxtSecurityPrefs(sec, ctxt); |
354 | 0 | ctxt->maxTemplateDepth = 100; |
355 | 0 | ctxt->opLimit = 20000; |
356 | 0 | ctxt->xpathCtxt->opLimit = 100000; |
357 | 0 | ctxt->xpathCtxt->opCount = sheet->xpathCtxt->opCount; |
358 | |
|
359 | 0 | result = xsltApplyStylesheetUser(sheet, doc, NULL, NULL, NULL, ctxt); |
360 | 0 | if (result != NULL) |
361 | 0 | xsltSaveResultToString(&ret, &retLen, result, sheet); |
362 | |
|
363 | 0 | xmlFreeDoc(result); |
364 | 0 | xsltFreeTransformContext(ctxt); |
365 | 0 | xsltFreeStylesheet(sheet); |
366 | |
|
367 | 0 | return ret; |
368 | 0 | } |
369 | | |
370 | | void |
371 | 0 | xsltFuzzXsltCleanup(void) { |
372 | 0 | xsltFreeSecurityPrefs(sec); |
373 | 0 | sec = NULL; |
374 | 0 | xmlFreeDoc(doc); |
375 | 0 | doc = NULL; |
376 | 0 | } |