Coverage Report

Created: 2024-08-17 06:45

/src/libxslt/tests/fuzz/fuzz.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * fuzz.c: Fuzz targets for libxslt
3
 *
4
 * See Copyright for the status of this software.
5
 */
6
7
#include <stdlib.h>
8
#include <stdio.h>
9
#include <string.h>
10
11
#include "fuzz.h"
12
13
#include <libxml/tree.h>
14
#include <libxml/parser.h>
15
#include <libxml/xpath.h>
16
#include <libxml/xpathInternals.h>
17
#include <libxslt/extensions.h>
18
#include <libxslt/functions.h>
19
#include <libxslt/security.h>
20
#include <libxslt/transform.h>
21
#include <libxslt/xslt.h>
22
#include <libxslt/xsltInternals.h>
23
#include <libxslt/xsltutils.h>
24
#include <libexslt/exslt.h>
25
26
/*
 * Directory separator searched for in argv[0] when locating the test
 * documents relative to the fuzzer binary.
 */
#if defined(_WIN32)
27
  #define DIR_SEP '\\'
28
#else
29
12
  #define DIR_SEP '/'
30
#endif
31
32
/* Static source document loaded by xsltFuzzLoadDoc. */
static xmlDocPtr doc;
33
/* Security preferences created in xsltFuzzInit; all I/O is forbidden. */
static xsltSecurityPrefsPtr sec;
34
/* Transform context of the XPath fuzz target, set by xsltFuzzXPathInit. */
static xsltTransformContextPtr tctxt;
35
/* Extension data registered for SAXON_NAMESPACE, used by xsltFuzzXPath. */
static xmlHashTablePtr saxonExtHash;
36
37
static void
38
332k
xsltFuzzXmlErrorFunc(void *vctxt, const char *msg ATTRIBUTE_UNUSED, ...) {
39
332k
    xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr) vctxt;
40
    /*
41
     * Stopping the parser should be slightly faster and might catch some
42
     * issues related to recent libxml2 changes.
43
     */
44
332k
    xmlStopParser(ctxt);
45
332k
}
46
47
static void
48
xsltFuzzXsltErrorFunc(void *vctxt ATTRIBUTE_UNUSED,
49
340k
                      const char *msg ATTRIBUTE_UNUSED, ...) {
50
340k
}
51
52
static void
53
12
xsltFuzzInit(void) {
54
    /* Init libxml2, libxslt and libexslt */
55
12
    xmlInitParser();
56
12
    xsltInit();
57
12
    exsltRegisterAll();
58
59
    /* Suppress error messages */
60
12
    xmlSetGenericErrorFunc(NULL, xsltFuzzXmlErrorFunc);
61
12
    xsltSetGenericErrorFunc(NULL, xsltFuzzXsltErrorFunc);
62
63
    /* Disallow I/O */
64
12
    sec = xsltNewSecurityPrefs();
65
12
    xsltSetSecurityPrefs(sec, XSLT_SECPREF_READ_FILE, xsltSecurityForbid);
66
12
    xsltSetSecurityPrefs(sec, XSLT_SECPREF_WRITE_FILE, xsltSecurityForbid);
67
12
    xsltSetSecurityPrefs(sec, XSLT_SECPREF_CREATE_DIRECTORY, xsltSecurityForbid);
68
12
    xsltSetSecurityPrefs(sec, XSLT_SECPREF_READ_NETWORK, xsltSecurityForbid);
69
12
    xsltSetSecurityPrefs(sec, XSLT_SECPREF_WRITE_NETWORK, xsltSecurityForbid);
70
12
}
71
72
static xmlDocPtr
73
12
xsltFuzzLoadDoc(const char *argv0, const char *dir, const char *filename) {
74
12
    char *path;
75
76
12
    if (dir != NULL) {
77
0
        path = malloc(strlen(dir) + 1 + strlen(filename) + 1);
78
0
        sprintf(path, "%s/%s", dir, filename);
79
0
        doc = xmlReadFile(path, NULL, 0);
80
0
        if (doc == NULL)
81
0
            fprintf(stderr, "Error: unable to parse file '%s' in '%s'\n",
82
0
                    filename, dir);
83
12
    } else {
84
12
        const char *end;
85
12
        size_t dirLen;
86
87
12
        end = strrchr(argv0, DIR_SEP);
88
12
        dirLen = (end == NULL) ? 0 : end - argv0 + 1;
89
12
        path = malloc(dirLen + strlen(filename) + 1);
90
12
        memcpy(path, argv0, dirLen);
91
12
        path[dirLen] = '\0';
92
12
        strcat(path, filename);
93
12
        doc = xmlReadFile(path, NULL, 0);
94
95
12
        if (doc == NULL && dirLen > 0) {
96
            /* Binary might be in .libs, try parent directory */
97
0
            path[dirLen-1] = 0;
98
0
            end = strrchr(path, DIR_SEP);
99
0
            dirLen = (end == NULL) ? 0 : end - path + 1;
100
0
            path[dirLen] = '\0';
101
0
            strcat(path, filename);
102
0
            doc = xmlReadFile(path, NULL, 0);
103
0
        }
104
105
12
        if (doc == NULL)
106
0
            fprintf(stderr, "Error: unable to parse file '%s'\n", filename);
107
12
    }
108
109
12
    free(path);
110
111
12
    return doc;
112
12
}
113
114
/* XPath fuzzer
115
 *
116
 * This fuzz target parses and evaluates XPath expressions in an (E)XSLT
117
 * context using a static XML document. It heavily exercises the libxml2
118
 * XPath engine (xpath.c), a few other parts of libxml2, and most of
119
 * libexslt.
120
 *
121
 * Some EXSLT functions need the transform context to create RVTs for
122
 * node-sets. A couple of functions also access the stylesheet. The
123
 * XPath context from the transform context is used to parse and
124
 * evaluate expressions.
125
 *
126
 * All these objects are created once at startup. After fuzzing each input,
127
 * they're reset as cheaply as possible.
128
 *
129
 * TODO
130
 *
131
 * - Some expressions can create lots of temporary node sets (RVTs) which
132
 *   aren't freed until the whole expression was evaluated, leading to
133
 *   extensive memory usage. Cleaning them up earlier would require
134
 *   callbacks from the XPath engine, for example after evaluating a
135
 *   predicate expression, which doesn't seem feasible. Terminating the
136
 *   evaluation after creating a certain number of RVTs is a simple
137
 *   workaround.
138
 * - Register a custom xsl:decimal-format declaration for format-number().
139
 * - Some functions add strings to the stylesheet or transform context
140
 *   dictionary, for example via xsltGetQName, requiring a clean up of the
141
 *   dicts after fuzzing each input. This behavior seems questionable.
142
 *   Extension functions shouldn't needlessly modify the transform context
143
 *   or stylesheet.
144
 * - Register xsl:keys and fuzz the key() function.
145
 * - Add a few custom func:functions.
146
 * - Fuzz the document() function with external documents.
147
 */
148
149
int
150
xsltFuzzXPathInit(int *argc_p ATTRIBUTE_UNUSED, char ***argv_p,
151
12
                  const char *dir) {
152
12
    const char *xmlFilename = "xpath.xml";
153
12
    xsltStylesheetPtr style;
154
12
    xmlXPathContextPtr xpctxt;
155
156
12
    xsltFuzzInit();
157
158
    /* Load XML document */
159
12
    doc = xsltFuzzLoadDoc((*argv_p)[0], dir, xmlFilename);
160
12
    if (doc == NULL)
161
0
        return -1;
162
163
12
    style = xsltNewStylesheet();
164
12
    tctxt = xsltNewTransformContext(style, doc);
165
12
    xsltSetCtxtSecurityPrefs(sec, tctxt);
166
167
    /*
168
     * Some extension functions need the current instruction.
169
     *
170
     * - format-number() for namespaces.
171
     * - document() for the base URL.
172
     * - maybe others?
173
     *
174
     * For fuzzing, it's enough to use the source document's root element.
175
     */
176
12
    tctxt->inst = xmlDocGetRootElement(doc);
177
178
12
    saxonExtHash = (xmlHashTablePtr)
179
12
        xsltStyleGetExtData(style, SAXON_NAMESPACE);
180
181
    /* Set up XPath context */
182
12
    xpctxt = tctxt->xpathCtxt;
183
184
    /* Resource limits to avoid timeouts and call stack overflows */
185
12
    xpctxt->opLimit = 500000;
186
187
    /* Test namespaces used in xpath.xml */
188
12
    xmlXPathRegisterNs(xpctxt, BAD_CAST "a", BAD_CAST "a");
189
12
    xmlXPathRegisterNs(xpctxt, BAD_CAST "b", BAD_CAST "b");
190
12
    xmlXPathRegisterNs(xpctxt, BAD_CAST "c", BAD_CAST "c");
191
192
    /* EXSLT namespaces */
193
12
    xmlXPathRegisterNs(xpctxt, BAD_CAST "crypto", EXSLT_CRYPTO_NAMESPACE);
194
12
    xmlXPathRegisterNs(xpctxt, BAD_CAST "date", EXSLT_DATE_NAMESPACE);
195
12
    xmlXPathRegisterNs(xpctxt, BAD_CAST "dyn", EXSLT_DYNAMIC_NAMESPACE);
196
12
    xmlXPathRegisterNs(xpctxt, BAD_CAST "exsl", EXSLT_COMMON_NAMESPACE);
197
12
    xmlXPathRegisterNs(xpctxt, BAD_CAST "math", EXSLT_MATH_NAMESPACE);
198
12
    xmlXPathRegisterNs(xpctxt, BAD_CAST "saxon", SAXON_NAMESPACE);
199
12
    xmlXPathRegisterNs(xpctxt, BAD_CAST "set", EXSLT_SETS_NAMESPACE);
200
12
    xmlXPathRegisterNs(xpctxt, BAD_CAST "str", EXSLT_STRINGS_NAMESPACE);
201
202
    /* Register variables */
203
12
    xmlXPathRegisterVariable(xpctxt, BAD_CAST "f", xmlXPathNewFloat(-1.5));
204
12
    xmlXPathRegisterVariable(xpctxt, BAD_CAST "b", xmlXPathNewBoolean(1));
205
12
    xmlXPathRegisterVariable(xpctxt, BAD_CAST "s",
206
12
                             xmlXPathNewString(BAD_CAST "var"));
207
12
    xmlXPathRegisterVariable(
208
12
            xpctxt, BAD_CAST "n",
209
12
            xmlXPathEval(BAD_CAST "//node() | /*/*/namespace::*", xpctxt));
210
211
12
    return 0;
212
12
}
213
214
xmlXPathObjectPtr
215
8.61k
xsltFuzzXPath(const char *data, size_t size) {
216
8.61k
    xmlXPathContextPtr xpctxt = tctxt->xpathCtxt;
217
8.61k
    xmlChar *xpathExpr;
218
219
    /* Null-terminate */
220
8.61k
    xpathExpr = malloc(size + 1);
221
8.61k
    memcpy(xpathExpr, data, size);
222
8.61k
    xpathExpr[size] = 0;
223
224
    /* Compile and return early if the expression is invalid */
225
8.61k
    xmlXPathCompExprPtr compExpr = xmlXPathCtxtCompile(xpctxt, xpathExpr);
226
8.61k
    free(xpathExpr);
227
8.61k
    if (compExpr == NULL)
228
2.95k
        return NULL;
229
230
    /* Initialize XPath evaluation context and evaluate */
231
5.65k
    xpctxt->node = (xmlNodePtr) doc; /* Maybe test different context nodes? */
232
5.65k
    xpctxt->contextSize = 1;
233
5.65k
    xpctxt->proximityPosition = 1;
234
5.65k
    xpctxt->opCount = 0;
235
5.65k
    xmlXPathObjectPtr xpathObj = xmlXPathCompiledEval(compExpr, xpctxt);
236
5.65k
    xmlXPathFreeCompExpr(compExpr);
237
238
    /* Clean object cache */
239
5.65k
    xmlXPathContextSetCache(xpctxt, 0, 0, 0);
240
5.65k
    xmlXPathContextSetCache(xpctxt, 1, -1, 0);
241
242
    /* Clean dictionaries */
243
5.65k
    if (xmlDictSize(tctxt->dict) > 0) {
244
915
        xmlDictFree(tctxt->dict);
245
915
        xmlDictFree(tctxt->style->dict);
246
915
        tctxt->style->dict = xmlDictCreate();
247
915
        tctxt->dict = xmlDictCreateSub(tctxt->style->dict);
248
915
    }
249
250
    /* Clean saxon:expression cache */
251
5.65k
    if (xmlHashSize(saxonExtHash) > 0) {
252
        /* There doesn't seem to be a cheaper way with the public API. */
253
0
        xsltShutdownCtxtExts(tctxt);
254
0
        xsltInitCtxtExts(tctxt);
255
0
        saxonExtHash = (xmlHashTablePtr)
256
0
            xsltStyleGetExtData(tctxt->style, SAXON_NAMESPACE);
257
0
    }
258
259
5.65k
    return xpathObj;
260
8.61k
}
261
262
void
263
8.61k
xsltFuzzXPathFreeObject(xmlXPathObjectPtr obj) {
264
8.61k
    xmlXPathFreeObject(obj);
265
266
    /* Some XSLT extension functions create RVTs. */
267
8.61k
    xsltFreeRVTs(tctxt);
268
8.61k
}
269
270
void
271
0
xsltFuzzXPathCleanup(void) {
272
0
    xsltStylesheetPtr style = tctxt->style;
273
274
0
    xmlXPathRegisteredNsCleanup(tctxt->xpathCtxt);
275
0
    xsltFreeSecurityPrefs(sec);
276
0
    sec = NULL;
277
0
    xsltFreeTransformContext(tctxt);
278
0
    tctxt = NULL;
279
0
    xsltFreeStylesheet(style);
280
0
    style = NULL;
281
0
    xmlFreeDoc(doc);
282
0
    doc = NULL;
283
0
}
284
285
/*
286
 * XSLT fuzzer
287
 *
288
 * This is a rather naive fuzz target using a static XML document.
289
 *
290
 * TODO
291
 *
292
 * - Improve seed corpus
293
 * - Mutate multiple input documents: source, xsl:import, xsl:include
294
 * - format-number() with xsl:decimal-format
295
 * - Better coverage for xsl:key and key() function
296
 * - EXSLT func:function
297
 * - xsl:document
298
 */
299
300
int
301
xsltFuzzXsltInit(int *argc_p ATTRIBUTE_UNUSED, char ***argv_p,
302
0
                 const char *dir) {
303
0
    const char *xmlFilename = "xslt.xml";
304
305
0
    xsltFuzzInit();
306
307
    /* Load XML document */
308
0
    doc = xsltFuzzLoadDoc((*argv_p)[0], dir, xmlFilename);
309
0
    if (doc == NULL)
310
0
        return -1;
311
312
0
    return 0;
313
0
}
314
315
xmlChar *
316
0
xsltFuzzXslt(const char *data, size_t size) {
317
0
    xmlDocPtr xsltDoc;
318
0
    xmlDocPtr result;
319
0
    xmlNodePtr xsltRoot;
320
0
    xsltStylesheetPtr sheet;
321
0
    xsltTransformContextPtr ctxt;
322
0
    xmlChar *ret = NULL;
323
0
    int retLen;
324
325
0
    xsltDoc = xmlReadMemory(data, size, NULL, NULL, 0);
326
0
    if (xsltDoc == NULL)
327
0
        return NULL;
328
0
    xsltRoot = xmlDocGetRootElement(xsltDoc);
329
0
    xmlNewNs(xsltRoot, EXSLT_COMMON_NAMESPACE, BAD_CAST "exsl");
330
0
    xmlNewNs(xsltRoot, EXSLT_COMMON_NAMESPACE, BAD_CAST "exslt");
331
0
    xmlNewNs(xsltRoot, EXSLT_CRYPTO_NAMESPACE, BAD_CAST "crypto");
332
0
    xmlNewNs(xsltRoot, EXSLT_DATE_NAMESPACE, BAD_CAST "date");
333
0
    xmlNewNs(xsltRoot, EXSLT_DYNAMIC_NAMESPACE, BAD_CAST "dyn");
334
0
    xmlNewNs(xsltRoot, EXSLT_MATH_NAMESPACE, BAD_CAST "math");
335
0
    xmlNewNs(xsltRoot, EXSLT_SETS_NAMESPACE, BAD_CAST "set");
336
0
    xmlNewNs(xsltRoot, EXSLT_STRINGS_NAMESPACE, BAD_CAST "str");
337
0
    xmlNewNs(xsltRoot, SAXON_NAMESPACE, BAD_CAST "saxon");
338
339
0
    sheet = xsltNewStylesheet();
340
0
    if (sheet == NULL) {
341
0
        xmlFreeDoc(xsltDoc);
342
0
        return NULL;
343
0
    }
344
0
    sheet->xpathCtxt->opLimit = 100000;
345
0
    sheet->xpathCtxt->opCount = 0;
346
0
    if (xsltParseStylesheetUser(sheet, xsltDoc) != 0) {
347
0
        xsltFreeStylesheet(sheet);
348
0
        xmlFreeDoc(xsltDoc);
349
0
        return NULL;
350
0
    }
351
352
0
    ctxt = xsltNewTransformContext(sheet, doc);
353
0
    xsltSetCtxtSecurityPrefs(sec, ctxt);
354
0
    ctxt->maxTemplateDepth = 100;
355
0
    ctxt->opLimit = 20000;
356
0
    ctxt->xpathCtxt->opLimit = 100000;
357
0
    ctxt->xpathCtxt->opCount = sheet->xpathCtxt->opCount;
358
359
0
    result = xsltApplyStylesheetUser(sheet, doc, NULL, NULL, NULL, ctxt);
360
0
    if (result != NULL)
361
0
        xsltSaveResultToString(&ret, &retLen, result, sheet);
362
363
0
    xmlFreeDoc(result);
364
0
    xsltFreeTransformContext(ctxt);
365
0
    xsltFreeStylesheet(sheet);
366
367
0
    return ret;
368
0
}
369
370
void
371
0
xsltFuzzXsltCleanup(void) {
372
0
    xsltFreeSecurityPrefs(sec);
373
0
    sec = NULL;
374
0
    xmlFreeDoc(doc);
375
0
    doc = NULL;
376
0
}