Coverage Report

Created: 2025-07-18 06:55

/src/libxml2/fuzz/html.c
Line
Count
Source (jump to first uncovered line)
1
/*
2
 * html.c: a libFuzzer target to test several HTML parser interfaces.
3
 *
4
 * See Copyright for the status of this software.
5
 */
6
7
#include <stdio.h>
8
#include <stdlib.h>
9
#include <string.h>
10
11
#include <libxml/HTMLparser.h>
12
#include <libxml/HTMLtree.h>
13
#include <libxml/catalog.h>
14
#include "fuzz.h"
15
16
int
17
LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
18
2
                     char ***argv ATTRIBUTE_UNUSED) {
19
2
    xmlFuzzMemSetup();
20
2
    xmlInitParser();
21
2
#ifdef LIBXML_CATALOG_ENABLED
22
2
    xmlInitializeCatalog();
23
2
    xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE);
24
2
#endif
25
26
2
    return 0;
27
2
}
28
29
int
30
9.88k
LLVMFuzzerTestOneInput(const char *data, size_t size) {
31
9.88k
    xmlParserCtxtPtr ctxt;
32
9.88k
    htmlDocPtr doc;
33
9.88k
    const char *docBuffer;
34
9.88k
    size_t failurePos, docSize, maxChunkSize;
35
9.88k
    int opts, errorCode;
36
9.88k
#ifdef LIBXML_OUTPUT_ENABLED
37
9.88k
    xmlOutputBufferPtr out = NULL;
38
9.88k
#endif
39
40
9.88k
    xmlFuzzDataInit(data, size);
41
9.88k
    opts = (int) xmlFuzzReadInt(4);
42
9.88k
    failurePos = xmlFuzzReadInt(4) % (size + 100);
43
44
9.88k
    maxChunkSize = xmlFuzzReadInt(4) % (size + size / 8 + 1);
45
9.88k
    if (maxChunkSize == 0)
46
1.06k
        maxChunkSize = 1;
47
48
9.88k
    docBuffer = xmlFuzzReadRemaining(&docSize);
49
9.88k
    if (docBuffer == NULL) {
50
0
        xmlFuzzDataCleanup();
51
0
        return(0);
52
0
    }
53
54
    /* Pull parser */
55
56
9.88k
    xmlFuzzInjectFailure(failurePos);
57
9.88k
    ctxt = htmlNewParserCtxt();
58
9.88k
    if (ctxt == NULL) {
59
6
        errorCode = XML_ERR_NO_MEMORY;
60
9.87k
    } else {
61
9.87k
        xmlCtxtSetErrorHandler(ctxt, xmlFuzzSErrorFunc, NULL);
62
9.87k
        doc = htmlCtxtReadMemory(ctxt, docBuffer, docSize, NULL, NULL, opts);
63
9.87k
        errorCode = ctxt->errNo;
64
9.87k
        xmlFuzzCheckFailureReport("htmlCtxtReadMemory",
65
9.87k
                                  errorCode == XML_ERR_NO_MEMORY,
66
9.87k
                                  errorCode == XML_IO_EIO);
67
68
9.87k
        if (doc != NULL) {
69
8.23k
            xmlDocPtr copy;
70
71
8.23k
#ifdef LIBXML_OUTPUT_ENABLED
72
8.23k
            const xmlChar *content;
73
74
            /*
75
             * Also test the serializer. Call htmlDocContentDumpOutput with our
76
             * own buffer to avoid encoding the output. The HTML encoding is
77
             * excruciatingly slow (see htmlEntityValueLookup).
78
             */
79
8.23k
            out = xmlAllocOutputBuffer(NULL);
80
8.23k
            htmlDocContentDumpOutput(out, doc, NULL);
81
8.23k
            content = xmlOutputBufferGetContent(out);
82
8.23k
            xmlFuzzCheckFailureReport("htmlDocContentDumpOutput",
83
8.23k
                                      content == NULL, 0);
84
8.23k
            if (content == NULL) {
85
162
                xmlOutputBufferClose(out);
86
162
                out = NULL;
87
162
            }
88
8.23k
#endif
89
90
8.23k
            copy = xmlCopyDoc(doc, 1);
91
8.23k
            xmlFuzzCheckFailureReport("xmlCopyNode", copy == NULL, 0);
92
8.23k
            xmlFreeDoc(copy);
93
94
8.23k
            xmlFreeDoc(doc);
95
8.23k
        }
96
97
9.87k
        htmlFreeParserCtxt(ctxt);
98
9.87k
    }
99
100
101
    /* Push parser */
102
103
9.88k
#ifdef LIBXML_PUSH_ENABLED
104
9.88k
    xmlFuzzInjectFailure(failurePos);
105
9.88k
    ctxt = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL,
106
9.88k
                                    XML_CHAR_ENCODING_NONE);
107
108
9.88k
    if (ctxt != NULL) {
109
9.86k
        size_t consumed;
110
9.86k
        int errorCodePush, numChunks, maxChunks;
111
112
9.86k
        xmlCtxtSetErrorHandler(ctxt, xmlFuzzSErrorFunc, NULL);
113
9.86k
        htmlCtxtUseOptions(ctxt, opts);
114
115
9.86k
        consumed = 0;
116
9.86k
        numChunks = 0;
117
9.86k
        maxChunks = 50 + docSize / 100;
118
486k
        while (numChunks == 0 ||
119
486k
               (consumed < docSize && numChunks < maxChunks)) {
120
476k
            size_t chunkSize;
121
476k
            int terminate;
122
123
476k
            numChunks += 1;
124
476k
            chunkSize = docSize - consumed;
125
126
476k
            if (numChunks < maxChunks && chunkSize > maxChunkSize) {
127
466k
                chunkSize = maxChunkSize;
128
466k
                terminate = 0;
129
466k
            } else {
130
9.86k
                terminate = 1;
131
9.86k
            }
132
133
476k
            htmlParseChunk(ctxt, docBuffer + consumed, chunkSize, terminate);
134
476k
            consumed += chunkSize;
135
476k
        }
136
137
9.86k
        errorCodePush = ctxt->errNo;
138
9.86k
        xmlFuzzCheckFailureReport("htmlParseChunk",
139
9.86k
                                  errorCodePush == XML_ERR_NO_MEMORY,
140
9.86k
                                  errorCodePush == XML_IO_EIO);
141
9.86k
        doc = ctxt->myDoc;
142
143
        /*
144
         * Push and pull parser differ in when exactly they
145
         * stop parsing, and the error code is the *last* error
146
         * reported, so we can't check whether the codes match.
147
         */
148
9.86k
        if (errorCode != XML_ERR_NO_MEMORY &&
149
9.86k
            errorCode != XML_IO_EIO &&
150
9.86k
            errorCodePush != XML_ERR_NO_MEMORY &&
151
9.86k
            errorCodePush != XML_IO_EIO &&
152
9.86k
            (errorCode == XML_ERR_OK) != (errorCodePush == XML_ERR_OK)) {
153
0
            fprintf(stderr, "pull/push parser error mismatch: %d != %d\n",
154
0
                    errorCode, errorCodePush);
155
#if 0
156
            FILE *f = fopen("c.html", "wb");
157
            fwrite(docBuffer, docSize, 1, f);
158
            fclose(f);
159
            fprintf(stderr, "opts: %X\n", opts);
160
#endif
161
0
            abort();
162
0
        }
163
164
9.86k
#ifdef LIBXML_OUTPUT_ENABLED
165
        /*
166
         * Verify that pull and push parser produce the same result.
167
         *
168
         * The NOBLANKS option doesn't work reliably in push mode.
169
         */
170
9.86k
        if ((opts & XML_PARSE_NOBLANKS) == 0 &&
171
9.86k
            errorCode == XML_ERR_OK &&
172
9.86k
            errorCodePush == XML_ERR_OK &&
173
9.86k
            out != NULL) {
174
4.20k
            xmlOutputBufferPtr outPush;
175
4.20k
            const xmlChar *content, *contentPush;
176
177
4.20k
            outPush = xmlAllocOutputBuffer(NULL);
178
4.20k
            htmlDocContentDumpOutput(outPush, doc, NULL);
179
4.20k
            content = xmlOutputBufferGetContent(out);
180
4.20k
            contentPush = xmlOutputBufferGetContent(outPush);
181
182
4.20k
            if (content != NULL && contentPush != NULL) {
183
4.20k
                size_t outSize = xmlOutputBufferGetSize(out);
184
185
4.20k
                if (outSize != xmlOutputBufferGetSize(outPush) ||
186
4.20k
                    memcmp(content, contentPush, outSize) != 0) {
187
0
                    fprintf(stderr, "pull/push parser roundtrip "
188
0
                            "mismatch\n");
189
#if 0
190
                    FILE *f = fopen("c.html", "wb");
191
                    fwrite(docBuffer, docSize, 1, f);
192
                    fclose(f);
193
                    fprintf(stderr, "opts: %X\n", opts);
194
                    fprintf(stderr, "---\n%s\n---\n%s\n---\n",
195
                            xmlOutputBufferGetContent(out),
196
                            xmlOutputBufferGetContent(outPush));
197
#endif
198
0
                    abort();
199
0
                }
200
4.20k
            }
201
202
4.20k
            xmlOutputBufferClose(outPush);
203
4.20k
        }
204
9.86k
#endif
205
206
9.86k
        xmlFreeDoc(doc);
207
9.86k
        htmlFreeParserCtxt(ctxt);
208
9.86k
    }
209
9.88k
#endif
210
211
    /* Cleanup */
212
213
9.88k
#ifdef LIBXML_OUTPUT_ENABLED
214
9.88k
    xmlOutputBufferClose(out);
215
9.88k
#endif
216
217
9.88k
    xmlFuzzInjectFailure(0);
218
9.88k
    xmlFuzzDataCleanup();
219
9.88k
    xmlResetLastError();
220
221
9.88k
    return(0);
222
9.88k
}
223
224
size_t
225
LLVMFuzzerCustomMutator(char *data, size_t size, size_t maxSize,
226
0
                        unsigned seed) {
227
0
    static const xmlFuzzChunkDesc chunks[] = {
228
0
        { 4, XML_FUZZ_PROB_ONE / 10 }, /* opts */
229
0
        { 4, XML_FUZZ_PROB_ONE / 10 }, /* failurePos */
230
0
        { 0, 0 }
231
0
    };
232
233
0
    return xmlFuzzMutateChunks(chunks, data, size, maxSize, seed,
234
0
                               LLVMFuzzerMutate);
235
0
}
236