Coverage Report

Created: 2026-02-26 06:58

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/libxml2/fuzz/html.c
Line
Count
Source
1
/*
2
 * html.c: a libFuzzer target to test several HTML parser interfaces.
3
 *
4
 * See Copyright for the status of this software.
5
 */
6
7
#include <stdio.h>
8
#include <stdlib.h>
9
#include <string.h>
10
11
#include <libxml/HTMLparser.h>
12
#include <libxml/HTMLtree.h>
13
#include <libxml/catalog.h>
14
#include "fuzz.h"
15
16
int
17
LLVMFuzzerInitialize(int *argc ATTRIBUTE_UNUSED,
18
2
                     char ***argv ATTRIBUTE_UNUSED) {
19
2
    xmlFuzzMemSetup();
20
2
    xmlInitParser();
21
2
#ifdef LIBXML_CATALOG_ENABLED
22
2
    xmlInitializeCatalog();
23
2
    xmlCatalogSetDefaults(XML_CATA_ALLOW_NONE);
24
2
#endif
25
26
2
    return 0;
27
2
}
28
29
int
30
10.1k
LLVMFuzzerTestOneInput(const char *data, size_t size) {
31
10.1k
    xmlParserCtxtPtr ctxt;
32
10.1k
    htmlDocPtr doc;
33
10.1k
    const char *docBuffer;
34
10.1k
    size_t failurePos, docSize, maxChunkSize;
35
10.1k
    int opts, errorCode;
36
10.1k
#ifdef LIBXML_OUTPUT_ENABLED
37
10.1k
    xmlOutputBufferPtr out = NULL;
38
10.1k
#endif
39
40
10.1k
    xmlFuzzDataInit(data, size);
41
10.1k
    opts = (int) xmlFuzzReadInt(4);
42
10.1k
    failurePos = xmlFuzzReadInt(4) % (size + 100);
43
44
10.1k
    maxChunkSize = xmlFuzzReadInt(4) % (size + size / 8 + 1);
45
10.1k
    if (maxChunkSize == 0)
46
1.12k
        maxChunkSize = 1;
47
48
10.1k
    docBuffer = xmlFuzzReadRemaining(&docSize);
49
10.1k
    if (docBuffer == NULL) {
50
0
        xmlFuzzDataCleanup();
51
0
        return(0);
52
0
    }
53
54
    /* Pull parser */
55
56
10.1k
    xmlFuzzInjectFailure(failurePos);
57
10.1k
    ctxt = htmlNewParserCtxt();
58
10.1k
    if (ctxt == NULL) {
59
7
        errorCode = XML_ERR_NO_MEMORY;
60
10.1k
    } else {
61
10.1k
        xmlCtxtSetErrorHandler(ctxt, xmlFuzzSErrorFunc, NULL);
62
10.1k
        doc = htmlCtxtReadMemory(ctxt, docBuffer, docSize, NULL, NULL, opts);
63
10.1k
        errorCode = ctxt->errNo;
64
10.1k
        xmlFuzzCheckFailureReport("htmlCtxtReadMemory",
65
10.1k
                                  errorCode == XML_ERR_NO_MEMORY,
66
10.1k
                                  errorCode == XML_IO_EIO);
67
68
10.1k
        if (doc != NULL) {
69
8.44k
            xmlDocPtr copy;
70
71
8.44k
#ifdef LIBXML_OUTPUT_ENABLED
72
8.44k
            const xmlChar *content;
73
74
            /*
75
             * Also test the serializer. Call htmlDocContentDumpOutput with our
76
             * own buffer to avoid encoding the output. The HTML encoding is
77
             * excruciatingly slow (see htmlEntityValueLookup).
78
             */
79
8.44k
            out = xmlAllocOutputBuffer(NULL);
80
8.44k
            htmlDocContentDumpOutput(out, doc, NULL);
81
8.44k
            content = xmlOutputBufferGetContent(out);
82
8.44k
            xmlFuzzCheckFailureReport("htmlDocContentDumpOutput",
83
8.44k
                                      content == NULL, 0);
84
8.44k
            if (content == NULL) {
85
180
                xmlOutputBufferClose(out);
86
180
                out = NULL;
87
180
            }
88
8.44k
#endif
89
90
8.44k
            copy = xmlCopyDoc(doc, 1);
91
8.44k
            xmlFuzzCheckFailureReport("xmlCopyNode", copy == NULL, 0);
92
8.44k
            xmlFreeDoc(copy);
93
94
8.44k
            xmlFreeDoc(doc);
95
8.44k
        }
96
97
10.1k
        htmlFreeParserCtxt(ctxt);
98
10.1k
    }
99
100
101
    /* Push parser */
102
103
10.1k
#ifdef LIBXML_PUSH_ENABLED
104
10.1k
    xmlFuzzInjectFailure(failurePos);
105
10.1k
    ctxt = htmlCreatePushParserCtxt(NULL, NULL, NULL, 0, NULL,
106
10.1k
                                    XML_CHAR_ENCODING_NONE);
107
108
10.1k
    if (ctxt != NULL) {
109
10.1k
        size_t consumed;
110
10.1k
        int errorCodePush, numChunks, maxChunks;
111
112
10.1k
        xmlCtxtSetErrorHandler(ctxt, xmlFuzzSErrorFunc, NULL);
113
10.1k
        htmlCtxtUseOptions(ctxt, opts);
114
115
10.1k
        consumed = 0;
116
10.1k
        numChunks = 0;
117
10.1k
        maxChunks = 50 + docSize / 100;
118
482k
        while (numChunks == 0 ||
119
472k
               (consumed < docSize && numChunks < maxChunks)) {
120
472k
            size_t chunkSize;
121
472k
            int terminate;
122
123
472k
            numChunks += 1;
124
472k
            chunkSize = docSize - consumed;
125
126
472k
            if (numChunks < maxChunks && chunkSize > maxChunkSize) {
127
462k
                chunkSize = maxChunkSize;
128
462k
                terminate = 0;
129
462k
            } else {
130
10.1k
                terminate = 1;
131
10.1k
            }
132
133
472k
            htmlParseChunk(ctxt, docBuffer + consumed, chunkSize, terminate);
134
472k
            consumed += chunkSize;
135
472k
        }
136
137
10.1k
        errorCodePush = ctxt->errNo;
138
10.1k
        xmlFuzzCheckFailureReport("htmlParseChunk",
139
10.1k
                                  errorCodePush == XML_ERR_NO_MEMORY,
140
10.1k
                                  errorCodePush == XML_IO_EIO);
141
10.1k
        doc = ctxt->myDoc;
142
143
        /*
144
         * Push and pull parser differ in when exactly they
145
         * stop parsing, and the error code is the *last* error
146
         * reported, so we can't check whether the codes match.
147
         */
148
10.1k
        if (errorCode != XML_ERR_NO_MEMORY &&
149
8.44k
            errorCode != XML_IO_EIO &&
150
8.44k
            errorCodePush != XML_ERR_NO_MEMORY &&
151
8.43k
            errorCodePush != XML_IO_EIO &&
152
8.43k
            (errorCode == XML_ERR_OK) != (errorCodePush == XML_ERR_OK)) {
153
0
            fprintf(stderr, "pull/push parser error mismatch: %d != %d\n",
154
0
                    errorCode, errorCodePush);
155
#if 0
156
            FILE *f = fopen("c.html", "wb");
157
            fwrite(docBuffer, docSize, 1, f);
158
            fclose(f);
159
            fprintf(stderr, "opts: %X\n", opts);
160
#endif
161
0
            abort();
162
0
        }
163
164
10.1k
#ifdef LIBXML_OUTPUT_ENABLED
165
        /*
166
         * Verify that pull and push parser produce the same result.
167
         *
168
         * The NOBLANKS option doesn't work reliably in push mode.
169
         */
170
10.1k
        if ((opts & XML_PARSE_NOBLANKS) == 0 &&
171
6.80k
            errorCode == XML_ERR_OK &&
172
4.40k
            errorCodePush == XML_ERR_OK &&
173
4.40k
            out != NULL) {
174
4.32k
            xmlOutputBufferPtr outPush;
175
4.32k
            const xmlChar *content, *contentPush;
176
177
4.32k
            outPush = xmlAllocOutputBuffer(NULL);
178
4.32k
            htmlDocContentDumpOutput(outPush, doc, NULL);
179
4.32k
            content = xmlOutputBufferGetContent(out);
180
4.32k
            contentPush = xmlOutputBufferGetContent(outPush);
181
182
4.32k
            if (content != NULL && contentPush != NULL) {
183
4.31k
                size_t outSize = xmlOutputBufferGetSize(out);
184
185
4.31k
                if (outSize != xmlOutputBufferGetSize(outPush) ||
186
4.31k
                    memcmp(content, contentPush, outSize) != 0) {
187
0
                    fprintf(stderr, "pull/push parser roundtrip "
188
0
                            "mismatch\n");
189
#if 0
190
                    FILE *f = fopen("c.html", "wb");
191
                    fwrite(docBuffer, docSize, 1, f);
192
                    fclose(f);
193
                    fprintf(stderr, "opts: %X\n", opts);
194
                    fprintf(stderr, "---\n%s\n---\n%s\n---\n",
195
                            xmlOutputBufferGetContent(out),
196
                            xmlOutputBufferGetContent(outPush));
197
#endif
198
0
                    abort();
199
0
                }
200
4.31k
            }
201
202
4.32k
            xmlOutputBufferClose(outPush);
203
4.32k
        }
204
10.1k
#endif
205
206
10.1k
        xmlFreeDoc(doc);
207
10.1k
        htmlFreeParserCtxt(ctxt);
208
10.1k
    }
209
10.1k
#endif
210
211
    /* Cleanup */
212
213
10.1k
#ifdef LIBXML_OUTPUT_ENABLED
214
10.1k
    xmlOutputBufferClose(out);
215
10.1k
#endif
216
217
10.1k
    xmlFuzzInjectFailure(0);
218
10.1k
    xmlFuzzDataCleanup();
219
10.1k
    xmlResetLastError();
220
221
10.1k
    return(0);
222
10.1k
}
223
224
size_t
225
LLVMFuzzerCustomMutator(char *data, size_t size, size_t maxSize,
226
0
                        unsigned seed) {
227
0
    static const xmlFuzzChunkDesc chunks[] = {
228
0
        { 4, XML_FUZZ_PROB_ONE / 10 }, /* opts */
229
0
        { 4, XML_FUZZ_PROB_ONE / 10 }, /* failurePos */
230
0
        { 0, 0 }
231
0
    };
232
233
0
    return xmlFuzzMutateChunks(chunks, data, size, maxSize, seed,
234
0
                               LLVMFuzzerMutate);
235
0
}
236