Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * fuzz.c: Common functions for fuzzing. |
3 | | * |
4 | | * See Copyright for the status of this software. |
5 | | */ |
6 | | |
7 | | #include <stdio.h> |
8 | | #include <stdlib.h> |
9 | | #include <string.h> |
10 | | #include <sys/stat.h> |
11 | | |
12 | | #include <libxml/hash.h> |
13 | | #include <libxml/parser.h> |
14 | | #include <libxml/parserInternals.h> |
15 | | #include <libxml/tree.h> |
16 | | #include <libxml/xmlIO.h> |
17 | | #include "fuzz.h" |
18 | | |
/*
 * Describes one entity read from the fuzz input: a pointer to its
 * content and the content length in bytes. Records of this type are
 * stored as payloads in the fuzzData.entities hash table.
 */
typedef struct {
    const char *data; /* entity content */
    size_t size;      /* length of the content in bytes */
} xmlFuzzEntityInfo;
23 | | |
/*
 * State of the fuzz data provider.
 *
 * Single static instance for now
 */
static struct {
    /* Original data, as passed to xmlFuzzDataInit */
    const char *data;
    size_t size;

    /* Remaining data: read cursor and number of bytes left after it */
    const char *ptr;
    size_t remaining;

    /* Buffer for unescaped strings (size + 1 bytes are requested) */
    char *outBuf;
    char *outPtr; /* Free space at end of buffer */

    xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */

    /* The first entity is the main entity. */
    const char *mainUrl;
    xmlFuzzEntityInfo *mainEntity;
} fuzzData;
44 | | |
45 | | /** |
46 | | * xmlFuzzErrorFunc: |
47 | | * |
48 | | * An error function that simply discards all errors. |
49 | | */ |
50 | | void |
51 | | xmlFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED, |
52 | 694k | ...) { |
53 | 694k | } |
54 | | |
55 | | /** |
56 | | * xmlFuzzDataInit: |
57 | | * |
58 | | * Initialize fuzz data provider. |
59 | | */ |
60 | | void |
61 | 4.44k | xmlFuzzDataInit(const char *data, size_t size) { |
62 | 4.44k | fuzzData.data = data; |
63 | 4.44k | fuzzData.size = size; |
64 | 4.44k | fuzzData.ptr = data; |
65 | 4.44k | fuzzData.remaining = size; |
66 | | |
67 | 4.44k | fuzzData.outBuf = xmlMalloc(size + 1); |
68 | 4.44k | fuzzData.outPtr = fuzzData.outBuf; |
69 | | |
70 | 4.44k | fuzzData.entities = xmlHashCreate(8); |
71 | 4.44k | fuzzData.mainUrl = NULL; |
72 | 4.44k | fuzzData.mainEntity = NULL; |
73 | 4.44k | } |
74 | | |
75 | | /** |
76 | | * xmlFuzzDataFree: |
77 | | * |
78 | | * Cleanup fuzz data provider. |
79 | | */ |
80 | | void |
81 | 4.44k | xmlFuzzDataCleanup(void) { |
82 | 4.44k | xmlFree(fuzzData.outBuf); |
83 | 4.44k | xmlHashFree(fuzzData.entities, xmlHashDefaultDeallocator); |
84 | 4.44k | } |
85 | | |
86 | | /** |
87 | | * xmlFuzzReadInt: |
88 | | * @size: size of string in bytes |
89 | | * |
90 | | * Read an integer from the fuzz data. |
91 | | */ |
92 | | int |
93 | 4.44k | xmlFuzzReadInt(void) { |
94 | 4.44k | int ret; |
95 | | |
96 | 4.44k | if (fuzzData.remaining < sizeof(int)) |
97 | 0 | return(0); |
98 | 4.44k | memcpy(&ret, fuzzData.ptr, sizeof(int)); |
99 | 4.44k | fuzzData.ptr += sizeof(int); |
100 | 4.44k | fuzzData.remaining -= sizeof(int); |
101 | | |
102 | 4.44k | return ret; |
103 | 4.44k | } |
104 | | |
105 | | /** |
106 | | * xmlFuzzReadRemaining: |
107 | | * @size: size of string in bytes |
108 | | * |
109 | | * Read remaining bytes from fuzz data. |
110 | | */ |
111 | | const char * |
112 | 0 | xmlFuzzReadRemaining(size_t *size) { |
113 | 0 | const char *ret = fuzzData.ptr; |
114 | |
|
115 | 0 | *size = fuzzData.remaining; |
116 | 0 | fuzzData.ptr += fuzzData.remaining; |
117 | 0 | fuzzData.remaining = 0; |
118 | |
|
119 | 0 | return(ret); |
120 | 0 | } |
121 | | |
/**
 * xmlFuzzWriteString:
 * @out:  output file
 * @str:  zero-terminated string to write
 *
 * Write a random-length string to file in a format similar to
 * FuzzedDataProvider. Backslash followed by newline marks the end of the
 * string. Two backslashes are used to escape a backslash.
 */
void
xmlFuzzWriteString(FILE *out, const char *str) {
    const unsigned char *cur = (const unsigned char *) str;

    while (*cur != 0) {
        int ch = *cur;

        putc(ch, out);
        /* Double every backslash so it is not taken for a terminator. */
        if (ch == '\\')
            putc('\\', out);
        cur++;
    }

    /* End-of-string marker: backslash, newline. */
    putc('\\', out);
    putc('\n', out);
}
142 | | |
143 | | /** |
144 | | * xmlFuzzReadString: |
145 | | * @size: size of string in bytes |
146 | | * |
147 | | * Read a random-length string from the fuzz data. |
148 | | * |
149 | | * The format is similar to libFuzzer's FuzzedDataProvider but treats |
150 | | * backslash followed by newline as end of string. This makes the fuzz data |
151 | | * more readable. A backslash character is escaped with another backslash. |
152 | | * |
153 | | * Returns a zero-terminated string or NULL if the fuzz data is exhausted. |
154 | | */ |
155 | | const char * |
156 | 18.1k | xmlFuzzReadString(size_t *size) { |
157 | 18.1k | const char *out = fuzzData.outPtr; |
158 | | |
159 | 356M | while (fuzzData.remaining > 0) { |
160 | 356M | int c = *fuzzData.ptr++; |
161 | 356M | fuzzData.remaining--; |
162 | | |
163 | 356M | if ((c == '\\') && (fuzzData.remaining > 0)) { |
164 | 137k | int c2 = *fuzzData.ptr; |
165 | | |
166 | 137k | if (c2 == '\n') { |
167 | 11.5k | fuzzData.ptr++; |
168 | 11.5k | fuzzData.remaining--; |
169 | 11.5k | *size = fuzzData.outPtr - out; |
170 | 11.5k | *fuzzData.outPtr++ = '\0'; |
171 | 11.5k | return(out); |
172 | 11.5k | } |
173 | 126k | if (c2 == '\\') { |
174 | 64.9k | fuzzData.ptr++; |
175 | 64.9k | fuzzData.remaining--; |
176 | 64.9k | } |
177 | 126k | } |
178 | | |
179 | 356M | *fuzzData.outPtr++ = c; |
180 | 356M | } |
181 | | |
182 | 6.62k | if (fuzzData.outPtr > out) { |
183 | 2.17k | *size = fuzzData.outPtr - out; |
184 | 2.17k | *fuzzData.outPtr++ = '\0'; |
185 | 2.17k | return(out); |
186 | 2.17k | } |
187 | | |
188 | 4.44k | *size = 0; |
189 | 4.44k | return(NULL); |
190 | 6.62k | } |
191 | | |
192 | | /** |
193 | | * xmlFuzzReadEntities: |
194 | | * |
195 | | * Read entities like the main XML file, external DTDs, external parsed |
196 | | * entities from fuzz data. |
197 | | */ |
198 | | void |
199 | 4.44k | xmlFuzzReadEntities(void) { |
200 | 4.44k | size_t num = 0; |
201 | | |
202 | 11.1k | while (1) { |
203 | 11.1k | const char *url, *entity; |
204 | 11.1k | size_t urlSize, entitySize; |
205 | 11.1k | xmlFuzzEntityInfo *entityInfo; |
206 | | |
207 | 11.1k | url = xmlFuzzReadString(&urlSize); |
208 | 11.1k | if (url == NULL) break; |
209 | | |
210 | 7.00k | entity = xmlFuzzReadString(&entitySize); |
211 | 7.00k | if (entity == NULL) break; |
212 | | |
213 | 6.75k | if (xmlHashLookup(fuzzData.entities, (xmlChar *)url) == NULL) { |
214 | 6.64k | entityInfo = xmlMalloc(sizeof(xmlFuzzEntityInfo)); |
215 | 6.64k | if (entityInfo == NULL) |
216 | 0 | break; |
217 | 6.64k | entityInfo->data = entity; |
218 | 6.64k | entityInfo->size = entitySize; |
219 | | |
220 | 6.64k | xmlHashAddEntry(fuzzData.entities, (xmlChar *)url, entityInfo); |
221 | | |
222 | 6.64k | if (num == 0) { |
223 | 4.43k | fuzzData.mainUrl = url; |
224 | 4.43k | fuzzData.mainEntity = entityInfo; |
225 | 4.43k | } |
226 | | |
227 | 6.64k | num++; |
228 | 6.64k | } |
229 | 6.75k | } |
230 | 4.44k | } |
231 | | |
232 | | /** |
233 | | * xmlFuzzMainUrl: |
234 | | * |
235 | | * Returns the main URL. |
236 | | */ |
237 | | const char * |
238 | 4.44k | xmlFuzzMainUrl(void) { |
239 | 4.44k | return(fuzzData.mainUrl); |
240 | 4.44k | } |
241 | | |
242 | | /** |
243 | | * xmlFuzzMainEntity: |
244 | | * @size: size of the main entity in bytes |
245 | | * |
246 | | * Returns the main entity. |
247 | | */ |
248 | | const char * |
249 | 4.44k | xmlFuzzMainEntity(size_t *size) { |
250 | 4.44k | if (fuzzData.mainEntity == NULL) |
251 | 12 | return(NULL); |
252 | 4.43k | *size = fuzzData.mainEntity->size; |
253 | 4.43k | return(fuzzData.mainEntity->data); |
254 | 4.44k | } |
255 | | |
256 | | /** |
257 | | * xmlFuzzEntityLoader: |
258 | | * |
259 | | * The entity loader for fuzz data. |
260 | | */ |
261 | | xmlParserInputPtr |
262 | | xmlFuzzEntityLoader(const char *URL, const char *ID ATTRIBUTE_UNUSED, |
263 | 12.3k | xmlParserCtxtPtr ctxt) { |
264 | 12.3k | xmlParserInputPtr input; |
265 | 12.3k | xmlFuzzEntityInfo *entity; |
266 | | |
267 | 12.3k | if (URL == NULL) |
268 | 18 | return(NULL); |
269 | 12.3k | entity = xmlHashLookup(fuzzData.entities, (xmlChar *) URL); |
270 | 12.3k | if (entity == NULL) |
271 | 8.31k | return(NULL); |
272 | | |
273 | 4.05k | input = xmlNewInputStream(ctxt); |
274 | 4.05k | input->filename = NULL; |
275 | 4.05k | input->buf = xmlParserInputBufferCreateMem(entity->data, entity->size, |
276 | 4.05k | XML_CHAR_ENCODING_NONE); |
277 | 4.05k | if (input->buf == NULL) { |
278 | 0 | xmlFreeInputStream(input); |
279 | 0 | return(NULL); |
280 | 0 | } |
281 | 4.05k | input->base = input->cur = xmlBufContent(input->buf->buffer); |
282 | 4.05k | input->end = input->base + entity->size; |
283 | | |
284 | 4.05k | return input; |
285 | 4.05k | } |
286 | | |
/**
 * xmlFuzzExtractStrings:
 * @data:  input data
 * @size:  size of @data in bytes
 * @strings:  array of @numStrings slots to fill
 * @numStrings:  capacity of @strings
 *
 * Extract C strings from input data. Use exact-size allocations to detect
 * potential memory errors.
 *
 * The input is split at zero bytes; the last piece extends to the end of
 * the data. Each piece is copied into its own allocation of exactly
 * length + 1 bytes. Slots beyond the extracted pieces are set to NULL.
 * A slot whose allocation fails is left NULL as well, but still counts
 * towards the return value.
 *
 * Returns the number of slots processed from the input.
 */
size_t
xmlFuzzExtractStrings(const char *data, size_t size, char **strings,
                      size_t numStrings) {
    const char *start = data;
    const char *end = data + size;
    size_t i = 0, ret;

    while (i < numStrings) {
        size_t strSize = end - start;
        const char *zero = memchr(start, 0, strSize);

        if (zero != NULL)
            strSize = zero - start;

        strings[i] = xmlMalloc(strSize + 1);
        /* Only fill the slot if the allocation succeeded. */
        if (strings[i] != NULL) {
            memcpy(strings[i], start, strSize);
            strings[i][strSize] = '\0';
        }

        i++;
        if (zero != NULL)
            start = zero + 1;
        else
            break;
    }

    ret = i;

    while (i < numStrings) {
        strings[i] = NULL;
        i++;
    }

    return(ret);
}
327 | | |
/**
 * xmlSlurpFile:
 * @path:  path of the file to read
 * @sizeRet:  receives the file size in bytes on success, may be NULL
 *
 * Read a whole regular file into a newly allocated buffer. The buffer
 * is allocated with xmlMalloc and gets a trailing zero byte that is not
 * counted in @sizeRet.
 *
 * Returns the buffer, or NULL if @path is not a regular file, cannot be
 * opened, cannot be read completely, or the allocation fails.
 */
char *
xmlSlurpFile(const char *path, size_t *sizeRet) {
    struct stat info;
    FILE *fp;
    char *buf;
    size_t len;

    if (stat(path, &info) != 0)
        return(NULL);
    if (!S_ISREG(info.st_mode))
        return(NULL);
    len = info.st_size;

    fp = fopen(path, "rb");
    if (fp == NULL)
        return(NULL);

    buf = xmlMalloc(len + 1);
    if (buf == NULL)
        goto done;

    /* A short read is treated as failure. */
    if (fread(buf, 1, len, fp) != len) {
        xmlFree(buf);
        buf = NULL;
        goto done;
    }

    buf[len] = 0;
    if (sizeRet != NULL)
        *sizeRet = len;

done:
    fclose(fp);

    return(buf);
}
356 | | |