Line | Count | Source (jump to first uncovered line) |
1 | | /* |
2 | | * fuzz.c: Common functions for fuzzing. |
3 | | * |
4 | | * See Copyright for the status of this software. |
5 | | */ |
6 | | |
7 | | #include <stdio.h> |
8 | | #include <stdlib.h> |
9 | | #include <string.h> |
10 | | #include <sys/stat.h> |
11 | | |
12 | | #include <libxml/hash.h> |
13 | | #include <libxml/parser.h> |
14 | | #include <libxml/parserInternals.h> |
15 | | #include <libxml/tree.h> |
16 | | #include <libxml/xmlIO.h> |
17 | | #include "fuzz.h" |
18 | | |
19 | | typedef struct { /* One entity read from the fuzz input; stored in fuzzData.entities keyed by its URL */ |
20 | | const char *data; /* Entity content: an unescaped string returned by xmlFuzzReadString */ |
21 | | size_t size; /* Length of data in bytes, not counting the terminating zero */ |
22 | | } xmlFuzzEntityInfo; |
23 | | |
24 | | /* Single static instance of the fuzz data provider state (for now) */ |
25 | | static struct { |
26 | | /* Original data, exactly as passed to xmlFuzzDataInit */ |
27 | | const char *data; |
28 | | size_t size; |
29 | | |
30 | | /* Remaining data: read cursor and number of bytes not yet consumed */ |
31 | | const char *ptr; |
32 | | size_t remaining; |
33 | | |
34 | | /* Buffer for unescaped strings, allocated in xmlFuzzDataInit and freed in xmlFuzzDataCleanup */ |
35 | | char *outBuf; |
36 | | char *outPtr; /* Free space at end of buffer */ |
37 | | |
38 | | xmlHashTablePtr entities; /* Maps URLs to xmlFuzzEntityInfos */ |
39 | | |
40 | | /* The first entity is the main entity. Both fields stay NULL until xmlFuzzReadEntities stores one. */ |
41 | | const char *mainUrl; |
42 | | xmlFuzzEntityInfo *mainEntity; |
43 | | } fuzzData; |
44 | | |
45 | | /** |
46 | | * xmlFuzzErrorFunc: |
47 | | * @ctx: unused. @msg: unused. The variadic arguments are ignored as well. |
48 | | * An error function that simply discards all errors: the body is empty. |
49 | | */ |
50 | | void |
51 | | xmlFuzzErrorFunc(void *ctx ATTRIBUTE_UNUSED, const char *msg ATTRIBUTE_UNUSED, |
52 | 1.06G | ...) { |
53 | 1.06G | } |
54 | | |
55 | | /** |
56 | | * xmlFuzzDataInit: |
57 | | * @data: fuzz input; only the pointer is stored, the bytes are not copied. @size: length of @data in bytes. |
58 | | * Initialize fuzz data provider: reset the read cursor to the start of @data, allocate the unescape buffer and create the entity table. |
59 | | */ |
60 | | void |
61 | 365k | xmlFuzzDataInit(const char *data, size_t size) { |
62 | 365k | fuzzData.data = data; |
63 | 365k | fuzzData.size = size; |
64 | 365k | fuzzData.ptr = data; |
65 | 365k | fuzzData.remaining = size; |
66 | | |
67 | 365k | fuzzData.outBuf = xmlMalloc(size + 1); /* xmlFuzzReadString writes at most one byte per input byte, plus one terminator for a final unterminated string; NOTE(review): result is not checked for NULL */ |
68 | 365k | fuzzData.outPtr = fuzzData.outBuf; |
69 | | |
70 | 365k | fuzzData.entities = xmlHashCreate(8); /* NOTE(review): result is not checked for NULL either */ |
71 | 365k | fuzzData.mainUrl = NULL; |
72 | 365k | fuzzData.mainEntity = NULL; |
73 | 365k | } |
74 | | |
75 | | /** |
76 | | * xmlFuzzDataCleanup: |
77 | | * |
78 | | * Cleanup fuzz data provider: free the unescape buffer and the entity table. The fields of fuzzData are not reset afterwards. |
79 | | */ |
80 | | void |
81 | 358k | xmlFuzzDataCleanup(void) { |
82 | 358k | xmlFree(fuzzData.outBuf); |
83 | 358k | xmlHashFree(fuzzData.entities, xmlHashDefaultDeallocator); /* the deallocator is applied to the stored xmlFuzzEntityInfo values */ |
84 | 358k | } |
85 | | |
86 | | /** |
87 | | * xmlFuzzReadInt: |
88 | | * |
89 | | * Read an integer from the fuzz data by copying sizeof(int) raw bytes from the read cursor and advancing past them. |
90 | | * Returns the integer, or 0 if fewer than sizeof(int) bytes remain (nothing is consumed in that case). |
91 | | */ |
92 | | int |
93 | 365k | xmlFuzzReadInt() { |
94 | 365k | int ret; |
95 | | |
96 | 365k | if (fuzzData.remaining < sizeof(int)) |
97 | 17 | return(0); |
98 | 365k | memcpy(&ret, fuzzData.ptr, sizeof(int)); /* memcpy rather than a pointer cast: the cursor may be at any byte offset */ |
99 | 365k | fuzzData.ptr += sizeof(int); |
100 | 365k | fuzzData.remaining -= sizeof(int); |
101 | | |
102 | 365k | return ret; |
103 | 365k | } |
104 | | |
105 | | /** |
106 | | * xmlFuzzReadRemaining: |
107 | | * @size: set to the number of bytes returned |
108 | | * |
109 | | * Read remaining bytes from fuzz data. Returns the current read cursor (a pointer into the original input, no copy is made) and marks all data as consumed. |
110 | | */ |
111 | | const char * |
112 | 0 | xmlFuzzReadRemaining(size_t *size) { |
113 | 0 | const char *ret = fuzzData.ptr; |
114 |

115 | 0 | *size = fuzzData.remaining; |
116 | 0 | fuzzData.ptr += fuzzData.remaining; |
117 | 0 | fuzzData.remaining = 0; |
118 |

119 | 0 | return(ret); |
120 | 0 | } |
121 | | |
122 | | /** |
123 | | * xmlFuzzWriteString: |
124 | | * @out: output file |
125 | | * @str: zero-terminated string to write |
126 | | * |
127 | | * Write a random-length string to file in a format similar to |
128 | | * FuzzedDataProvider. Backslash followed by newline marks the end of the |
129 | | * string. Two backslashes are used to escape a backslash. This is the format xmlFuzzReadString parses. |
130 | | */ |
131 | | void |
132 | 0 | xmlFuzzWriteString(FILE *out, const char *str) { |
133 | 0 | for (; *str; str++) { |
134 | 0 | int c = (unsigned char) *str; |
135 | 0 | putc(c, out); |
136 | 0 | if (c == '\\') /* emit every backslash twice so it cannot be mistaken for the terminator */ |
137 | 0 | putc(c, out); |
138 | 0 | } |
139 | 0 | putc('\\', out); /* end-of-string marker: backslash, newline */ |
140 | 0 | putc('\n', out); |
141 | 0 | } |
142 | | |
143 | | /** |
144 | | * xmlFuzzReadString: |
145 | | * @size: set to the length of the returned string in bytes, or to 0 when NULL is returned |
146 | | * |
147 | | * Read a random-length string from the fuzz data. The result is stored in the shared unescape buffer (fuzzData.outBuf). |
148 | | * |
149 | | * The format is similar to libFuzzer's FuzzedDataProvider but treats |
150 | | * backslash followed by newline as end of string. This makes the fuzz data |
151 | | * more readable. A backslash character is escaped with another backslash. |
152 | | * A backslash followed by any other byte, or a backslash that is the very last byte, is copied as is. |
153 | | * Returns a zero-terminated string or NULL if the fuzz data is exhausted. |
154 | | */ |
155 | | const char * |
156 | 1.81M | xmlFuzzReadString(size_t *size) { |
157 | 1.81M | const char *out = fuzzData.outPtr; |
158 | | |
159 | 2.75G | while (fuzzData.remaining > 0) { |
160 | 2.75G | int c = *fuzzData.ptr++; |
161 | 2.75G | fuzzData.remaining--; |
162 | | |
163 | 2.75G | if ((c == '\\') && (fuzzData.remaining > 0)) { /* possible escape: peek at the next byte without consuming it */ |
164 | 1.84M | int c2 = *fuzzData.ptr; |
165 | | |
166 | 1.84M | if (c2 == '\n') { /* backslash-newline: end of string, consume the newline too */ |
167 | 1.41M | fuzzData.ptr++; |
168 | 1.41M | fuzzData.remaining--; |
169 | 1.41M | *size = fuzzData.outPtr - out; /* computed before the terminator is written, so it is not counted */ |
170 | 1.41M | *fuzzData.outPtr++ = '\0'; |
171 | 1.41M | return(out); |
172 | 1.41M | } |
173 | 428k | if (c2 == '\\') { /* escaped backslash: skip the second one, a single backslash is stored below */ |
174 | 283k | fuzzData.ptr++; |
175 | 283k | fuzzData.remaining--; |
176 | 283k | } |
177 | 428k | } |
178 | | |
179 | 2.75G | *fuzzData.outPtr++ = c; |
180 | 2.75G | } |
181 | | |
182 | 407k | if (fuzzData.outPtr > out) { /* input ended without a terminator: return whatever was collected */ |
183 | 45.1k | *size = fuzzData.outPtr - out; |
184 | 45.1k | *fuzzData.outPtr++ = '\0'; |
185 | 45.1k | return(out); |
186 | 45.1k | } |
187 | | |
188 | 362k | *size = 0; /* nothing was read at all */ |
189 | 362k | return(NULL); |
190 | 407k | } |
191 | | |
192 | | /** |
193 | | * xmlFuzzReadEntities: |
194 | | * |
195 | | * Read entities like the main XML file, external DTDs, external parsed |
196 | | * entities from fuzz data. Input is consumed as (URL, content) string pairs until a string cannot be read. The first entity stored becomes the main entity; a repeated URL keeps its first content. |
197 | | */ |
198 | | void |
199 | 362k | xmlFuzzReadEntities(void) { |
200 | 362k | size_t num = 0; /* number of entities stored so far */ |
201 | | |
202 | 1.07M | while (1) { |
203 | 1.07M | const char *url, *entity; |
204 | 1.07M | size_t urlSize, entitySize; |
205 | 1.07M | xmlFuzzEntityInfo *entityInfo; |
206 | | |
207 | 1.07M | url = xmlFuzzReadString(&urlSize); |
208 | 1.07M | if (url == NULL) break; |
209 | | |
210 | 746k | entity = xmlFuzzReadString(&entitySize); |
211 | 746k | if (entity == NULL) break; /* a URL without content is dropped */ |
212 | | |
213 | 711k | if (xmlHashLookup(fuzzData.entities, (xmlChar *)url) == NULL) { /* only the first occurrence of a URL is stored */ |
214 | 567k | entityInfo = xmlMalloc(sizeof(xmlFuzzEntityInfo)); |
215 | 567k | if (entityInfo == NULL) |
216 | 0 | break; |
217 | 567k | entityInfo->data = entity; |
218 | 567k | entityInfo->size = entitySize; |
219 | | |
220 | 567k | xmlHashAddEntry(fuzzData.entities, (xmlChar *)url, entityInfo); /* NOTE(review): return value is not checked */ |
221 | | |
222 | 567k | if (num == 0) { |
223 | 359k | fuzzData.mainUrl = url; |
224 | 359k | fuzzData.mainEntity = entityInfo; |
225 | 359k | } |
226 | | |
227 | 567k | num++; |
228 | 567k | } |
229 | 711k | } |
230 | 362k | } |
231 | | |
232 | | /** |
233 | | * xmlFuzzMainUrl: |
234 | | * |
235 | | * Returns the main URL, i.e. the URL of the first entity stored by xmlFuzzReadEntities, or NULL if none has been stored. |
236 | | */ |
237 | | const char * |
238 | 362k | xmlFuzzMainUrl(void) { |
239 | 362k | return(fuzzData.mainUrl); |
240 | 362k | } |
241 | | |
242 | | /** |
243 | | * xmlFuzzMainEntity: |
244 | | * @size: set to the size of the main entity in bytes; left untouched when NULL is returned |
245 | | * |
246 | | * Returns the content of the main entity, or NULL if no main entity has been stored. |
247 | | */ |
248 | | const char * |
249 | 362k | xmlFuzzMainEntity(size_t *size) { |
250 | 362k | if (fuzzData.mainEntity == NULL) |
251 | 2.32k | return(NULL); |
252 | 359k | *size = fuzzData.mainEntity->size; |
253 | 359k | return(fuzzData.mainEntity->data); |
254 | 362k | } |
255 | | |
256 | | /** |
257 | | * xmlFuzzEntityLoader: |
258 | | * @URL: key looked up in fuzzData.entities. @ID: unused. @ctxt: parser context passed to xmlNewInputStream. |
259 | | * The entity loader for fuzz data. Returns a new parser input over the stored entity content, or NULL if @URL is NULL, is not in the table, or the memory input buffer cannot be created. |
260 | | */ |
261 | | xmlParserInputPtr |
262 | | xmlFuzzEntityLoader(const char *URL, const char *ID ATTRIBUTE_UNUSED, |
263 | 5.36M | xmlParserCtxtPtr ctxt) { |
264 | 5.36M | xmlParserInputPtr input; |
265 | 5.36M | xmlFuzzEntityInfo *entity; |
266 | | |
267 | 5.36M | if (URL == NULL) |
268 | 4.69k | return(NULL); |
269 | 5.35M | entity = xmlHashLookup(fuzzData.entities, (xmlChar *) URL); |
270 | 5.35M | if (entity == NULL) |
271 | 270k | return(NULL); |
272 | | |
273 | 5.08M | input = xmlNewInputStream(ctxt); /* NOTE(review): the result is dereferenced on the next line without a NULL check */ |
274 | 5.08M | input->filename = NULL; |
275 | 5.08M | input->buf = xmlParserInputBufferCreateMem(entity->data, entity->size, |
276 | 5.08M | XML_CHAR_ENCODING_NONE); |
277 | 5.08M | if (input->buf == NULL) { |
278 | 0 | xmlFreeInputStream(input); |
279 | 0 | return(NULL); |
280 | 0 | } |
281 | 5.08M | input->base = input->cur = xmlBufContent(input->buf->buffer); /* point the input's cursors at the buffer's content */ |
282 | 5.08M | input->end = input->base + entity->size; |
283 | | |
284 | 5.08M | return input; |
285 | 5.08M | } |
286 | | |
287 | | /** |
288 | | * xmlFuzzExtractStrings: |
289 | | * @data, @size: input bytes. @strings: output array with room for @numStrings pointers. @numStrings: maximum number of strings to extract. |
290 | | * Extract C strings from input data, splitting at zero bytes. Use exact-size allocations to detect |
291 | | * potential memory errors. Unused slots of @strings are set to NULL. Returns the number of strings extracted. |
292 | | */ |
293 | | size_t |
294 | | xmlFuzzExtractStrings(const char *data, size_t size, char **strings, |
295 | 0 | size_t numStrings) { |
296 | 0 | const char *start = data; |
297 | 0 | const char *end = data + size; |
298 | 0 | size_t i = 0, ret; |
299 |

300 | 0 | while (i < numStrings) { |
301 | 0 | size_t strSize = end - start; /* default: everything that is left, used when no zero byte follows */ |
302 | 0 | const char *zero = memchr(start, 0, strSize); |
303 |

304 | 0 | if (zero != NULL) |
305 | 0 | strSize = zero - start; |
306 |

307 | 0 | strings[i] = xmlMalloc(strSize + 1); /* NOTE(review): result is not checked for NULL before the memcpy below */ |
308 | 0 | memcpy(strings[i], start, strSize); |
309 | 0 | strings[i][strSize] = '\0'; |
310 |

311 | 0 | i++; |
312 | 0 | if (zero != NULL) |
313 | 0 | start = zero + 1; /* continue after the separator */ |
314 | 0 | else |
315 | 0 | break; /* the rest of the input was the last string */ |
316 | 0 | } |
317 |

318 | 0 | ret = i; |
319 |

320 | 0 | while (i < numStrings) { /* clear the slots that were not filled */ |
321 | 0 | strings[i] = NULL; |
322 | 0 | i++; |
323 | 0 | } |
324 |

325 | 0 | return(ret); |
326 | 0 | } |
327 | | |
328 | | char * /* xmlSlurpFile: read a whole regular file into a buffer allocated with xmlMalloc and append a terminating zero */ |
329 | 0 | xmlSlurpFile(const char *path, size_t *sizeRet) { /* sizeRet may be NULL; it is written only on success. Returns NULL on any failure. */ |
330 | 0 | FILE *file; |
331 | 0 | struct stat statbuf; |
332 | 0 | char *data; |
333 | 0 | size_t size; |
334 |

335 | 0 | if ((stat(path, &statbuf) != 0) || (!S_ISREG(statbuf.st_mode))) /* only regular files are accepted */ |
336 | 0 | return(NULL); |
337 | 0 | size = statbuf.st_size; |
338 | 0 | file = fopen(path, "rb"); |
339 | 0 | if (file == NULL) |
340 | 0 | return(NULL); |
341 | 0 | data = xmlMalloc(size + 1); /* one extra byte for the terminating zero */ |
342 | 0 | if (data != NULL) { |
343 | 0 | if (fread(data, 1, size, file) != size) { /* a short read counts as failure */ |
344 | 0 | xmlFree(data); |
345 | 0 | data = NULL; |
346 | 0 | } else { |
347 | 0 | data[size] = 0; |
348 | 0 | if (sizeRet != NULL) |
349 | 0 | *sizeRet = size; |
350 | 0 | } |
351 | 0 | } |
352 | 0 | fclose(file); |
353 |

354 | 0 | return(data); |
355 | 0 | } |
356 | | |