Coverage Report

Created: 2026-03-31 06:16

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/expat/expat/fuzz/xml_lpm_fuzzer.cpp
Line
Count
Source
1
/*
2
                            __  __            _
3
                         ___\ \/ /_ __   __ _| |_
4
                        / _ \\  /| '_ \ / _` | __|
5
                       |  __//  \| |_) | (_| | |_
6
                        \___/_/\_\ .__/ \__,_|\__|
7
                                 |_| XML parser
8
9
   Copyright (c) 2022 Mark Brand <markbrand@google.com>
10
   Copyright (c) 2025 Sebastian Pipping <sebastian@pipping.org>
11
   Licensed under the MIT license:
12
13
   Permission is  hereby granted,  free of charge,  to any  person obtaining
14
   a  copy  of  this  software   and  associated  documentation  files  (the
15
   "Software"),  to  deal in  the  Software  without restriction,  including
16
   without  limitation the  rights  to use,  copy,  modify, merge,  publish,
17
   distribute, sublicense, and/or sell copies of the Software, and to permit
18
   persons  to whom  the Software  is  furnished to  do so,  subject to  the
19
   following conditions:
20
21
   The above copyright  notice and this permission notice  shall be included
22
   in all copies or substantial portions of the Software.
23
24
   THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
25
   EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
26
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
27
   NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
28
   DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
29
   OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
30
   USE OR OTHER DEALINGS IN THE SOFTWARE.
31
*/
32
33
#if defined(NDEBUG)
34
#  undef NDEBUG // because checks below rely on assert(...)
35
#endif
36
37
#include <assert.h>
38
#include <stdint.h>
39
#include <vector>
40
41
#include "expat.h"
42
#include "xml_lpm_fuzzer.pb.h"
43
#include "src/libfuzzer/libfuzzer_macro.h"
44
45
// Encoding name passed to the Expat parser creation/reset calls in this file.
// Assigned by SetEncoding(); may be NULL.
static const char *g_encoding = nullptr;
// Pointer to, and size of, the document that ExternalEntityRefHandler() feeds
// to its sub-parser. Both are assigned by the kExternalEntity action in the
// fuzzer entry point; the pointer is reset to nullptr at the start of each run.
46
static const char *g_external_entity = nullptr;
47
static size_t g_external_entity_size = 0;
48
49
// Translates the encoding enum chosen by the testcase into the encoding name
// stored in g_encoding.
//   e: encoding selector from the protobuf testcase.
// NONE stores NULL; any value not listed explicitly stores "UNKNOWN".
void
50
28.6k
SetEncoding(const xml_lpm_fuzzer::Encoding &e) {
51
28.6k
  switch (e) {
52
14.4k
  case xml_lpm_fuzzer::Encoding::UTF8:
53
14.4k
    g_encoding = "UTF-8";
54
14.4k
    break;
55
56
382
  case xml_lpm_fuzzer::Encoding::UTF16:
57
382
    g_encoding = "UTF-16";
58
382
    break;
59
60
1.04k
  case xml_lpm_fuzzer::Encoding::ISO88591:
61
1.04k
    g_encoding = "ISO-8859-1";
62
1.04k
    break;
63
64
286
  case xml_lpm_fuzzer::Encoding::ASCII:
65
286
    g_encoding = "US-ASCII";
66
286
    break;
67
68
12.4k
  case xml_lpm_fuzzer::Encoding::NONE:
69
12.4k
    g_encoding = NULL;
70
12.4k
    break;
71
  // Every other enum value gets a name that is none of the four above.
  // NOTE(review): presumably this is meant to reach UnknownEncodingHandler()
  // -- confirm against the Expat documentation.
72
24
  default:
73
24
    g_encoding = "UNKNOWN";
74
24
    break;
75
28.6k
  }
76
28.6k
}
77
78
// Running count of calls to MallocHook() and ReallocHook(); reset to 0 by the
// fuzzer entry point before the parser is created.
static int g_allocation_count = 0;
// Values of g_allocation_count at which MallocHook()/ReallocHook() return NULL
// instead of allocating; refilled from the testcase on every run.
79
static std::vector<int> g_fail_allocations = {};
80
81
// malloc replacement installed through memory_handling_suite.
//   size: number of bytes requested.
// Returns NULL when the incremented g_allocation_count equals any entry of
// g_fail_allocations; otherwise returns the result of malloc(size).
void *
82
3.14M
MallocHook(size_t size) {
83
3.14M
  g_allocation_count += 1;
84
3.14M
  for (auto index : g_fail_allocations) {
85
910k
    if (index == g_allocation_count) {
86
860
      return NULL;
87
860
    }
88
910k
  }
89
3.13M
  return malloc(size);
90
3.14M
}
91
92
// realloc replacement installed through memory_handling_suite.
//   ptr:  block to resize.
//   size: new size in bytes.
// Shares the counter with MallocHook(): returns NULL (without touching ptr)
// when the incremented g_allocation_count equals any entry of
// g_fail_allocations; otherwise returns the result of realloc(ptr, size).
void *
93
16.9k
ReallocHook(void *ptr, size_t size) {
94
16.9k
  g_allocation_count += 1;
95
16.9k
  for (auto index : g_fail_allocations) {
96
9.04k
    if (index == g_allocation_count) {
97
243
      return NULL;
98
243
    }
99
9.04k
  }
100
16.7k
  return realloc(ptr, size);
101
16.9k
}
102
103
// free replacement installed through memory_handling_suite; forwards straight
// to free() and does not touch g_allocation_count.
void
104
3.13M
FreeHook(void *ptr) {
105
3.13M
  free(ptr);
106
3.13M
}
107
108
// Allocator table passed to XML_ParserCreate_MM() in the fuzzer entry point,
// initialised with the three hooks above in the order malloc, realloc, free.
XML_Memory_Handling_Suite memory_handling_suite
109
    = {MallocHook, ReallocHook, FreeHook};
110
111
// Forward declaration; the definition follows the handler functions below.
// NOTE(review): no call to InitializeParser() appears in this file before its
// definition, so this declaration looks redundant -- confirm before removing.
void InitializeParser(XML_Parser parser);
112
113
// We want a parse function that supports resumption, so that we can cover the
114
// suspend/resume code.
115
// Feeds one buffer to XML_Parse() and keeps calling XML_ResumeParser() for as
// long as the returned status is XML_STATUS_SUSPENDED.
//   parser:    parser to drive.
//   input:     buffer to parse.
//   input_len: length of input, forwarded unchanged to XML_Parse().
//   is_final:  forwarded unchanged to XML_Parse().
// Returns the first status that is not XML_STATUS_SUSPENDED.
enum XML_Status
116
486k
Parse(XML_Parser parser, const char *input, int input_len, int is_final) {
117
486k
  enum XML_Status status = XML_Parse(parser, input, input_len, is_final);
118
489k
  while (status == XML_STATUS_SUSPENDED) {
119
3.30k
    status = XML_ResumeParser(parser);
120
3.30k
  }
121
486k
  return status;
122
486k
}
123
124
// When the fuzzer is compiled with instrumentation such as ASan, then the
125
// accesses in TouchString will fault if they access invalid memory (ie. detect
126
// either a use-after-free or buffer-overflow). By calling TouchString in each
127
// of the callbacks, we can check that the arguments meet the API specifications
128
// in terms of length/null-termination. no_optimize is used to ensure that the
129
// compiler has to emit actual memory reads, instead of removing them.
130
// Accumulator that TouchString() adds every character it reads to.
static volatile size_t no_optimize = 0;
// Reads every character of a string and adds it to no_optimize.
//   ptr: string to read; a null pointer is ignored.
//   len: number of XML_Char elements to read, or -1 (the default) to read up
//        to, but not including, the first zero element.
131
static void
132
2.83M
TouchString(const XML_Char *ptr, int len = -1) {
133
2.83M
  if (! ptr) {
134
486k
    return;
135
486k
  }
136
137
2.35M
  if (len == -1) {
    // Zero-terminated case: the terminator itself is read but not added.
138
31.1M
    for (XML_Char value = *ptr++; value; value = *ptr++) {
139
29.7M
      no_optimize += value;
140
29.7M
    }
141
1.38M
  } else {
    // Explicit-length case: exactly len elements are read, zeros included.
142
30.0M
    for (int i = 0; i < len; ++i) {
143
29.0M
      no_optimize += ptr[i];
144
29.0M
    }
145
964k
  }
146
2.35M
}
147
148
// Checks one node of an element content model with assert() and reads its
// name strings through TouchString().
//   content: node to check; dereferenced unconditionally.
// What is asserted depends on content->type (see the per-case notes below).
// CHOICE and SEQ nodes recurse into every child; a type not listed trips
// assert(false).
static void
149
7.92k
TouchNodeAndRecurse(XML_Content *content) {
150
7.92k
  switch (content->type) {
  // EMPTY / ANY: no quantifier, no name, no children.
151
70
  case XML_CTYPE_EMPTY:
152
364
  case XML_CTYPE_ANY:
153
364
    assert(content->quant == XML_CQUANT_NONE);
154
364
    assert(content->name == NULL);
155
364
    assert(content->numchildren == 0);
156
364
    assert(content->children == NULL);
157
364
    break;
158
  // MIXED: quantifier NONE or REP, no name; every child must be a plain NAME
  // leaf without quantifier or children, and its name is read.
159
364
  case XML_CTYPE_MIXED:
160
236
    assert(content->quant == XML_CQUANT_NONE
161
236
           || content->quant == XML_CQUANT_REP);
162
236
    assert(content->name == NULL);
163
959
    for (unsigned int i = 0; i < content->numchildren; ++i) {
164
723
      assert(content->children[i].type == XML_CTYPE_NAME);
165
723
      assert(content->children[i].quant == XML_CQUANT_NONE);
166
723
      assert(content->children[i].numchildren == 0);
167
723
      assert(content->children[i].children == NULL);
168
723
      TouchString(content->children[i].name);
169
723
    }
170
236
    break;
171
  // NAME: any of the four quantifiers, no children; the name is read.
172
2.65k
  case XML_CTYPE_NAME:
173
2.65k
    assert((content->quant == XML_CQUANT_NONE)
174
2.65k
           || (content->quant == XML_CQUANT_OPT)
175
2.65k
           || (content->quant == XML_CQUANT_REP)
176
2.65k
           || (content->quant == XML_CQUANT_PLUS));
177
2.65k
    assert(content->numchildren == 0);
178
2.65k
    assert(content->children == NULL);
179
2.65k
    TouchString(content->name);
180
2.65k
    break;
181
  // CHOICE / SEQ: any of the four quantifiers, no name; recurse into children.
182
265
  case XML_CTYPE_CHOICE:
183
4.67k
  case XML_CTYPE_SEQ:
184
4.67k
    assert((content->quant == XML_CQUANT_NONE)
185
4.67k
           || (content->quant == XML_CQUANT_OPT)
186
4.67k
           || (content->quant == XML_CQUANT_REP)
187
4.67k
           || (content->quant == XML_CQUANT_PLUS));
188
4.67k
    assert(content->name == NULL);
189
10.8k
    for (unsigned int i = 0; i < content->numchildren; ++i) {
190
6.21k
      TouchNodeAndRecurse(&content->children[i]);
191
6.21k
    }
192
4.67k
    break;
193
  // Any other type value fails the assertion unconditionally.
194
0
  default:
195
0
    assert(false);
196
7.92k
  }
197
7.92k
}
198
199
// Handler registered with XML_SetElementDeclHandler() in InitializeParser().
// Reads the element name, validates the content model with
// TouchNodeAndRecurse(), then releases the model via XML_FreeContentModel().
//   userData: cast to XML_Parser for the free call (InitializeParser() sets
//             the user data to the parser it configures).
//   name:     element name, read as a zero-terminated string.
//   model:    content model; freed before returning.
static void XMLCALL
200
1.71k
ElementDeclHandler(void *userData, const XML_Char *name, XML_Content *model) {
201
1.71k
  TouchString(name);
202
1.71k
  TouchNodeAndRecurse(model);
203
1.71k
  XML_FreeContentModel((XML_Parser)userData, model);
204
1.71k
}
205
206
// Handler registered with XML_SetAttlistDeclHandler() in InitializeParser().
// Reads elname, attname, atttype and dflt as zero-terminated strings (null
// pointers are skipped by TouchString()); userData and isrequired are unused.
static void XMLCALL
207
AttlistDeclHandler(void *userData, const XML_Char *elname,
208
                   const XML_Char *attname, const XML_Char *atttype,
209
10.5k
                   const XML_Char *dflt, int isrequired) {
210
10.5k
  (void)userData;
211
10.5k
  TouchString(elname);
212
10.5k
  TouchString(attname);
213
10.5k
  TouchString(atttype);
214
10.5k
  TouchString(dflt);
215
10.5k
  (void)isrequired;
216
10.5k
}
217
218
// Handler registered with XML_SetXmlDeclHandler() in InitializeParser().
// Reads version and encoding as zero-terminated strings (null pointers are
// skipped by TouchString()); userData and standalone are unused.
static void XMLCALL
219
XmlDeclHandler(void *userData, const XML_Char *version,
220
2.60k
               const XML_Char *encoding, int standalone) {
221
2.60k
  (void)userData;
222
2.60k
  TouchString(version);
223
2.60k
  TouchString(encoding);
224
2.60k
  (void)standalone;
225
2.60k
}
226
227
// Start-tag handler registered with XML_SetElementHandler() in
// InitializeParser(). Reads the element name, then every entry of atts up to
// (not including) the first NULL entry; atts itself is dereferenced
// unconditionally. userData is unused.
static void XMLCALL
228
StartElementHandler(void *userData, const XML_Char *name,
229
753k
                    const XML_Char **atts) {
230
753k
  (void)userData;
231
753k
  TouchString(name);
232
777k
  for (size_t i = 0; atts[i] != NULL; ++i) {
233
24.0k
    TouchString(atts[i]);
234
24.0k
  }
235
753k
}
236
237
// End-tag handler registered with XML_SetElementHandler() in
// InitializeParser(). Reads name as a zero-terminated string; userData is
// unused.
static void XMLCALL
238
14.9k
EndElementHandler(void *userData, const XML_Char *name) {
239
14.9k
  (void)userData;
240
14.9k
  TouchString(name);
241
14.9k
}
242
243
// Handler registered with XML_SetCharacterDataHandler() in InitializeParser().
// Reads exactly len elements of s (explicit-length mode of TouchString(), so
// no terminator is looked for); userData is unused.
static void XMLCALL
244
808k
CharacterDataHandler(void *userData, const XML_Char *s, int len) {
245
808k
  (void)userData;
246
808k
  TouchString(s, len);
247
808k
}
248
249
// Handler registered with XML_SetProcessingInstructionHandler() in
// InitializeParser(). Reads target and data as zero-terminated strings;
// userData is unused.
static void XMLCALL
250
ProcessingInstructionHandler(void *userData, const XML_Char *target,
251
10.8k
                             const XML_Char *data) {
252
10.8k
  (void)userData;
253
10.8k
  TouchString(target);
254
10.8k
  TouchString(data);
255
10.8k
}
256
257
// Handler registered with XML_SetCommentHandler() in InitializeParser().
// Reads the comment text, then calls XML_StopParser() with XML_TRUE on the
// parser obtained by casting userData; Parse() resumes after a suspension.
//   userData: cast to XML_Parser (InitializeParser() stores the parser itself
//             as user data).
//   data:     comment text, read as a zero-terminated string.
// NOTE(review): parsers created in ExternalEntityRefHandler() are never passed
// to InitializeParser() in this file, so which parser userData names during
// external-entity parsing depends on what XML_ExternalEntityParserCreate()
// inherits -- confirm against the Expat documentation.
static void XMLCALL
258
11.6k
CommentHandler(void *userData, const XML_Char *data) {
259
11.6k
  TouchString(data);
260
  // Use the comment handler to trigger parser suspend, so that we can get
261
  // coverage of that code.
262
11.6k
  XML_StopParser((XML_Parser)userData, XML_TRUE);
263
11.6k
}
264
265
// Start handler registered with XML_SetCdataSectionHandler() in
// InitializeParser(). Intentionally does nothing; it only needs to be
// installed.
static void XMLCALL
266
3.86k
StartCdataSectionHandler(void *userData) {
267
3.86k
  (void)userData;
268
3.86k
}
269
270
// End handler registered with XML_SetCdataSectionHandler() in
// InitializeParser(). Intentionally does nothing; it only needs to be
// installed.
static void XMLCALL
271
815
EndCdataSectionHandler(void *userData) {
272
815
  (void)userData;
273
815
}
274
275
// Handler registered with XML_SetDefaultHandlerExpand() in InitializeParser().
// Reads exactly len elements of s (explicit-length mode of TouchString());
// userData is unused.
static void XMLCALL
276
147k
DefaultHandler(void *userData, const XML_Char *s, int len) {
277
147k
  (void)userData;
278
147k
  TouchString(s, len);
279
147k
}
280
281
// Start handler registered with XML_SetDoctypeDeclHandler() in
// InitializeParser(). Reads doctypeName, sysid and pubid as zero-terminated
// strings (null pointers are skipped by TouchString()); userData and
// has_internal_subset are unused.
static void XMLCALL
282
StartDoctypeDeclHandler(void *userData, const XML_Char *doctypeName,
283
                        const XML_Char *sysid, const XML_Char *pubid,
284
131k
                        int has_internal_subset) {
285
131k
  (void)userData;
286
131k
  TouchString(doctypeName);
287
131k
  TouchString(sysid);
288
131k
  TouchString(pubid);
289
131k
  (void)has_internal_subset;
290
131k
}
291
292
// End handler registered with XML_SetDoctypeDeclHandler() in
// InitializeParser(). Intentionally does nothing; it only needs to be
// installed.
static void XMLCALL
293
2.96k
EndDoctypeDeclHandler(void *userData) {
294
2.96k
  (void)userData;
295
2.96k
}
296
297
// Handler registered with XML_SetEntityDeclHandler() in InitializeParser().
// Reads value with its explicit length value_length; entityName, base,
// systemId, publicId and notationName are read as zero-terminated strings.
// Null pointers are skipped by TouchString(). userData and
// is_parameter_entity are unused.
static void XMLCALL
298
EntityDeclHandler(void *userData, const XML_Char *entityName,
299
                  int is_parameter_entity, const XML_Char *value,
300
                  int value_length, const XML_Char *base,
301
                  const XML_Char *systemId, const XML_Char *publicId,
302
11.7k
                  const XML_Char *notationName) {
303
11.7k
  (void)userData;
304
11.7k
  TouchString(entityName);
305
11.7k
  (void)is_parameter_entity;
306
11.7k
  TouchString(value, value_length);
307
11.7k
  TouchString(base);
308
11.7k
  TouchString(systemId);
309
11.7k
  TouchString(publicId);
310
11.7k
  TouchString(notationName);
311
11.7k
}
312
313
// Handler registered with XML_SetNotationDeclHandler() in InitializeParser().
// Reads notationName, base, systemId and publicId as zero-terminated strings
// (null pointers are skipped by TouchString()); userData is unused.
static void XMLCALL
314
NotationDeclHandler(void *userData, const XML_Char *notationName,
315
                    const XML_Char *base, const XML_Char *systemId,
316
888
                    const XML_Char *publicId) {
317
888
  (void)userData;
318
888
  TouchString(notationName);
319
888
  TouchString(base);
320
888
  TouchString(systemId);
321
888
  TouchString(publicId);
322
888
}
323
324
// Start handler registered with XML_SetNamespaceDeclHandler() in
// InitializeParser(). Reads prefix and uri as zero-terminated strings (null
// pointers are skipped by TouchString()); userData is unused.
static void XMLCALL
325
StartNamespaceDeclHandler(void *userData, const XML_Char *prefix,
326
8.05k
                          const XML_Char *uri) {
327
8.05k
  (void)userData;
328
8.05k
  TouchString(prefix);
329
8.05k
  TouchString(uri);
330
8.05k
}
331
332
// End handler registered with XML_SetNamespaceDeclHandler() in
// InitializeParser(). Reads prefix as a zero-terminated string (a null
// pointer is skipped by TouchString()); userData is unused.
static void XMLCALL
333
3.03k
EndNamespaceDeclHandler(void *userData, const XML_Char *prefix) {
334
3.03k
  (void)userData;
335
3.03k
  TouchString(prefix);
336
3.03k
}
337
338
// Handler registered with XML_SetNotStandaloneHandler() in InitializeParser().
// Ignores userData and always returns XML_STATUS_OK.
// NOTE(review): presumably this return value lets parsing continue -- confirm
// against the Expat documentation.
static int XMLCALL
339
2.74k
NotStandaloneHandler(void *userData) {
340
2.74k
  (void)userData;
341
2.74k
  return XML_STATUS_OK;
342
2.74k
}
343
344
// Handler registered with XML_SetExternalEntityRefHandler() in
// InitializeParser(). Reads context, base, systemId and publicId, then, if an
// external-entity document has been supplied (g_external_entity is non-null),
// parses that document with a parser created by
// XML_ExternalEntityParserCreate() and frees the parser afterwards.
//   parser:  parser that hit the external entity reference.
//   context: forwarded to XML_ExternalEntityParserCreate().
// Returns the status of the sub-parse, or XML_STATUS_ERROR when no document
// was supplied or the sub-parser could not be created.
static int XMLCALL
345
ExternalEntityRefHandler(XML_Parser parser, const XML_Char *context,
346
                         const XML_Char *base, const XML_Char *systemId,
347
127k
                         const XML_Char *publicId) {
348
127k
  int rc = XML_STATUS_ERROR;
349
127k
  TouchString(context);
350
127k
  TouchString(base);
351
127k
  TouchString(systemId);
352
127k
  TouchString(publicId);
353
354
127k
  if (g_external_entity) {
355
125k
    XML_Parser ext_parser
356
125k
        = XML_ExternalEntityParserCreate(parser, context, g_encoding);
357
125k
    if (ext_parser != NULL) {
      // The whole document is passed as a single, final chunk.
      // NOTE(review): g_external_entity_size is a size_t converted implicitly
      // to Parse()'s int input_len -- confirm inputs cannot exceed INT_MAX.
358
125k
      rc = Parse(ext_parser, g_external_entity, g_external_entity_size, 1);
359
125k
      XML_ParserFree(ext_parser);
360
125k
    }
361
125k
  }
362
363
127k
  return rc;
364
127k
}
365
366
// Handler registered with XML_SetSkippedEntityHandler() in InitializeParser().
// Reads entityName as a zero-terminated string; userData and
// is_parameter_entity are unused.
static void XMLCALL
367
SkippedEntityHandler(void *userData, const XML_Char *entityName,
368
3.48k
                     int is_parameter_entity) {
369
3.48k
  (void)userData;
370
3.48k
  TouchString(entityName);
371
3.48k
  (void)is_parameter_entity;
372
3.48k
}
373
374
// Handler registered with XML_SetUnknownEncodingHandler() in
// InitializeParser(). Reads the encoding name as a zero-terminated string,
// leaves info untouched and always returns XML_STATUS_ERROR.
// encodingHandlerData is unused.
// NOTE(review): presumably this return value tells Expat the encoding is not
// supported -- confirm against the Expat documentation.
static int XMLCALL
375
UnknownEncodingHandler(void *encodingHandlerData, const XML_Char *name,
376
1.62k
                       XML_Encoding *info) {
377
1.62k
  (void)encodingHandlerData;
378
1.62k
  TouchString(name);
379
1.62k
  (void)info;
380
1.62k
  return XML_STATUS_ERROR;
381
1.62k
}
382
383
// Configures a parser for fuzzing: stores the parser itself as user data, sets
// a fixed hash salt, enables parameter entity parsing unconditionally and
// installs every handler defined above.
//   parser: parser to configure.
// The fuzzer entry point calls this after creating the parser and again after
// every XML_ParserReset().
void
384
375k
InitializeParser(XML_Parser parser) {
  // ElementDeclHandler() and CommentHandler() cast userData back to XML_Parser.
385
375k
  XML_SetUserData(parser, (void *)parser);
386
375k
  XML_SetHashSalt(parser, 0x41414141);
387
375k
  XML_SetParamEntityParsing(parser, XML_PARAM_ENTITY_PARSING_ALWAYS);
388
389
375k
  XML_SetElementDeclHandler(parser, ElementDeclHandler);
390
375k
  XML_SetAttlistDeclHandler(parser, AttlistDeclHandler);
391
375k
  XML_SetXmlDeclHandler(parser, XmlDeclHandler);
392
375k
  XML_SetElementHandler(parser, StartElementHandler, EndElementHandler);
393
375k
  XML_SetCharacterDataHandler(parser, CharacterDataHandler);
394
375k
  XML_SetProcessingInstructionHandler(parser, ProcessingInstructionHandler);
395
375k
  XML_SetCommentHandler(parser, CommentHandler);
396
375k
  XML_SetCdataSectionHandler(parser, StartCdataSectionHandler,
397
375k
                             EndCdataSectionHandler);
398
  // XML_SetDefaultHandler disables entity expansion
399
375k
  XML_SetDefaultHandlerExpand(parser, DefaultHandler);
400
375k
  XML_SetDoctypeDeclHandler(parser, StartDoctypeDeclHandler,
401
375k
                            EndDoctypeDeclHandler);
402
  // Note: This is mutually exclusive with XML_SetUnparsedEntityDeclHandler,
403
  //       and there isn't any significant code change between the two.
404
375k
  XML_SetEntityDeclHandler(parser, EntityDeclHandler);
405
375k
  XML_SetNotationDeclHandler(parser, NotationDeclHandler);
406
375k
  XML_SetNamespaceDeclHandler(parser, StartNamespaceDeclHandler,
407
375k
                              EndNamespaceDeclHandler);
408
375k
  XML_SetNotStandaloneHandler(parser, NotStandaloneHandler);
409
375k
  XML_SetExternalEntityRefHandler(parser, ExternalEntityRefHandler);
410
375k
  XML_SetSkippedEntityHandler(parser, SkippedEntityHandler);
411
375k
  XML_SetUnknownEncodingHandler(parser, UnknownEncodingHandler, (void *)parser);
412
375k
}
413
414
28.6k
// Fuzzer entry point: replays the actions of one protobuf testcase against a
// single Expat parser.
//   testcase: supplies the encoding, the allocation indices that must fail and
//             the ordered list of actions.
// Steps: clear the external-entity pointer; return early if there are no
// actions; reset the allocation counter and failure list; create and configure
// the parser; run each action; free the parser.
DEFINE_TEXT_PROTO_FUZZER(const xml_lpm_fuzzer::Testcase &testcase) {
415
28.6k
  g_external_entity = nullptr;
416
417
28.6k
  if (! testcase.actions_size()) {
418
5
    return;
419
5
  }
420
421
28.6k
  g_allocation_count = 0;
422
28.6k
  g_fail_allocations.clear();
423
31.6k
  for (int i = 0; i < testcase.fail_allocations_size(); ++i) {
424
3.03k
    g_fail_allocations.push_back(testcase.fail_allocations(i));
425
3.03k
  }
426
427
28.6k
  SetEncoding(testcase.encoding());
  // NOTE(review): the result is not checked for NULL, and parser creation
  // itself goes through MallocHook(), which may be told to fail -- confirm
  // that every Expat call below tolerates a NULL parser.
428
28.6k
  XML_Parser parser
429
28.6k
      = XML_ParserCreate_MM(g_encoding, &memory_handling_suite, "|");
430
28.6k
  InitializeParser(parser);
431
432
910k
  for (int i = 0; i < testcase.actions_size(); ++i) {
433
881k
    const auto &action = testcase.actions(i);
434
881k
    switch (action.action_case()) {
    // Non-final chunk: keep parser state unless parsing reported an error.
    // NOTE(review): size() is converted implicitly to Parse()'s int length.
435
303k
    case xml_lpm_fuzzer::Action::kChunk:
436
303k
      if (XML_STATUS_ERROR
437
303k
          == Parse(parser, action.chunk().data(), action.chunk().size(), 0)) {
438
        // Force a reset after parse error.
439
236k
        XML_ParserReset(parser, g_encoding);
440
236k
        InitializeParser(parser);
441
236k
      }
442
303k
      break;
443
    // Final chunk: the status is ignored and the parser is always reset and
    // reconfigured afterwards.
444
56.8k
    case xml_lpm_fuzzer::Action::kLastChunk:
445
56.8k
      Parse(parser, action.last_chunk().data(), action.last_chunk().size(), 1);
446
56.8k
      XML_ParserReset(parser, g_encoding);
447
56.8k
      InitializeParser(parser);
448
56.8k
      break;
449
    // Explicit reset: XML_ParserReset() is followed by InitializeParser() to
    // configure the parser again.
450
53.0k
    case xml_lpm_fuzzer::Action::kReset:
451
53.0k
      XML_ParserReset(parser, g_encoding);
452
53.0k
      InitializeParser(parser);
453
53.0k
      break;
454
    // Remember the document ExternalEntityRefHandler() will parse. Only the
    // pointer and size are stored; the bytes stay owned by the testcase.
455
14.0k
    case xml_lpm_fuzzer::Action::kExternalEntity:
456
14.0k
      g_external_entity = action.external_entity().data();
457
14.0k
      g_external_entity_size = action.external_entity().size();
458
14.0k
      break;
459
    // Any other action case is ignored.
460
453k
    default:
461
453k
      break;
462
881k
    }
463
881k
  }
464
465
28.6k
  XML_ParserFree(parser);
466
28.6k
}