/src/poppler/glib/tests/fuzzing/structured_text_fuzzer.cc
Line | Count | Source |
1 | | // Fuzzer for the poppler-glib structure element API (PDF structure tree traversal and text extraction) |
2 | | |
3 | | #include <stdint.h> |
4 | | #include <stdlib.h> |
5 | | #include <poppler.h> |
6 | | |
7 | | // Walks iter and every child iterator beneath it: for each element it calls the kind, id, title, language, abbreviation, alt-text and actual-text getters (freeing every returned string), extracts text from content elements, then recurses into the children. iter itself is not freed here; that is left to the caller. |
8 | | static void exercise_structure_tree(PopplerStructureElementIter *iter) |
9 | 60.1k | { |
10 | 60.1k | if (!iter) { |
11 | 0 | return; |
12 | 0 | } |
13 | | |
14 | 188k | do { |
15 | 188k | PopplerStructureElement *element = poppler_structure_element_iter_get_element(iter); |
16 | 188k | if (!element) { |
17 | 0 | continue; /* in a do-while, continue jumps to the loop condition, so the iterator still advances */ |
18 | 0 | } |
19 | | |
20 | | // Exercise the basic getters; return values are never inspected, only discarded or freed |
21 | 188k | (void)poppler_structure_element_get_kind(element); |
22 | | |
23 | | // Each getter below returns a gchar* that is released with g_free() immediately to prevent memory leaks |
24 | 188k | gchar *id = poppler_structure_element_get_id(element); |
25 | 188k | g_free(id); |
26 | | |
27 | 188k | gchar *title = poppler_structure_element_get_title(element); |
28 | 188k | g_free(title); |
29 | | |
30 | 188k | gchar *language = poppler_structure_element_get_language(element); |
31 | 188k | g_free(language); |
32 | | |
33 | 188k | gchar *abbreviation = poppler_structure_element_get_abbreviation(element); |
34 | 188k | g_free(abbreviation); |
35 | | |
36 | 188k | gchar *alt_text = poppler_structure_element_get_alt_text(element); |
37 | 188k | g_free(alt_text); |
38 | | |
39 | 188k | gchar *actual_text = poppler_structure_element_get_actual_text(element); |
40 | 188k | g_free(actual_text); |
41 | | |
42 | | // Text extraction is only attempted for elements that report themselves as content |
43 | 188k | if (poppler_structure_element_is_content(element)) { |
44 | | // Test non-recursive mode |
45 | 124k | gchar *text = poppler_structure_element_get_text(element, POPPLER_STRUCTURE_GET_TEXT_NONE); |
46 | 124k | g_free(text); |
47 | | |
48 | | // Test recursive mode |
49 | 124k | text = poppler_structure_element_get_text(element, POPPLER_STRUCTURE_GET_TEXT_RECURSIVE); |
50 | 124k | g_free(text); |
51 | 124k | } |
52 | | |
53 | | // Recurse into children; a NULL child iterator is simply skipped |
54 | 188k | PopplerStructureElementIter *child_iter = poppler_structure_element_iter_get_child(iter); |
55 | 188k | if (child_iter) { |
56 | 52.5k | exercise_structure_tree(child_iter); |
57 | 52.5k | poppler_structure_element_iter_free(child_iter); /* freed only after its whole subtree has been visited */ |
58 | 52.5k | } |
59 | | |
60 | 188k | g_object_unref(element); /* releases the element obtained from iter_get_element() at the top of this iteration */ |
61 | | |
62 | 188k | } while (poppler_structure_element_iter_next(iter)); |
63 | 60.1k | } |
64 | | |
65 | | // Fuzzer entry point: loads the size bytes at data as a poppler document and, when a structure element iterator can be created for it, walks that iterator with exercise_structure_tree(). Always returns 0. |
66 | | extern "C" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) |
67 | 8.54k | { |
68 | 8.54k | GError *err = NULL; |
69 | | |
70 | | // Load document from fuzzer input; g_bytes_new_static() wraps the buffer rather than copying it |
71 | 8.54k | GBytes *bytes = g_bytes_new_static(data, size); |
72 | 8.54k | PopplerDocument *doc = poppler_document_new_from_bytes(bytes, NULL, &err); |
73 | 8.54k | g_bytes_unref(bytes); /* NOTE(review): presumably the document holds its own reference to bytes - confirm */ |
74 | | |
75 | 8.54k | if (doc == NULL) { |
76 | 439 | if (err) { |
77 | 439 | g_error_free(err); |
78 | 439 | } |
79 | 439 | return 0; /* the document could not be loaded, so there is nothing to exercise */ |
80 | 439 | } |
81 | | |
82 | | // Try to create iterator - a NULL result is treated as "no structure tree to walk" and the traversal is skipped |
83 | 8.10k | PopplerStructureElementIter *root_iter = poppler_structure_element_iter_new(doc); |
84 | 8.10k | if (root_iter) { |
85 | 7.61k | exercise_structure_tree(root_iter); |
86 | 7.61k | poppler_structure_element_iter_free(root_iter); |
87 | 7.61k | } |
88 | | |
89 | 8.10k | g_object_unref(doc); |
90 | 8.10k | return 0; |
91 | 8.54k | } |