/src/qpdf/fuzz/qpdf_pages_fuzzer.cc
Line | Count | Source |
#include <qpdf/Buffer.hh>
#include <qpdf/BufferInputSource.hh>
#include <qpdf/Pl_DCT.hh>
#include <qpdf/Pl_Discard.hh>
#include <qpdf/Pl_Flate.hh>
#include <qpdf/Pl_PNGFilter.hh>
#include <qpdf/Pl_TIFFPredictor.hh>
#include <qpdf/QPDF.hh>
#include <qpdf/QPDFAcroFormDocumentHelper.hh>
#include <qpdf/QPDFOutlineDocumentHelper.hh>
#include <qpdf/QPDFPageDocumentHelper.hh>
#include <qpdf/QPDFPageLabelDocumentHelper.hh>
#include <qpdf/QPDFPageObjectHelper.hh>
#include <qpdf/QUtil.hh>
#include <cstdlib>
#include <memory>
16 | | |
17 | | class DiscardContents: public QPDFObjectHandle::ParserCallbacks |
18 | | { |
19 | | public: |
20 | | ~DiscardContents() override = default; |
21 | | void |
22 | | handleObject(QPDFObjectHandle) override |
23 | 2.75M | { |
24 | 2.75M | } |
25 | | void |
26 | | handleEOF() override |
27 | 31.8k | { |
28 | 31.8k | } |
29 | | }; |
30 | | |
31 | | class FuzzHelper |
32 | | { |
33 | | public: |
34 | | FuzzHelper(unsigned char const* data, size_t size); |
35 | | void run(); |
36 | | |
37 | | private: |
38 | | std::shared_ptr<QPDF> getQpdf(); |
39 | | void testPages(); |
40 | | void doChecks(); |
41 | | |
42 | | Buffer input_buffer; |
43 | | Pl_Discard discard; |
44 | | }; |
45 | | |
46 | | FuzzHelper::FuzzHelper(unsigned char const* data, size_t size) : |
47 | | // We do not modify data, so it is safe to remove the const for Buffer |
48 | | input_buffer(const_cast<unsigned char*>(data), size) |
49 | 114k | { |
50 | 114k | } |
51 | | |
52 | | std::shared_ptr<QPDF> |
53 | | FuzzHelper::getQpdf() |
54 | 89.1k | { |
55 | 89.1k | auto is = |
56 | 89.1k | std::shared_ptr<InputSource>(new BufferInputSource("fuzz input", &this->input_buffer)); |
57 | 89.1k | auto qpdf = QPDF::create(); |
58 | 89.1k | qpdf->setMaxWarnings(200); |
59 | 89.1k | qpdf->processInputSource(is); |
60 | 89.1k | return qpdf; |
61 | 89.1k | } |
62 | | |
63 | | void |
64 | | FuzzHelper::testPages() |
65 | 13.5k | { |
66 | | // Parse all content streams, and exercise some helpers that |
67 | | // operate on pages. |
68 | 13.5k | std::shared_ptr<QPDF> q = getQpdf(); |
69 | 13.5k | QPDFPageDocumentHelper pdh(*q); |
70 | 13.5k | QPDFPageLabelDocumentHelper pldh(*q); |
71 | 13.5k | QPDFOutlineDocumentHelper odh(*q); |
72 | 13.5k | QPDFAcroFormDocumentHelper afdh(*q); |
73 | 13.5k | afdh.generateAppearancesIfNeeded(); |
74 | 13.5k | pdh.flattenAnnotations(); |
75 | 13.5k | DiscardContents discard_contents; |
76 | 13.5k | int pageno = 0; |
77 | 34.1k | for (auto& page: pdh.getAllPages()) { |
78 | 34.1k | ++pageno; |
79 | 34.1k | try { |
80 | 34.1k | page.coalesceContentStreams(); |
81 | 34.1k | page.parseContents(&discard_contents); |
82 | 34.1k | page.getImages(); |
83 | 34.1k | pldh.getLabelForPage(pageno); |
84 | 34.1k | QPDFObjectHandle page_obj(page.getObjectHandle()); |
85 | 34.1k | page_obj.getJSON(JSON::LATEST, true).unparse(); |
86 | 34.1k | odh.getOutlinesForPage(page_obj.getObjGen()); |
87 | | |
88 | 34.1k | for (auto& aoh: afdh.getWidgetAnnotationsForPage(page)) { |
89 | 512 | afdh.getFieldForAnnotation(aoh); |
90 | 512 | } |
91 | 34.1k | } catch (QPDFExc& e) { |
92 | 6.90k | std::cerr << "page " << pageno << ": " << e.what() << std::endl; |
93 | 6.90k | } |
94 | 34.1k | } |
95 | 13.5k | } |
96 | | |
97 | | void |
98 | | FuzzHelper::doChecks() |
99 | 101k | { |
100 | | // Limit the memory used to decompress JPEG files during fuzzing. Excessive memory use during |
101 | | // fuzzing is due to corrupt JPEG data which sometimes cannot be detected before |
102 | | // jpeg_start_decompress is called. During normal use of qpdf very large JPEGs can occasionally |
103 | | // occur legitimately and therefore must be allowed during normal operations. |
104 | 101k | Pl_DCT::setMemoryLimit(100'000'000); |
105 | 101k | Pl_DCT::setScanLimit(50); |
106 | | |
107 | 101k | Pl_PNGFilter::setMemoryLimit(1'000'000); |
108 | 101k | Pl_TIFFPredictor::setMemoryLimit(1'000'000); |
109 | 101k | Pl_Flate::setMemoryLimit(1'000'000); |
110 | | |
111 | | // Do not decompress corrupt data. This may cause extended runtime within jpeglib without |
112 | | // exercising additional code paths in qpdf, and potentially causing counterproductive timeouts. |
113 | 101k | Pl_DCT::setThrowOnCorruptData(true); |
114 | | |
115 | | // Get as much coverage as possible in parts of the library that |
116 | | // might benefit from fuzzing. |
117 | 101k | std::cerr << "\ninfo: starting testPages\n"; |
118 | 101k | testPages(); |
119 | 101k | } |
120 | | |
121 | | void |
122 | | FuzzHelper::run() |
123 | 80.5k | { |
124 | | // The goal here is that you should be able to throw anything at |
125 | | // libqpdf and it will respond without any memory errors and never |
126 | | // do anything worse than throwing a QPDFExc or |
127 | | // std::runtime_error. Throwing any other kind of exception, |
128 | | // segfaulting, or having a memory error (when built with |
129 | | // appropriate sanitizers) will all cause abnormal exit. |
130 | 80.5k | try { |
131 | 80.5k | doChecks(); |
132 | 80.5k | } catch (QPDFExc const& e) { |
133 | 47.9k | std::cerr << "QPDFExc: " << e.what() << std::endl; |
134 | 47.9k | } catch (std::runtime_error const& e) { |
135 | 822 | std::cerr << "runtime_error: " << e.what() << std::endl; |
136 | 822 | } |
137 | 80.5k | } |
138 | | |
139 | | extern "C" int |
140 | | LLVMFuzzerTestOneInput(unsigned char const* data, size_t size) |
141 | 114k | { |
142 | 114k | #ifndef _WIN32 |
143 | | // Used by jpeg library to work around false positives in memory |
144 | | // sanitizer. |
145 | 114k | setenv("JSIMD_FORCENONE", "1", 1); |
146 | 114k | #endif |
147 | 114k | FuzzHelper f(data, size); |
148 | 114k | f.run(); |
149 | 114k | return 0; |
150 | 114k | } |