/src/qpdf/fuzz/qpdf_pages_fuzzer.cc
Line | Count | Source |
1 | | #include <qpdf/Buffer.hh> |
2 | | #include <qpdf/BufferInputSource.hh> |
3 | | #include <qpdf/Pl_DCT.hh> |
4 | | #include <qpdf/Pl_Discard.hh> |
5 | | #include <qpdf/Pl_Flate.hh> |
6 | | #include <qpdf/Pl_PNGFilter.hh> |
7 | | #include <qpdf/Pl_RunLength.hh> |
8 | | #include <qpdf/Pl_TIFFPredictor.hh> |
9 | | #include <qpdf/QPDF.hh> |
10 | | #include <qpdf/QPDFAcroFormDocumentHelper.hh> |
11 | | #include <qpdf/QPDFOutlineDocumentHelper.hh> |
12 | | #include <qpdf/QPDFPageDocumentHelper.hh> |
13 | | #include <qpdf/QPDFPageLabelDocumentHelper.hh> |
14 | | #include <qpdf/QPDFPageObjectHelper.hh> |
15 | | |
16 | | #include <chrono> |
17 | | #include <cstdlib> |
18 | | #include <iostream> |
19 | | |
20 | | class FuzzHelper |
21 | | { |
22 | | public: |
23 | | FuzzHelper(unsigned char const* data, size_t size); |
24 | | void run(); |
25 | | |
26 | | private: |
27 | | std::shared_ptr<QPDF> getQpdf(); |
28 | | void testPages(); |
29 | | void doChecks(); |
30 | | |
31 | | void |
32 | | info(std::string const& msg, int pageno = 0) const |
33 | 285k | { |
34 | 285k | const std::chrono::duration<double> elapsed{std::chrono::steady_clock::now() - start}; |
35 | | |
36 | 285k | std::cerr << elapsed.count() << " info - " << msg; |
37 | 285k | if (pageno > 0) { |
38 | 39.0k | std::cerr << " page " << pageno; |
39 | 39.0k | } |
40 | 285k | std::cerr << '\n'; |
41 | 285k | } |
42 | | |
43 | | Buffer input_buffer; |
44 | | Pl_Discard discard; |
45 | | const std::chrono::time_point<std::chrono::steady_clock> start; |
46 | | }; |
47 | | |
48 | | FuzzHelper::FuzzHelper(unsigned char const* data, size_t size) : |
49 | | // We do not modify data, so it is safe to remove the const for Buffer |
50 | 300k | input_buffer(const_cast<unsigned char*>(data), size), |
51 | 300k | start(std::chrono::steady_clock::now()) |
52 | 300k | { |
53 | 300k | } |
54 | | |
55 | | std::shared_ptr<QPDF> |
56 | | FuzzHelper::getQpdf() |
57 | 263k | { |
58 | 263k | auto is = |
59 | 263k | std::shared_ptr<InputSource>(new BufferInputSource("fuzz input", &this->input_buffer)); |
60 | 263k | auto qpdf = QPDF::create(); |
61 | 263k | qpdf->setMaxWarnings(200); |
62 | 263k | qpdf->processInputSource(is); |
63 | 263k | return qpdf; |
64 | 263k | } |
65 | | |
66 | | void |
67 | | FuzzHelper::testPages() |
68 | 46.3k | { |
69 | | // Parse all content streams, and exercise some helpers that |
70 | | // operate on pages. |
71 | 46.3k | std::shared_ptr<QPDF> q = getQpdf(); |
72 | 46.3k | info("getQpdf done"); |
73 | 46.3k | QPDFPageDocumentHelper pdh(*q); |
74 | 46.3k | QPDFPageLabelDocumentHelper pldh(*q); |
75 | 46.3k | QPDFOutlineDocumentHelper odh(*q); |
76 | 46.3k | QPDFAcroFormDocumentHelper afdh(*q); |
77 | 46.3k | afdh.generateAppearancesIfNeeded(); |
78 | 46.3k | info("generateAppearancesIfNeeded done"); |
79 | 46.3k | pdh.flattenAnnotations(); |
80 | 46.3k | info("flattenAnnotations done"); |
81 | 46.3k | int pageno = 0; |
82 | 46.3k | for (auto& page: pdh.getAllPages()) { |
83 | 39.0k | ++pageno; |
84 | 39.0k | try { |
85 | 39.0k | info("start page", pageno); |
86 | 39.0k | page.coalesceContentStreams(); |
87 | 39.0k | info("coalesceContentStreams done"); |
88 | 39.0k | page.parseContents(nullptr); |
89 | 39.0k | info("parseContents done"); |
90 | 39.0k | page.getImages(); |
91 | 39.0k | info("getImages done"); |
92 | 39.0k | pldh.getLabelForPage(pageno); |
93 | 39.0k | info("getLabelForPage done"); |
94 | 39.0k | QPDFObjectHandle page_obj(page.getObjectHandle()); |
95 | 39.0k | page_obj.getJSON(JSON::LATEST, true).unparse(); |
96 | 39.0k | info("getJSON done"); |
97 | 39.0k | odh.getOutlinesForPage(page_obj); |
98 | 39.0k | info("getOutlinesForPage done"); |
99 | | |
100 | 39.0k | for (auto& aoh: afdh.getWidgetAnnotationsForPage(page)) { |
101 | 3.12k | afdh.getFieldForAnnotation(aoh); |
102 | 3.12k | } |
103 | 39.0k | } catch (QPDFExc& e) { |
104 | 9.48k | std::cerr << "page " << pageno << ": " << e.what() << '\n'; |
105 | 9.48k | } |
106 | 39.0k | } |
107 | 46.3k | } |
108 | | |
109 | | void |
110 | | FuzzHelper::doChecks() |
111 | 284k | { |
112 | | // Limit the memory used to decompress JPEG files during fuzzing. Excessive memory use during |
113 | | // fuzzing is due to corrupt JPEG data which sometimes cannot be detected before |
114 | | // jpeg_start_decompress is called. During normal use of qpdf very large JPEGs can occasionally |
115 | | // occur legitimately and therefore must be allowed during normal operations. |
116 | 284k | Pl_DCT::setMemoryLimit(100'000'000); |
117 | 284k | Pl_DCT::setScanLimit(50); |
118 | | |
119 | 284k | Pl_PNGFilter::setMemoryLimit(1'000'000); |
120 | 284k | Pl_RunLength::setMemoryLimit(1'000'000); |
121 | 284k | Pl_TIFFPredictor::setMemoryLimit(1'000'000); |
122 | 284k | Pl_Flate::memory_limit(200'000); |
123 | | |
124 | | // Do not decompress corrupt data. This may cause extended runtime within jpeglib without |
125 | | // exercising additional code paths in qpdf, and potentially causing counterproductive timeouts. |
126 | 284k | Pl_DCT::setThrowOnCorruptData(true); |
127 | | |
128 | | // Get as much coverage as possible in parts of the library that |
129 | | // might benefit from fuzzing. |
130 | 284k | std::cerr << "\ninfo: starting testPages\n"; |
131 | 284k | testPages(); |
132 | 284k | } |
133 | | |
134 | | void |
135 | | FuzzHelper::run() |
136 | 263k | { |
137 | | // The goal here is that you should be able to throw anything at |
138 | | // libqpdf and it will respond without any memory errors and never |
139 | | // do anything worse than throwing a QPDFExc or |
140 | | // std::runtime_error. Throwing any other kind of exception, |
141 | | // segfaulting, or having a memory error (when built with |
142 | | // appropriate sanitizers) will all cause abnormal exit. |
143 | 263k | try { |
144 | 263k | doChecks(); |
145 | 263k | } catch (QPDFExc const& e) { |
146 | 174k | std::cerr << "QPDFExc: " << e.what() << '\n'; |
147 | 174k | } catch (std::runtime_error const& e) { |
148 | 650 | std::cerr << "runtime_error: " << e.what() << '\n'; |
149 | 650 | } |
150 | 263k | } |
151 | | |
152 | | extern "C" int |
153 | | LLVMFuzzerTestOneInput(unsigned char const* data, size_t size) |
154 | 300k | { |
155 | 300k | #ifndef _WIN32 |
156 | | // Used by jpeg library to work around false positives in memory |
157 | | // sanitizer. |
158 | 300k | setenv("JSIMD_FORCENONE", "1", 1); |
159 | 300k | #endif |
160 | 300k | FuzzHelper f(data, size); |
161 | 300k | f.run(); |
162 | 300k | return 0; |
163 | 300k | } |