/src/kdegraphics-mobipocket/lib/decompressor.cpp
Line | Count | Source |
1 | | // SPDX-FileCopyrightText: 2008 by Jakub Stachowski <qbast@go2.pl> |
2 | | // RLE decompressor based on FBReader |
3 | | // SPDX-FileCopyrightText: 2004-2008 Geometer Plus <contact@geometerplus.com> |
4 | | // Huffdic decompressor based on Python code by Igor Skochinsky |
5 | | // SPDX-License-Identifier: GPL-2.0-or-later |
6 | | |
7 | | #include "decompressor.h" |
8 | | |
9 | | #include "bitreader_p.h" |
10 | | |
11 | | #include <QVector> |
12 | | #include <QtEndian> |
13 | | |
14 | | #include <vector> |
15 | | |
// Classifies every possible input byte for RLEDecompressor::decompress().
// The class selects the branch taken by the decoder's switch statement:
//   0 - the byte is copied to the output unchanged
//   1 - the byte is a count of following bytes that are copied unchanged
//   2 - a space is emitted, followed by the byte with its top bit flipped
//   3 - the byte starts a two-byte back-reference into the output
// clang-format off
static constexpr unsigned char TOKEN_CODE[256] = {
    // 0x00: class 0, 0x01-0x08: class 1, 0x09-0x0f: class 0
    0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0,
    // 0x10-0x7f: class 0
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    // 0x80-0xbf: class 3
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
    // 0xc0-0xff: class 2
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
};
// clang-format on
36 | | |
37 | | namespace Mobipocket |
38 | | { |
39 | | |
40 | | class NOOPDecompressor : public Decompressor |
41 | | { |
42 | | public: |
43 | | NOOPDecompressor() |
44 | 0 | { |
45 | 0 | valid = true; |
46 | 0 | } |
47 | | QByteArray decompress(const QByteArray &data) override |
48 | 0 | { |
49 | 0 | return data; |
50 | 0 | } |
51 | | }; |
52 | | |
53 | | class RLEDecompressor : public Decompressor |
54 | | { |
55 | | public: |
56 | | RLEDecompressor() |
57 | 0 | { |
58 | 0 | valid = true; |
59 | 0 | } |
60 | | QByteArray decompress(const QByteArray &data) override; |
61 | | }; |
62 | | |
63 | | class HuffdicDecompressor : public Decompressor |
64 | | { |
65 | | public: |
66 | | HuffdicDecompressor() = delete; |
67 | | HuffdicDecompressor(const HuffdicDecompressor &) = delete; |
68 | | HuffdicDecompressor(const QVector<QByteArray> &huffData); |
69 | | QByteArray decompress(const QByteArray &data) override; |
70 | | |
71 | | private: |
72 | | bool unpack(std::vector<char> &buf, BitReader reader, int depth) const; |
73 | | const QVector<QByteArray> dicts; |
74 | | quint32 entry_bits; |
75 | | quint32 dict1[256]; |
76 | | quint32 dict2[64]; |
77 | | }; |
78 | | |
79 | | QByteArray RLEDecompressor::decompress(const QByteArray &data) |
80 | 0 | { |
81 | 0 | QByteArray ret; |
82 | 0 | ret.reserve(8192); |
83 | |
|
84 | 0 | int i = 0; |
85 | 0 | int maxIndex = data.size() - 1; |
86 | |
|
87 | 0 | while (i < data.size()) { |
88 | 0 | unsigned char token = data.at(i++); |
89 | 0 | switch (TOKEN_CODE[token]) { |
90 | 0 | case 0: |
91 | 0 | ret.append(token); |
92 | 0 | break; |
93 | 0 | case 1: |
94 | 0 | if ((i + token > maxIndex + 1)) { |
95 | 0 | return ret; |
96 | 0 | } |
97 | 0 | ret.append(data.mid(i, token)); |
98 | 0 | i += token; |
99 | 0 | break; |
100 | 0 | case 2: |
101 | 0 | ret.append(' '); |
102 | 0 | ret.append(token ^ 0x80); |
103 | 0 | break; |
104 | 0 | case 3: |
105 | 0 | { |
106 | 0 | if (i > maxIndex) { |
107 | 0 | return ret; |
108 | 0 | } |
109 | 0 | quint16 N = token << 8; |
110 | 0 | N += (unsigned char)data.at(i++); |
111 | 0 | quint16 copyLength = (N & 7) + 3; |
112 | 0 | quint16 shift = (N & 0x3fff) / 8; |
113 | 0 | if ((shift < 1) || (shift > ret.size())) { |
114 | 0 | return ret; |
115 | 0 | } |
116 | 0 | auto shifted = ret.size() - shift; |
117 | 0 | for (auto j = shifted; j < shifted + copyLength; j++) { |
118 | 0 | ret.append(ret.at(j)); |
119 | 0 | } |
120 | 0 | } |
121 | 0 | break; |
122 | 0 | } |
123 | 0 | } |
124 | 0 | return ret; |
125 | 0 | } |
126 | | |
127 | | HuffdicDecompressor::HuffdicDecompressor(const QVector<QByteArray> &huffData) |
128 | 0 | : dicts(huffData.mid(1)) |
129 | 0 | { |
130 | 0 | if (dicts.empty()) |
131 | 0 | return; |
132 | | |
133 | 0 | if ((dicts[0].size() < 18) || !dicts[0].startsWith("CDIC")) |
134 | 0 | return; |
135 | | |
136 | 0 | const QByteArray &huff1 = huffData[0]; |
137 | 0 | if ((huff1.size() < 24) || !huff1.startsWith("HUFF")) |
138 | 0 | return; |
139 | | |
140 | 0 | quint32 off1 = qFromBigEndian<quint32>(huff1.constData() + 16); |
141 | 0 | quint32 off2 = qFromBigEndian<quint32>(huff1.constData() + 20); |
142 | 0 | if (((off1 + 256 * 4) > huff1.size()) || ((off2 + 64 * 4) > huff1.size())) |
143 | 0 | return; |
144 | | |
145 | 0 | memcpy(dict1, huff1.data() + off1, 256 * 4); |
146 | 0 | memcpy(dict2, huff1.data() + off2, 64 * 4); |
147 | |
|
148 | 0 | entry_bits = qFromBigEndian<quint32>(dicts[0].constData() + 12); |
149 | 0 | if (entry_bits > 32) |
150 | 0 | return; |
151 | | |
152 | 0 | valid = true; |
153 | 0 | } |
154 | | |
155 | | QByteArray HuffdicDecompressor::decompress(const QByteArray &data) |
156 | 0 | { |
157 | 0 | std::vector<char> buf; |
158 | 0 | buf.reserve(4096); |
159 | 0 | if (!unpack(buf, BitReader(data), 0)) { |
160 | 0 | valid = false; |
161 | 0 | } |
162 | 0 | return QByteArray(buf.data(), buf.size()); |
163 | 0 | } |
164 | | |
165 | | bool HuffdicDecompressor::unpack(std::vector<char> &buf, BitReader reader, int depth) const |
166 | 0 | { |
167 | | // These two checks are fairly arbitrary, due to lack of an actual specification |
168 | | // Both exceed typical real world files by far, but are useful to protect against |
169 | | // 'ZIP bomb' style attacks |
170 | 0 | if (depth > 32) { |
171 | 0 | return false; |
172 | 0 | } else if (buf.size() > 16 * 1024 * 1024) { |
173 | 0 | return false; |
174 | 0 | } |
175 | | |
176 | 0 | auto dict_count = dicts.size(); |
177 | 0 | quint32 entry_mask = (quint64(1) << entry_bits) - 1; |
178 | |
|
179 | 0 | while (reader.left()) { |
180 | 0 | quint32 dw = reader.read(); |
181 | 0 | quint32 v = dict1[dw >> 24]; |
182 | 0 | quint8 codelen = v & 0x1F; |
183 | 0 | if (!codelen) |
184 | 0 | return false; |
185 | 0 | quint32 code = dw >> (32 - codelen); |
186 | 0 | quint32 r = (v >> 8); |
187 | 0 | if (!(v & 0x80)) { |
188 | 0 | while (code < dict2[(codelen - 1) * 2]) { |
189 | 0 | codelen++; |
190 | 0 | code = dw >> (32 - codelen); |
191 | 0 | } |
192 | 0 | r = dict2[(codelen - 1) * 2 + 1]; |
193 | 0 | } |
194 | 0 | r -= code; |
195 | 0 | if (!reader.eat(codelen)) |
196 | 0 | return true; |
197 | 0 | quint32 dict_no = quint64(r) >> entry_bits; |
198 | 0 | if (dict_no >= dict_count) { |
199 | 0 | return false; |
200 | 0 | } |
201 | 0 | QByteArrayView dict = dicts.at(dict_no); |
202 | 0 | auto dict_size = dict.size(); |
203 | |
|
204 | 0 | quint32 off1 = 16 + (r & entry_mask) * 2; |
205 | 0 | if (off1 > (dict_size - 2)) { |
206 | 0 | return false; |
207 | 0 | } |
208 | | |
209 | 0 | quint16 off2 = 16 + qFromBigEndian<quint16>(dict.constData() + off1); |
210 | 0 | if (off2 > (dict_size - 2)) { |
211 | 0 | return false; |
212 | 0 | } |
213 | | |
214 | 0 | quint16 blen = qFromBigEndian<quint16>(dict.constData() + off2); |
215 | 0 | if ((blen & 0x7fff) > (dict_size - 2 - off2)) { |
216 | 0 | return false; |
217 | 0 | } |
218 | | |
219 | 0 | auto slice = dict.mid(off2 + 2, (blen & 0x7fff)); |
220 | 0 | if (blen & 0x8000) { |
221 | 0 | buf.insert(buf.end(), slice.begin(), slice.end()); |
222 | 0 | } else { |
223 | 0 | if (!unpack(buf, BitReader(slice), depth + 1)) { |
224 | 0 | return false; |
225 | 0 | } |
226 | 0 | } |
227 | 0 | } |
228 | 0 | return true; |
229 | 0 | } |
230 | | |
231 | | std::unique_ptr<Decompressor> Decompressor::create(quint8 type, const QVector<QByteArray> &auxData) |
232 | 0 | { |
233 | 0 | switch (type) { |
234 | 0 | case 1: |
235 | 0 | return std::make_unique<NOOPDecompressor>(); |
236 | 0 | case 2: |
237 | 0 | return std::make_unique<RLEDecompressor>(); |
238 | 0 | case 'H': |
239 | 0 | return std::make_unique<HuffdicDecompressor>(auxData); |
240 | 0 | default: |
241 | 0 | return nullptr; |
242 | 0 | } |
243 | 0 | } |
244 | | } |