Coverage Report

Created: 2026-05-31 06:50

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/kdegraphics-mobipocket/lib/mobipocket.cpp
Line
Count
Source
1
// SPDX-FileCopyrightText: 2008 by Jakub Stachowski <qbast@go2.pl>
2
// SPDX-License-Identifier: GPL-2.0-or-later
3
4
#include "mobipocket.h"
5
#include "decompressor.h"
6
#include "pdb_p.h"
7
#include "qmobipocket_debug.h"
8
9
#include <QBuffer>
10
#include <QIODevice>
11
#include <QImageReader>
12
#include <QRegularExpression>
13
#include <QStringConverter>
14
#include <QtEndian>
15
16
namespace Mobipocket
17
{
18
19
struct DocumentPrivate {
20
    DocumentPrivate(QIODevice *d)
21
0
        : pdb(d)
22
0
    {
23
0
    }
24
    PDB pdb;
25
    std::unique_ptr<Decompressor> dec;
26
    quint16 ntextrecords = 0;
27
    quint16 maxRecordSize = 0;
28
    bool valid = false;
29
30
    // number of first record holding image. Usually it is directly after end of text, but not always
31
    quint16 firstImageRecord = 0;
32
    QMap<Document::MetaKey, QString> metadata;
33
    QStringDecoder toUtf16;
34
    bool drm = false;
35
    quint32 extraflags = 0;
36
37
    // index of Thumbnail image in image list. May be specified in EXTH.
38
    int thumbnailIndex = -1;
39
    // index of Cover image in image list. May be specified in EXTH.
40
    int coverIndex = -1;
41
42
    void init();
43
    void findFirstImage();
44
    void parseEXTH(QByteArrayView data);
45
    void parseHtmlHead(const QString &data);
46
};
47
48
void DocumentPrivate::parseHtmlHead(const QString &data)
49
0
{
50
0
    static const QRegularExpression title(QLatin1String("<dc:title.*>(.*)</dc:title>"),
51
0
                                          QRegularExpression::CaseInsensitiveOption | QRegularExpression::InvertedGreedinessOption);
52
0
    static const QRegularExpression author(QLatin1String("<dc:creator.*>(.*)</dc:creator>"),
53
0
                                           QRegularExpression::CaseInsensitiveOption | QRegularExpression::InvertedGreedinessOption);
54
0
    static const QRegularExpression copyright(QLatin1String("<dc:rights.*>(.*)</dc:rights>"),
55
0
                                              QRegularExpression::CaseInsensitiveOption | QRegularExpression::InvertedGreedinessOption);
56
0
    static const QRegularExpression subject(QLatin1String("<dc:subject.*>(.*)</dc:subject>"),
57
0
                                            QRegularExpression::CaseInsensitiveOption | QRegularExpression::InvertedGreedinessOption);
58
0
    static const QRegularExpression description(QLatin1String("<dc:description.*>(.*)</dc:description>"),
59
0
                                                QRegularExpression::CaseInsensitiveOption | QRegularExpression::InvertedGreedinessOption);
60
61
    // title could have been already taken from MOBI record
62
0
    if (!metadata.contains(Document::Title)) {
63
0
        if (const auto titleMatch = title.match(data); titleMatch.hasMatch())
64
0
            metadata[Document::Title] = titleMatch.captured(1);
65
0
    }
66
0
    if (const auto authorMatch = author.match(data); authorMatch.hasMatch())
67
0
        metadata[Document::Author] = authorMatch.captured(1);
68
0
    if (const auto copyrightMatch = copyright.match(data); copyrightMatch.hasMatch())
69
0
        metadata[Document::Copyright] = copyrightMatch.captured(1);
70
0
    if (const auto subjectMatch = subject.match(data); subjectMatch.hasMatch())
71
0
        metadata[Document::Subject] = subjectMatch.captured(1);
72
0
    if (const auto descriptionMatch = description.match(data); descriptionMatch.hasMatch())
73
0
        metadata[Document::Description] = descriptionMatch.captured(1);
74
0
}
75
76
namespace
77
{
78
    const QVector<QByteArray> getHuffRecords(const PDB &pdb)
79
0
    {
80
0
        const QByteArray header = pdb.getRecord(0);
81
0
        if (header[1] != 'H') {
82
0
            return {};
83
0
        }
84
85
0
        quint32 huff_ofs = qFromBigEndian<quint32>(header.constData() + 0x70);
86
0
        quint32 huff_num = qFromBigEndian<quint32>(header.constData() + 0x74);
87
88
        // Check for overflow and out-of-bounds access
89
0
        if (((huff_ofs + huff_num) < huff_num) || ((huff_ofs + huff_num) > pdb.recordCount())) {
90
0
            return {};
91
0
        }
92
93
0
        QVector<QByteArray> records(huff_num);
94
0
        for (quint32 i = 0; i < huff_num; i++) {
95
0
            if (auto r = pdb.getRecord(huff_ofs + i); r.isNull()) {
96
0
                return {};
97
0
            } else {
98
0
                records[i] = r;
99
0
            }
100
0
        }
101
0
        return records;
102
0
    };
103
}
104
105
void DocumentPrivate::init()
106
0
{
107
0
    quint32 encoding = 0;
108
109
0
    if (!pdb.isValid())
110
0
        return;
111
0
    QByteArray mhead = pdb.getRecord(0);
112
0
    if (mhead.isNull() || mhead.size() < 14)
113
0
        return;
114
115
0
    dec = Decompressor::create(mhead[1], getHuffRecords(pdb));
116
0
    if ((int)mhead[12] != 0 || (int)mhead[13] != 0)
117
0
        drm = true;
118
0
    if (!dec)
119
0
        return;
120
121
0
    ntextrecords = qFromBigEndian<quint16>(mhead.constData() + 8);
122
0
    maxRecordSize = qFromBigEndian<quint16>(mhead.constData() + 10);
123
0
    if (mhead.size() > 31)
124
0
        encoding = qFromBigEndian<quint32>(mhead.constData() + 28);
125
0
    if (encoding == 65001) {
126
0
        toUtf16 = QStringDecoder(QStringDecoder::Utf8);
127
0
    } else {
128
0
        toUtf16 = QStringDecoder("windows-1252");
129
0
        if (!toUtf16.isValid()) {
130
0
            qCWarning(QMOBIPOCKET_LOG) << "Text codec \"windows-1252\" not supported by Qt library, falling back to Latin1";
131
0
            toUtf16 = QStringDecoder(QStringConverter::Latin1);
132
0
        }
133
0
    }
134
135
0
    parseEXTH(mhead);
136
137
0
    if (mhead.size() >= 244) {
138
0
        quint32 exthoffs = qFromBigEndian<quint32>(mhead.constData() + 20);
139
0
        if ((exthoffs + 16) > 244) {
140
0
            extraflags = qFromBigEndian<quint32>(mhead.constData() + 240);
141
0
        }
142
0
    }
143
144
    // try getting metadata from HTML if nothing or only title was recovered from MOBI and EXTH records
145
0
    if (metadata.size() < 2 && !drm)
146
0
        parseHtmlHead(toUtf16(dec->decompress(pdb.getRecord(1))));
147
0
    valid = true;
148
0
}
149
150
void DocumentPrivate::findFirstImage()
151
0
{
152
0
    firstImageRecord = ntextrecords + 1;
153
0
    while (firstImageRecord < pdb.recordCount()) {
154
0
        QByteArray rec = pdb.getRecord(firstImageRecord);
155
0
        if (rec.isNull())
156
0
            return;
157
0
        QBuffer buf(&rec);
158
0
        buf.open(QIODevice::ReadOnly);
159
0
        QImageReader r(&buf);
160
0
        if (r.canRead())
161
0
            return;
162
0
        firstImageRecord++;
163
0
    }
164
0
}
165
166
/// Extracts the document title and the EXTH metadata entries from @p data.
///
/// The title is located through the offset/length pair stored at bytes 84 and 88.
/// The EXTH block is expected 16 bytes after the offset stored at byte 20; its
/// entries are (type, length, payload) triples where the length includes the
/// 8-byte entry header. Parsing stops silently at the first entry that does not
/// fit into @p data.
void DocumentPrivate::parseEXTH(QByteArrayView data)
{
    if (data.size() < 92) {
        return;
    }

    // Reads a big-endian 32-bit value at byte position pos; callers guarantee the range.
    const auto readU32 = [data](quint32 pos) {
        return qFromBigEndian<quint32>(data.constData() + pos);
    };

    // try to get name
    const qint32 available = qint32(data.size());
    const qint32 nameoffset = readU32(84);
    const qint32 namelen = readU32(88);
    const bool nameFits = nameoffset >= 0 && namelen >= 0 //
        && nameoffset <= available && namelen <= available - nameoffset;
    if (nameFits) {
        metadata[Document::Title] = toUtf16(data.mid(nameoffset, namelen));
    }

    const quint32 size = quint32(data.size());
    const quint32 exthoffs = readU32(20);
    // size is at least 92 here, so "size - 28" cannot wrap around.
    if (exthoffs > size - 28 || data.mid(exthoffs + 16, 4) != "EXTH") {
        return;
    }

    quint32 offset = exthoffs + 28;
    for (quint32 remaining = readU32(exthoffs + 24); remaining > 0 && offset <= size - 8; --remaining) {
        const quint32 type = readU32(offset);
        const quint32 len = readU32(offset + 4);
        if (len < 8 || len > size - offset) {
            break;
        }

        // Entry payload without the 8-byte (type, length) header.
        const QByteArrayView payload = data.mid(offset + 8, len - 8);
        const auto storeText = [&](Document::MetaKey key) {
            metadata[key] = toUtf16(payload);
        };

        switch (type) {
        case 100:
            storeText(Document::Author);
            break;
        case 103:
            storeText(Document::Description);
            break;
        case 105:
            storeText(Document::Subject);
            break;
        case 109:
            storeText(Document::Copyright);
            break;
        case 201:
            if (len >= 12) {
                coverIndex = qFromBigEndian<quint32>(payload.constData());
            }
            break;
        case 202:
            if (len >= 12) {
                thumbnailIndex = qFromBigEndian<quint32>(payload.constData());
            }
            break;
        default:
            // ignore
            break;
        }
        offset += len;
    }
}
224
225
/// Creates a document reading from @p dev and parses its headers right away.
///
/// @p dev is expected to be open for reading and to be a random-access
/// (non-sequential) device; both expectations are only checked via Q_ASSERT.
Document::Document(QIODevice *dev)
    : d(new DocumentPrivate(dev))
{
    // Precondition checks on the caller-supplied device.
    Q_ASSERT(dev->openMode() & QIODevice::ReadOnly);
    Q_ASSERT(!dev->isSequential());

    d->init();
}
232
233
/// Releases the private data allocated in the constructor.
Document::~Document()
{
    delete d;
}
237
238
namespace
239
{
240
constexpr qsizetype preTrailingDataLength(QByteArrayView data, quint32 flags)
241
0
{
242
0
    if (flags == 0) {
243
0
        return data.size();
244
0
    }
245
246
0
    for (int i = 31; i > 0; i--) {
247
0
        if ((flags & (1u << i)) == 0) {
248
0
            continue;
249
0
        }
250
251
0
        qsizetype chopN = 0;
252
0
        for (int j = 0; j < 4; j++) {
253
0
            if (j + 1 > data.size()) {
254
0
                return 0;
255
0
            }
256
0
            quint8 l = data.at(data.size() - (j + 1));
257
0
            chopN |= (l & 0x7f) << (7 * j);
258
0
            if (l & 0x80) {
259
0
                break;
260
0
            }
261
0
        }
262
0
        data.chop(std::min<qsizetype>(chopN, data.size()));
263
0
    }
264
0
    if ((flags & 0x1) && !data.isEmpty()) {
265
0
        quint8 l = data.back() & 0x3;
266
0
        data.chop(std::min<qsizetype>(l + 1, data.size()));
267
0
    }
268
0
    return data.size();
269
0
}
270
static_assert(preTrailingDataLength({"0\x00", 2}, 0x0) == 2);
271
static_assert(preTrailingDataLength({"0\x00", 2}, 0x1) == 1);
272
static_assert(preTrailingDataLength({"0\x01", 2}, 0x1) == 0);
273
static_assert(preTrailingDataLength({"0\x02", 2}, 0x1) == 0);
274
static_assert(preTrailingDataLength({"abcd\x03", 5}, 0x1) == 1);
275
static_assert(preTrailingDataLength({"abcd\x81", 5}, 0x2) == 4);
276
static_assert(preTrailingDataLength({"\x02\x01", 2}, 0x2) == 0);
277
static_assert(preTrailingDataLength({"\x80\x02", 2}, 0x2) == 0);
278
static_assert(preTrailingDataLength({"abcd\x85", 5}, 0x2) == 0);
279
static_assert(preTrailingDataLength({"abc\x01\x7f\x82", 6}, 0x2) == 4);
280
static_assert(preTrailingDataLength({"abc\x01\x80\x02", 6}, 0x2) == 4);
281
static_assert(preTrailingDataLength({"abc\x01\x7f\x82", 6}, 0x3) == 2);
282
static_assert(preTrailingDataLength({"abc\x81\x80\x02", 6}, 0x6) == 3);
283
static_assert(preTrailingDataLength({"abc\x00\x81\x81", 6}, 0x7) == 3);
284
} // namespace
285
286
/// Decompresses the text records and returns them decoded to a QString.
///
/// @param size  Once more than this many decompressed bytes have been collected
///              no further records are read; -1 disables the limit.
/// @return The decoded text, or a null QString if the decompressor reports an
///         error (the document is then also marked invalid).
QString Document::text(int size) const
{
    const int lastTextRecord = d->ntextrecords;
    const bool unlimited = (size == -1);

    QByteArray whole;
    // Text lives in records 1..ntextrecords; record 0 is the header.
    for (int recordIndex = 1; recordIndex <= lastTextRecord; ++recordIndex) {
        auto record = d->pdb.getRecord(recordIndex);
        // Strip the trailing entries announced by the extra flags before decompressing.
        record.resize(preTrailingDataLength(record, d->extraflags));
        whole.append(d->dec->decompress(record));

        if (!d->dec->isValid()) {
            d->valid = false;
            return {};
        }
        if (!unlimited && whole.size() > size) {
            break;
        }
    }
    return d->toUtf16(whole);
}
303
304
int Document::imageCount() const
305
0
{
306
    // FIXME: don't count FLIS and FCIS records
307
0
    return d->pdb.recordCount() - d->ntextrecords;
308
0
}
309
310
bool Document::isValid() const
311
0
{
312
0
    return d->valid;
313
0
}
314
315
/// Returns image number @p i, counted from the first image record.
///
/// The first image record is located lazily on first use. A null QImage is
/// returned when @p i is negative, larger than a quint16 can hold, beyond the
/// last record, or when the record cannot be fetched.
QImage Document::getImage(int i) const
{
    if (d->firstImageRecord == 0) {
        d->findFirstImage();
    }

    const bool indexRepresentable = (i >= 0) && (i <= std::numeric_limits<quint16>::max());
    if (!indexRepresentable || (d->firstImageRecord + i) >= d->pdb.recordCount()) {
        return {};
    }

    const QByteArray rec = d->pdb.getRecord(d->firstImageRecord + i);
    if (rec.isNull()) {
        return QImage();
    }
    return QImage::fromData(rec);
}
328
329
/// Returns a copy of the metadata collected while the document was opened.
QMap<Document::MetaKey, QString> Document::metadata() const
{
    const auto &collected = d->metadata;
    return collected;
}
333
334
bool Document::hasDRM() const
335
0
{
336
0
    return d->drm;
337
0
}
338
339
/// Returns the thumbnail image, falling back to the cover image.
///
/// The result is a null QImage when neither index refers to a loadable image.
QImage Document::thumbnail() const
{
    const QImage thumb = getImage(d->thumbnailIndex);
    // Fall back to cover image, or return an empty image
    return thumb.isNull() ? getImage(d->coverIndex) : thumb;
}
348
349
}