Coverage Report

Created: 2026-01-25 07:18

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/kdegraphics-mobipocket/lib/mobipocket.cpp
Line
Count
Source
1
// SPDX-FileCopyrightText: 2008 by Jakub Stachowski <qbast@go2.pl>
2
// SPDX-License-Identifier: GPL-2.0-or-later
3
4
#include "mobipocket.h"
5
#include "decompressor.h"
6
#include "pdb_p.h"
7
#include "qmobipocket_debug.h"
8
9
#include <QBuffer>
10
#include <QIODevice>
11
#include <QImageReader>
12
#include <QRegularExpression>
13
#include <QStringConverter>
14
#include <QtEndian>
15
16
namespace Mobipocket
17
{
18
19
struct DocumentPrivate 
20
{
21
    DocumentPrivate(QIODevice *d)
22
0
        : pdb(d)
23
0
    {
24
0
    }
25
    PDB pdb;
26
    std::unique_ptr<Decompressor> dec;
27
    quint16 ntextrecords = 0;
28
    quint16 maxRecordSize = 0;
29
    bool valid = false;
30
31
    // number of first record holding image. Usually it is directly after end of text, but not always
32
    quint16 firstImageRecord = 0;
33
    QMap<Document::MetaKey, QString> metadata;
34
    QStringDecoder toUtf16;
35
    bool drm = false;
36
    quint32 extraflags = 0;
37
38
    // index of Thumbnail image in image list. May be specified in EXTH.
39
    int thumbnailIndex = -1;
40
    // index of Cover image in image list. May be specified in EXTH.
41
    int coverIndex = -1;
42
43
    void init();
44
    void findFirstImage();
45
    void parseEXTH(QByteArrayView data);
46
    void parseHtmlHead(const QString &data);
47
};
48
49
void DocumentPrivate::parseHtmlHead(const QString &data)
50
0
{
51
0
    static const QRegularExpression title(QLatin1String("<dc:title.*>(.*)</dc:title>"),
52
0
                                          QRegularExpression::CaseInsensitiveOption | QRegularExpression::InvertedGreedinessOption);
53
0
    static const QRegularExpression author(QLatin1String("<dc:creator.*>(.*)</dc:creator>"),
54
0
                                           QRegularExpression::CaseInsensitiveOption | QRegularExpression::InvertedGreedinessOption);
55
0
    static const QRegularExpression copyright(QLatin1String("<dc:rights.*>(.*)</dc:rights>"),
56
0
                                              QRegularExpression::CaseInsensitiveOption | QRegularExpression::InvertedGreedinessOption);
57
0
    static const QRegularExpression subject(QLatin1String("<dc:subject.*>(.*)</dc:subject>"),
58
0
                                            QRegularExpression::CaseInsensitiveOption | QRegularExpression::InvertedGreedinessOption);
59
0
    static const QRegularExpression description(QLatin1String("<dc:description.*>(.*)</dc:description>"),
60
0
                                                QRegularExpression::CaseInsensitiveOption | QRegularExpression::InvertedGreedinessOption);
61
62
    // title could have been already taken from MOBI record
63
0
    if (!metadata.contains(Document::Title)) {
64
0
        if (const auto titleMatch = title.match(data); titleMatch.hasMatch())
65
0
            metadata[Document::Title] = titleMatch.captured(1);
66
0
    }
67
0
    if (const auto authorMatch = author.match(data); authorMatch.hasMatch())
68
0
        metadata[Document::Author] = authorMatch.captured(1);
69
0
    if (const auto copyrightMatch = copyright.match(data); copyrightMatch.hasMatch())
70
0
        metadata[Document::Copyright] = copyrightMatch.captured(1);
71
0
    if (const auto subjectMatch = subject.match(data); subjectMatch.hasMatch())
72
0
        metadata[Document::Subject] = subjectMatch.captured(1);
73
0
    if (const auto descriptionMatch = description.match(data); descriptionMatch.hasMatch())
74
0
        metadata[Document::Description] = descriptionMatch.captured(1);
75
0
}
76
77
namespace
78
{
79
    const QVector<QByteArray> getHuffRecords(const PDB &pdb)
80
0
    {
81
0
        const QByteArray header = pdb.getRecord(0);
82
0
        if (header[1] != 'H') {
83
0
            return {};
84
0
        }
85
86
0
        quint32 huff_ofs = qFromBigEndian<quint32>(header.constData() + 0x70);
87
0
        quint32 huff_num = qFromBigEndian<quint32>(header.constData() + 0x74);
88
89
        // Check for overflow and out-of-bounds access
90
0
        if (((huff_ofs + huff_num) < huff_num) || ((huff_ofs + huff_num) > pdb.recordCount())) {
91
0
            return {};
92
0
        }
93
94
0
        QVector<QByteArray> records(huff_num);
95
0
        for (quint32 i = 0; i < huff_num; i++) {
96
0
            if (auto r = pdb.getRecord(huff_ofs + i); r.isNull()) {
97
0
                return {};
98
0
            } else {
99
0
                records[i] = r;
100
0
            }
101
0
        }
102
0
        return records;
103
0
    };
104
}
105
106
void DocumentPrivate::init()
107
0
{
108
0
    quint32 encoding = 0;
109
110
0
    if (!pdb.isValid())
111
0
        return;
112
0
    QByteArray mhead = pdb.getRecord(0);
113
0
    if (mhead.isNull() || mhead.size() < 14)
114
0
        return;
115
116
0
    dec = Decompressor::create(mhead[1], getHuffRecords(pdb));
117
0
    if ((int)mhead[12] != 0 || (int)mhead[13] != 0)
118
0
        drm = true;
119
0
    if (!dec)
120
0
        return;
121
122
0
    ntextrecords = qFromBigEndian<quint16>(mhead.constData() + 8);
123
0
    maxRecordSize = qFromBigEndian<quint16>(mhead.constData() + 10);
124
0
    if (mhead.size() > 31)
125
0
        encoding = qFromBigEndian<quint32>(mhead.constData() + 28);
126
0
    if (encoding == 65001) {
127
0
        toUtf16 = QStringDecoder(QStringDecoder::Utf8);
128
0
    } else {
129
0
        toUtf16 = QStringDecoder("windows-1252");
130
0
        if (!toUtf16.isValid()) {
131
0
            qCWarning(QMOBIPOCKET_LOG) << "Text codec \"windows-1252\" not supported by Qt library, falling back to Latin1";
132
0
            toUtf16 = QStringDecoder(QStringConverter::Latin1);
133
0
        }
134
0
    }
135
0
    if (mhead.size() >= 92)
136
0
        parseEXTH(mhead);
137
138
0
    if (mhead.size() >= 244) {
139
0
        quint32 exthoffs = qFromBigEndian<quint32>(mhead.constData() + 20);
140
0
        if ((exthoffs + 16) > 244) {
141
0
            extraflags = qFromBigEndian<quint32>(mhead.constData() + 240);
142
0
        }
143
0
    }
144
145
    // try getting metadata from HTML if nothing or only title was recovered from MOBI and EXTH records
146
0
    if (metadata.size() < 2 && !drm)
147
0
        parseHtmlHead(toUtf16(dec->decompress(pdb.getRecord(1))));
148
0
    valid = true;
149
0
}
150
151
void DocumentPrivate::findFirstImage()
152
0
{
153
0
    firstImageRecord = ntextrecords + 1;
154
0
    while (firstImageRecord < pdb.recordCount()) {
155
0
        QByteArray rec = pdb.getRecord(firstImageRecord);
156
0
        if (rec.isNull())
157
0
            return;
158
0
        QBuffer buf(&rec);
159
0
        buf.open(QIODevice::ReadOnly);
160
0
        QImageReader r(&buf);
161
0
        if (r.canRead())
162
0
            return;
163
0
        firstImageRecord++;
164
0
    }
165
0
}
166
167
/// Extracts the document title and the EXTH metadata records from @p data
/// (the content of record 0).
///
/// All offsets and lengths come from the file and are untrusted. They are
/// widened to 64 bits before any arithmetic so that sums cannot wrap around
/// and slip past the bounds checks. Parsing stops silently at the first
/// malformed field; whatever was collected up to that point is kept.
void DocumentPrivate::parseEXTH(QByteArrayView data)
{
    const quint64 size = quint64(data.size());

    // Callers must have verified that pos + 4 <= size.
    const auto readU32 = [data](quint64 pos) -> quint32 {
        return qFromBigEndian<quint32>(data.constData() + pos);
    };

    // try to get name
    if (size >= 92) {
        // Kept unsigned: a value with the top bit set must be rejected by the
        // bounds check instead of turning into a negative offset or length.
        const quint64 nameOffset = readU32(84);
        const quint64 nameLength = readU32(88);
        if (nameOffset + nameLength <= size) {
            metadata[Document::Title] = toUtf16(data.mid(qsizetype(nameOffset), qsizetype(nameLength)));
        }
    }

    // The field at offset 20 needs 24 bytes of header.
    if (size < 24) {
        return;
    }

    // "EXTH" is expected at exthOffset + 16, the record count at
    // exthOffset + 24 and the first record at exthOffset + 28.
    const quint64 exthOffset = readU32(20);
    if (exthOffset + 28 > size) {
        return;
    }

    if (data.mid(qsizetype(exthOffset + 16), 4) != "EXTH") {
        return;
    }

    const quint32 records = readU32(exthOffset + 24);
    quint64 offset = exthOffset + 28;
    for (quint32 i = 0; i < records; i++) {
        // Each record starts with an 8-byte header: type and total length.
        if (offset + 8 > size) {
            break;
        }
        const quint32 type = readU32(offset);
        const quint64 len = readU32(offset + 4);
        // len includes the 8-byte header. A smaller value is malformed: it
        // would underflow the payload length and, for len == 0, keep the loop
        // spinning on the same offset.
        if (len < 8 || offset + len > size) {
            break;
        }

        const QByteArrayView payload = data.mid(qsizetype(offset + 8), qsizetype(len - 8));
        switch (type) {
        case 100:
            metadata[Document::Author] = toUtf16(payload);
            break;
        case 103:
            metadata[Document::Description] = toUtf16(payload);
            break;
        case 105:
            metadata[Document::Subject] = toUtf16(payload);
            break;
        case 109:
            metadata[Document::Copyright] = toUtf16(payload);
            break;
        case 201:
            // Only read the index when the record really carries 4 payload bytes.
            if (payload.size() >= qsizetype(sizeof(quint32))) {
                coverIndex = static_cast<int>(qFromBigEndian<quint32>(payload.constData()));
            }
            break;
        case 202:
            if (payload.size() >= qsizetype(sizeof(quint32))) {
                thumbnailIndex = static_cast<int>(qFromBigEndian<quint32>(payload.constData()));
            }
            break;
        default:
            // ignore
            break;
        }
        offset += len;
    }
}
220
221
/// Creates a document backed by @p dev and parses its header immediately.
/// In debug builds, asserts that @p dev is open for reading and is not a
/// sequential device. Whether parsing succeeded is reported by isValid().
Document::Document(QIODevice *dev)
222
0
    : d(new DocumentPrivate(dev))
223
0
{
224
0
    Q_ASSERT(dev->openMode() & QIODevice::ReadOnly);
225
0
    Q_ASSERT(!dev->isSequential());
226
0
    d->init();
227
0
}
228
229
/// Destroys the private implementation object allocated by the constructor.
Document::~Document()
230
0
{
231
0
    delete d;
232
0
}
233
234
namespace
235
{
236
constexpr qsizetype preTrailingDataLength(QByteArrayView data, quint32 flags)
237
0
{
238
0
    if (flags == 0) {
239
0
        return data.size();
240
0
    }
241
242
0
    for (int i = 31; i > 0; i--) {
243
0
        if ((flags & (1u << i)) == 0) {
244
0
            continue;
245
0
        }
246
247
0
        qsizetype chopN = 0;
248
0
        for (int j = 0; j < 4; j++) {
249
0
            if (j + 1 > data.size()) {
250
0
                return 0;
251
0
            }
252
0
            quint8 l = data.at(data.size() - (j + 1));
253
0
            chopN |= (l & 0x7f) << (7 * j);
254
0
            if (l & 0x80) {
255
0
                break;
256
0
            }
257
0
        }
258
0
        data.chop(std::min<qsizetype>(chopN, data.size()));
259
0
    }
260
0
    if ((flags & 0x1) && !data.isEmpty()) {
261
0
        quint8 l = data.back() & 0x3;
262
0
        data.chop(std::min<qsizetype>(l + 1, data.size()));
263
0
    }
264
0
    return data.size();
265
0
}
266
static_assert(preTrailingDataLength({"0\x00", 2}, 0x0) == 2);
267
static_assert(preTrailingDataLength({"0\x00", 2}, 0x1) == 1);
268
static_assert(preTrailingDataLength({"0\x01", 2}, 0x1) == 0);
269
static_assert(preTrailingDataLength({"0\x02", 2}, 0x1) == 0);
270
static_assert(preTrailingDataLength({"abcd\x03", 5}, 0x1) == 1);
271
static_assert(preTrailingDataLength({"abcd\x81", 5}, 0x2) == 4);
272
static_assert(preTrailingDataLength({"\x02\x01", 2}, 0x2) == 0);
273
static_assert(preTrailingDataLength({"\x80\x02", 2}, 0x2) == 0);
274
static_assert(preTrailingDataLength({"abcd\x85", 5}, 0x2) == 0);
275
static_assert(preTrailingDataLength({"abc\x01\x7f\x82", 6}, 0x2) == 4);
276
static_assert(preTrailingDataLength({"abc\x01\x80\x02", 6}, 0x2) == 4);
277
static_assert(preTrailingDataLength({"abc\x01\x7f\x82", 6}, 0x3) == 2);
278
static_assert(preTrailingDataLength({"abc\x81\x80\x02", 6}, 0x6) == 3);
279
static_assert(preTrailingDataLength({"abc\x00\x81\x81", 6}, 0x7) == 3);
280
} // namespace
281
282
/// Decompresses the text records (1 .. ntextrecords) and returns them decoded
/// as a QString. With @p size other than -1, no further records are read once
/// more than @p size bytes have been collected; the result is not truncated.
/// If the decompressor reports an error, the document is marked invalid and
/// an empty string is returned.
QString Document::text(int size) const
{
    QByteArray collected;
    const bool unlimited = (size == -1);

    for (int recordIndex = 1; recordIndex <= d->ntextrecords; ++recordIndex) {
        auto raw = d->pdb.getRecord(recordIndex);
        // Strip the trailing entries before handing the record to the decompressor.
        raw.resize(preTrailingDataLength(raw, d->extraflags));

        collected += d->dec->decompress(raw);
        if (!d->dec->isValid()) {
            d->valid = false;
            return QString();
        }

        if (!unlimited && collected.size() > size) {
            break;
        }
    }
    return d->toUtf16(collected);
}
299
300
int Document::imageCount() const
301
0
{
302
    // FIXME: don't count FLIS and FCIS records
303
0
    return d->pdb.recordCount() - d->ntextrecords;
304
0
}
305
306
/// Returns the validity flag: set when header parsing completed, cleared
/// again by text() when decompression fails.
bool Document::isValid() const
307
0
{
308
0
    return d->valid;
309
0
}
310
311
/// Returns the image stored @p i records after the first image record, or a
/// null QImage when @p i is out of range or the record is null.
/// The first image record is located on demand.
QImage Document::getImage(int i) const
{
    if (d->firstImageRecord == 0) {
        d->findFirstImage();
    }

    const bool indexRepresentable = (i >= 0) && (i <= std::numeric_limits<quint16>::max());
    if (!indexRepresentable || (d->firstImageRecord + i) >= d->pdb.recordCount()) {
        return {};
    }

    const QByteArray imageData = d->pdb.getRecord(d->firstImageRecord + i);
    if (imageData.isNull()) {
        return QImage();
    }
    return QImage::fromData(imageData);
}
324
325
/// Returns a copy of the metadata collected while parsing the header.
QMap<Document::MetaKey, QString> Document::metadata() const
326
0
{
327
0
    return d->metadata;
328
0
}
329
330
/// Returns the DRM flag determined while parsing the header.
bool Document::hasDRM() const
331
0
{
332
0
    return d->drm;
333
0
}
334
335
/// Returns the image at the thumbnail index, falling back to the image at
/// the cover index. The result is a null QImage if neither can be loaded.
QImage Document::thumbnail() const
{
    QImage picture = getImage(d->thumbnailIndex);
    if (picture.isNull()) {
        // Fall back to cover image, or return an empty image
        picture = getImage(d->coverIndex);
    }
    return picture;
}
344
345
}