Coverage Report

Created: 2026-03-30 09:00

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/gdal/frmts/pdf/pdfio.cpp
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * Project:  PDF driver
4
 * Purpose:  GDALDataset driver for PDF dataset.
5
 * Author:   Even Rouault, <even dot rouault at spatialys.com>
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2010-2013, Even Rouault <even dot rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
#include "gdal_pdf.h"
14
15
#ifdef HAVE_POPPLER
16
17
#include "pdfio.h"
18
19
#include "cpl_vsi.h"
20
21
// Returns the offset reported by VSIFTellL() after seeking to the end of
// the file, i.e. the file size in bytes. As a side effect the handle is
// left positioned at offset 0.
static vsi_l_offset VSIPDFFileStreamGetSize(VSILFILE *f)
{
    // Jump to the end so that the current offset equals the file size.
    VSIFSeekL(f, 0, SEEK_END);
    const vsi_l_offset nFileSize = VSIFTellL(f);

    // Rewind to the beginning of the file before handing the size back.
    VSIFSeekL(f, 0, SEEK_SET);

    return nFileSize;
}
28
29
/************************************************************************/
30
/*                          VSIPDFFileStream()                          */
31
/************************************************************************/
32
33
VSIPDFFileStream::VSIPDFFileStream(VSILFILE *fIn, const char *pszFilename,
34
                                   Object &&dictA)
35
44.2k
    : BaseStream(std::move(dictA),
36
44.2k
                 static_cast<Goffset>(VSIPDFFileStreamGetSize(fIn))),
37
44.2k
      poParent(nullptr), poFilename(new GooString(pszFilename)), f(fIn)
38
44.2k
{
39
44.2k
}
40
41
/************************************************************************/
42
/*                          VSIPDFFileStream()                          */
43
/************************************************************************/
44
45
// Sub-stream constructor.
//
// poParentIn : stream whose file handle and file name object are shared with
//              the new stream. It is dereferenced here, so it must not be
//              null.
// startA     : offset stored as the start of the sub-stream.
// limitedA   : whether reads are bounded by startA + lengthA.
// lengthA    : length stored for the sub-stream and passed to BaseStream.
// dictA      : stream dictionary, moved into the BaseStream base class.
VSIPDFFileStream::VSIPDFFileStream(VSIPDFFileStream *poParentIn,
                                   vsi_l_offset startA, bool limitedA,
                                   vsi_l_offset lengthA, Object &&dictA)
    : BaseStream(std::move(dictA), static_cast<Goffset>(lengthA)),
      poParent(poParentIn),
      // Borrowed from the parent: only parent-less streams delete it.
      poFilename(poParentIn->poFilename),
      f(poParentIn->f),
      nStart(startA),
      bLimited(limitedA),
      nLength(lengthA)
{
}
53
54
/************************************************************************/
55
/*                         ~VSIPDFFileStream()                          */
56
/************************************************************************/
57
58
// Restores any saved file position (through close()) and releases the file
// name object when this stream is the one that allocated it.
VSIPDFFileStream::~VSIPDFFileStream()
{
    close();

    // Sub-streams share the file name pointer of their parent; only a
    // stream without parent is allowed to delete it.
    const bool bOwnsFilename = (poParent == nullptr);
    if (bOwnsFilename)
        delete poFilename;
}
66
67
/************************************************************************/
68
/*                                copy()                                */
69
/************************************************************************/
70
71
#if POPPLER_MAJOR_VERSION > 26 ||                                              \
72
    (POPPLER_MAJOR_VERSION == 26 && POPPLER_MINOR_VERSION >= 2)
73
std::unique_ptr<BaseStream> VSIPDFFileStream::copy()
74
{
75
    return std::make_unique<VSIPDFFileStream>(poParent, nStart, bLimited,
76
                                              nLength, dict.copy());
77
}
78
#else
79
BaseStream *VSIPDFFileStream::copy()
80
0
{
81
0
    return new VSIPDFFileStream(poParent, nStart, bLimited, nLength,
82
0
                                dict.copy());
83
0
}
84
#endif
85
86
/************************************************************************/
87
/*                           makeSubStream()                            */
88
/************************************************************************/
89
90
#if POPPLER_MAJOR_VERSION > 25 ||                                              \
91
    (POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 5)
92
std::unique_ptr<Stream> VSIPDFFileStream::makeSubStream(Goffset startA,
93
                                                        bool limitedA,
94
                                                        Goffset lengthA,
95
                                                        Object &&dictA)
96
{
97
    return std::make_unique<VSIPDFFileStream>(this, startA, limitedA, lengthA,
98
                                              std::move(dictA));
99
}
100
#else
101
Stream *VSIPDFFileStream::makeSubStream(Goffset startA, bool limitedA,
102
                                        Goffset lengthA, Object &&dictA)
103
5.03M
{
104
5.03M
    return new VSIPDFFileStream(this, startA, limitedA, lengthA,
105
5.03M
                                std::move(dictA));
106
5.03M
}
107
#endif
108
109
/************************************************************************/
110
/*                               getPos()                               */
111
/************************************************************************/
112
113
// Returns the current logical read position of the stream, converted to
// the offset type used by Poppler.
Goffset VSIPDFFileStream::getPos()
{
    const Goffset nPos = static_cast<Goffset>(nCurrentPos);
    return nPos;
}
117
118
/************************************************************************/
119
/*                              getStart()                              */
120
/************************************************************************/
121
122
// Returns the start offset of the stream, converted to the offset type
// used by Poppler.
Goffset VSIPDFFileStream::getStart()
{
    const Goffset nStartOffset = static_cast<Goffset>(nStart);
    return nStartOffset;
}
126
127
/************************************************************************/
128
/*                              getKind()                               */
129
/************************************************************************/
130
131
// Reports this stream as a file stream (strFile).
StreamKind VSIPDFFileStream::getKind() const
{
    const StreamKind eKind = strFile;
    return eKind;
}
135
136
/************************************************************************/
137
/*                            getFileName()                             */
138
/************************************************************************/
139
140
// Returns the file name object associated with the stream. The pointer is
// not transferred to the caller: it is deleted by the destructor of the
// stream that has no parent.
GooString *VSIPDFFileStream::getFileName()
{
    GooString *const poName = poFilename;
    return poName;
}
144
145
/************************************************************************/
146
/*                             FillBuffer()                             */
147
/************************************************************************/
148
149
int VSIPDFFileStream::FillBuffer()
150
57.3M
{
151
57.3M
    if (nBufferLength == 0)
152
1.40M
        return FALSE;
153
55.9M
    if (nBufferLength != -1 && nBufferLength < BUFFER_SIZE)
154
40.6M
        return FALSE;
155
156
15.2M
    nPosInBuffer = 0;
157
15.2M
    int nToRead;
158
15.2M
    if (!bLimited)
159
12.7M
        nToRead = BUFFER_SIZE;
160
2.52M
    else if (nCurrentPos + BUFFER_SIZE > nStart + nLength)
161
1.25M
        nToRead = static_cast<int>(nStart + nLength - nCurrentPos);
162
1.26M
    else
163
1.26M
        nToRead = BUFFER_SIZE;
164
15.2M
    if (nToRead < 0)
165
0
        return FALSE;
166
15.2M
    nBufferLength = static_cast<int>(VSIFReadL(abyBuffer, 1, nToRead, f));
167
15.2M
    if (nBufferLength == 0)
168
80.4k
        return FALSE;
169
170
    // Since we now report a non-zero length (as BaseStream::length member),
171
    // PDFDoc::getPage() can go to the linearized mode if the file is
172
    // linearized, and thus create a pageCache. If so, in PDFDoc::~PDFDoc(), if
173
    // pageCache is not null, it would try to access the stream (str) through
174
    // getPageCount(), but we have just freed and nullify str before in
175
    // PDFFreeDoc(). So make as if the file is not linearized to avoid those
176
    // issues... All this is due to our attempt of avoiding cross-heap issues
177
    // with allocation and liberation of VSIPDFFileStream as PDFDoc::str member.
178
15.1M
    if (nCurrentPos == 0 || nCurrentPos == VSI_L_OFFSET_MAX)
179
181k
    {
180
181k
        for (int i = 0;
181
156M
             i < nBufferLength - static_cast<int>(strlen("/Linearized ")); i++)
182
156M
        {
183
156M
            if (memcmp(abyBuffer + i, "/Linearized ", strlen("/Linearized ")) ==
184
156M
                0)
185
21.7k
            {
186
21.7k
                bFoundLinearizedHint = true;
187
21.7k
                memcpy(abyBuffer + i, "/XXXXXXXXXX ", strlen("/Linearized "));
188
21.7k
                break;
189
21.7k
            }
190
156M
        }
191
181k
    }
192
193
15.1M
    return TRUE;
194
15.2M
}
195
196
/************************************************************************/
197
/*                              getChar()                               */
198
/************************************************************************/
199
200
/* The unoptimized version is slightly slower since it must go through */
201
/* the whole virtual I/O chain for each character read. We save a few */
202
/* percent with this extra internal caching. */
203
204
int VSIPDFFileStream::getChar()
205
10.2G
{
206
#ifdef unoptimized_version
207
    GByte chRead;
208
    if (bLimited && nCurrentPos >= nStart + nLength)
209
        return EOF;
210
    if (VSIFReadL(&chRead, 1, 1, f) == 0)
211
        return EOF;
212
#else
213
10.2G
    if (nPosInBuffer == nBufferLength)
214
56.9M
    {
215
56.9M
        if (!FillBuffer() || nPosInBuffer >= nBufferLength)
216
41.9M
            return EOF;
217
56.9M
    }
218
219
10.2G
    GByte chRead = abyBuffer[nPosInBuffer];
220
10.2G
    nPosInBuffer++;
221
10.2G
#endif
222
10.2G
    nCurrentPos++;
223
10.2G
    return chRead;
224
10.2G
}
225
226
/************************************************************************/
227
/*                         getUnfilteredChar()                          */
228
/************************************************************************/
229
230
int VSIPDFFileStream::getUnfilteredChar()
231
0
{
232
0
    return getChar();
233
0
}
234
235
/************************************************************************/
236
/*                              lookChar()                              */
237
/************************************************************************/
238
239
int VSIPDFFileStream::lookChar()
240
59.4M
{
241
#ifdef unoptimized_version
242
    int nPosBefore = nCurrentPos;
243
    int chRead = getChar();
244
    if (chRead == EOF)
245
        return EOF;
246
    VSIFSeekL(f, nCurrentPos = nPosBefore, SEEK_SET);
247
    return chRead;
248
#else
249
59.4M
    int chRead = getChar();
250
59.4M
    if (chRead == EOF)
251
80.0k
        return EOF;
252
59.3M
    nPosInBuffer--;
253
59.3M
    nCurrentPos--;
254
59.3M
    return chRead;
255
59.4M
#endif
256
59.4M
}
257
258
/************************************************************************/
259
/*                               reset()                                */
260
/************************************************************************/
261
262
#if POPPLER_MAJOR_VERSION > 25
263
bool VSIPDFFileStream::rewind()
264
#elif POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 2
265
bool VSIPDFFileStream::reset()
266
#else
267
void VSIPDFFileStream::reset()
268
#endif
269
5.09M
{
270
5.09M
    nSavedPos = VSIFTellL(f);
271
5.09M
    bHasSavedPos = TRUE;
272
5.09M
    VSIFSeekL(f, nCurrentPos = nStart, SEEK_SET);
273
5.09M
    nPosInBuffer = -1;
274
5.09M
    nBufferLength = -1;
275
#if POPPLER_MAJOR_VERSION > 25 ||                                              \
276
    (POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 2)
277
    return true;
278
#endif
279
5.09M
}
280
281
/************************************************************************/
282
/*                          unfilteredReset()                           */
283
/************************************************************************/
284
285
#if POPPLER_MAJOR_VERSION > 25
286
bool VSIPDFFileStream::unfilteredRewind()
287
{
288
    return rewind();
289
}
290
#elif POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 3
291
bool VSIPDFFileStream::unfilteredReset()
292
{
293
    return reset();
294
}
295
#else
296
void VSIPDFFileStream::unfilteredReset()
297
0
{
298
0
    reset();
299
0
}
300
#endif
301
302
/************************************************************************/
303
/*                               close()                                */
304
/************************************************************************/
305
306
void VSIPDFFileStream::close()
307
9.94M
{
308
9.94M
    if (bHasSavedPos)
309
4.95M
    {
310
4.95M
        nCurrentPos = nSavedPos;
311
4.95M
        VSIFSeekL(f, nCurrentPos, SEEK_SET);
312
4.95M
    }
313
9.94M
    bHasSavedPos = FALSE;
314
9.94M
    nSavedPos = 0;
315
9.94M
}
316
317
/************************************************************************/
318
/*                               setPos()                               */
319
/************************************************************************/
320
321
// Moves the read position.
//
// pos : offset to move to.
// dir : when >= 0, pos is used as an absolute file offset. When negative,
//       pos is counted backwards from the end: the end of the file for an
//       unlimited stream, nStart + nLength for a limited one. A backward
//       distance larger than the end offset is clamped to it.
//
// The internal buffer is invalidated in all cases.
void VSIPDFFileStream::setPos(Goffset pos, int dir)
{
    if (dir < 0)
    {
        // Position the file at the end of the stream to learn its offset.
        if (bLimited)
            VSIFSeekL(f, nStart + nLength, SEEK_SET);
        else
            VSIFSeekL(f, 0, SEEK_END);
        const vsi_l_offset nEndOffset = VSIFTellL(f);

        const vsi_l_offset nRequested = static_cast<vsi_l_offset>(pos);
        const vsi_l_offset nBackward =
            (nRequested > nEndOffset) ? nEndOffset : nRequested;
        nCurrentPos = nEndOffset - nBackward;
    }
    else
    {
        nCurrentPos = pos;
    }
    VSIFSeekL(f, nCurrentPos, SEEK_SET);

    // Force a refill on the next read.
    nBufferLength = -1;
    nPosInBuffer = -1;
}
346
347
/************************************************************************/
348
/*                             moveStart()                              */
349
/************************************************************************/
350
351
// Shifts the start offset of the stream by delta and repositions both the
// logical position and the file handle at the new start. The internal
// buffer is invalidated.
void VSIPDFFileStream::moveStart(Goffset delta)
{
    nStart += delta;

    // Whatever was buffered no longer matches the new position.
    nBufferLength = -1;
    nPosInBuffer = -1;

    VSIFSeekL(f, nCurrentPos = nStart, SEEK_SET);
}
359
360
/************************************************************************/
361
/*                            hasGetChars()                             */
362
/************************************************************************/
363
364
// Always returns true: this class provides its own getChars()
// implementation.
bool VSIPDFFileStream::hasGetChars()
{
    const bool bHasGetChars = true;
    return bHasGetChars;
}
368
369
/************************************************************************/
370
/*                              getChars()                              */
371
/************************************************************************/
372
373
int VSIPDFFileStream::getChars(int nChars, unsigned char *buffer)
374
607k
{
375
607k
    int nRead = 0;
376
1.11M
    while (nRead < nChars)
377
732k
    {
378
732k
        int nToRead = nChars - nRead;
379
732k
        if (nPosInBuffer == nBufferLength)
380
346k
        {
381
346k
            if (!bLimited && nToRead > BUFFER_SIZE)
382
0
            {
383
0
                int nJustRead =
384
0
                    static_cast<int>(VSIFReadL(buffer + nRead, 1, nToRead, f));
385
0
                nPosInBuffer = -1;
386
0
                nBufferLength = -1;
387
0
                nCurrentPos += nJustRead;
388
0
                nRead += nJustRead;
389
0
                break;
390
0
            }
391
346k
            else if (!FillBuffer() || nPosInBuffer >= nBufferLength)
392
223k
                break;
393
346k
        }
394
508k
        if (nToRead > nBufferLength - nPosInBuffer)
395
124k
            nToRead = nBufferLength - nPosInBuffer;
396
397
508k
        memcpy(buffer + nRead, abyBuffer + nPosInBuffer, nToRead);
398
508k
        nPosInBuffer += nToRead;
399
508k
        nCurrentPos += nToRead;
400
508k
        nRead += nToRead;
401
508k
    }
402
607k
    return nRead;
403
607k
}
404
405
#endif