Coverage Report

Created: 2025-06-09 07:43

/src/gdal/frmts/pdf/pdfio.cpp
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Project:  PDF driver
4
 * Purpose:  GDALDataset driver for PDF dataset.
5
 * Author:   Even Rouault, <even dot rouault at spatialys.com>
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2010-2013, Even Rouault <even dot rouault at spatialys.com>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
#include "gdal_pdf.h"
14
15
#ifdef HAVE_POPPLER
16
17
#include "pdfio.h"
18
19
#include "cpl_vsi.h"
20
21
// Return the size of the file behind f, obtained by seeking to its end and
// reading the resulting offset. The handle is rewound to offset 0 before
// returning. The results of the seek calls are not checked.
static vsi_l_offset VSIPDFFileStreamGetSize(VSILFILE *f)
{
    VSIFSeekL(f, 0, SEEK_END);
    const vsi_l_offset nFileSize = VSIFTellL(f);

    // Leave the handle at the beginning of the file for the caller.
    VSIFSeekL(f, 0, SEEK_SET);

    return nFileSize;
}
28
29
/************************************************************************/
30
/*                         VSIPDFFileStream()                           */
31
/************************************************************************/
32
33
// Build the root stream over an already opened file handle.
//
// The length reported to BaseStream is the file size as measured by
// VSIPDFFileStreamGetSize(), which leaves fIn positioned at offset 0.
// A root stream has no parent and allocates its own GooString copy of
// pszFilename; the destructor releases it.
VSIPDFFileStream::VSIPDFFileStream(VSILFILE *fIn, const char *pszFilename,
                                   Object &&dictA)
    : BaseStream(std::move(dictA),
                 static_cast<Goffset>(VSIPDFFileStreamGetSize(fIn))),
      poParent(nullptr),
      poFilename(new GooString(pszFilename)),
      f(fIn)
{
    // Nothing else to do: the remaining members are not set here
    // (presumably they have in-class initializers -- confirm in pdfio.h).
}
40
41
/************************************************************************/
42
/*                         VSIPDFFileStream()                           */
43
/************************************************************************/
44
45
// Build a sub-stream of poParentIn.
//
// The sub-stream borrows the parent's file handle and file name object
// (poParentIn must therefore not be null), and records the window
// [startA, startA + lengthA) together with whether that window is
// enforced (limitedA).
VSIPDFFileStream::VSIPDFFileStream(VSIPDFFileStream *poParentIn,
                                   vsi_l_offset startA, bool limitedA,
                                   vsi_l_offset lengthA, Object &&dictA)
    : BaseStream(std::move(dictA), static_cast<Goffset>(lengthA)),
      poParent(poParentIn),
      poFilename(poParentIn->poFilename),
      f(poParentIn->f),
      nStart(startA),
      bLimited(limitedA),
      nLength(lengthA)
{
}
53
54
/************************************************************************/
55
/*                        ~VSIPDFFileStream()                           */
56
/************************************************************************/
57
58
// Restore any saved file position, then release the file name object if
// this stream is the one that allocated it.
VSIPDFFileStream::~VSIPDFFileStream()
{
    close();

    // Only a stream without parent created poFilename with new; streams
    // built from a parent merely hold the parent's pointer.
    const bool bOwnsFilename = (poParent == nullptr);
    if (bOwnsFilename)
        delete poFilename;
}
66
67
/************************************************************************/
68
/*                                  copy()                              */
69
/************************************************************************/
70
71
// Return a newly allocated stream covering the same window
// (nStart, bLimited, nLength) as this one, with a copy of the dictionary.
// The caller receives a raw pointer to the new object.
//
// The copy shares the file handle and the file name object of the stream
// it is built from, and never deletes the file name itself, so it must not
// outlive the stream that owns that name.
BaseStream *VSIPDFFileStream::copy()
{
    // The sub-stream constructor dereferences its parent argument
    // unconditionally, and poParent is null for a stream created from a
    // file handle. Use this object as the parent in that case instead of
    // passing a null pointer.
    VSIPDFFileStream *poSource = (poParent != nullptr) ? poParent : this;

    return new VSIPDFFileStream(poSource, nStart, bLimited, nLength,
                                dict.copy());
}
76
77
/************************************************************************/
78
/*                             makeSubStream()                          */
79
/************************************************************************/
80
81
#if POPPLER_MAJOR_VERSION > 25 ||                                              \
82
    (POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 5)
83
std::unique_ptr<Stream> VSIPDFFileStream::makeSubStream(Goffset startA,
84
                                                        bool limitedA,
85
                                                        Goffset lengthA,
86
                                                        Object &&dictA)
87
{
88
    return std::make_unique<VSIPDFFileStream>(this, startA, limitedA, lengthA,
89
                                              std::move(dictA));
90
}
91
#else
92
Stream *VSIPDFFileStream::makeSubStream(Goffset startA, bool limitedA,
93
                                        Goffset lengthA, Object &&dictA)
94
2.42M
{
95
2.42M
    return new VSIPDFFileStream(this, startA, limitedA, lengthA,
96
2.42M
                                std::move(dictA));
97
2.42M
}
98
#endif
99
100
/************************************************************************/
101
/*                                 getPos()                             */
102
/************************************************************************/
103
104
// Return the logical read position of the stream (nCurrentPos), converted
// to Poppler's offset type.
Goffset VSIPDFFileStream::getPos()
{
    const Goffset nPos = static_cast<Goffset>(nCurrentPos);
    return nPos;
}
108
109
/************************************************************************/
110
/*                                getStart()                            */
111
/************************************************************************/
112
113
// Return the offset at which this stream begins (nStart), converted to
// Poppler's offset type.
Goffset VSIPDFFileStream::getStart()
{
    const Goffset nStartOffset = static_cast<Goffset>(nStart);
    return nStartOffset;
}
117
118
/************************************************************************/
119
/*                             getKind()                                */
120
/************************************************************************/
121
122
// Report the kind of this stream; it always identifies itself as strFile.
StreamKind VSIPDFFileStream::getKind() const
{
    const StreamKind eKind = strFile;
    return eKind;
}
126
127
/************************************************************************/
128
/*                           getFileName()                               */
129
/************************************************************************/
130
131
// Return the file name object held by this stream. The pointer is not a
// copy: the object stays under the control of the stream hierarchy.
GooString *VSIPDFFileStream::getFileName()
{
    GooString *poName = poFilename;
    return poName;
}
135
136
/************************************************************************/
137
/*                             FillBuffer()                             */
138
/************************************************************************/
139
140
int VSIPDFFileStream::FillBuffer()
141
7.71M
{
142
7.71M
    if (nBufferLength == 0)
143
1.01k
        return FALSE;
144
7.71M
    if (nBufferLength != -1 && nBufferLength < BUFFER_SIZE)
145
2.34M
        return FALSE;
146
147
5.36M
    nPosInBuffer = 0;
148
5.36M
    int nToRead;
149
5.36M
    if (!bLimited)
150
3.27M
        nToRead = BUFFER_SIZE;
151
2.08M
    else if (nCurrentPos + BUFFER_SIZE > nStart + nLength)
152
1.06M
        nToRead = static_cast<int>(nStart + nLength - nCurrentPos);
153
1.02M
    else
154
1.02M
        nToRead = BUFFER_SIZE;
155
5.36M
    if (nToRead < 0)
156
0
        return FALSE;
157
5.36M
    nBufferLength = static_cast<int>(VSIFReadL(abyBuffer, 1, nToRead, f));
158
5.36M
    if (nBufferLength == 0)
159
7.19k
        return FALSE;
160
161
    // Since we now report a non-zero length (as BaseStream::length member),
162
    // PDFDoc::getPage() can go to the linearized mode if the file is
163
    // linearized, and thus create a pageCache. If so, in PDFDoc::~PDFDoc(), if
164
    // pageCache is not null, it would try to access the stream (str) through
165
    // getPageCount(), but we have just freed and nullify str before in
166
    // PDFFreeDoc(). So make as if the file is not linearized to avoid those
167
    // issues... All this is due to our attempt of avoiding cross-heap issues
168
    // with allocation and liberation of VSIPDFFileStream as PDFDoc::str member.
169
5.35M
    if (nCurrentPos == 0 || nCurrentPos == VSI_L_OFFSET_MAX)
170
46.8k
    {
171
46.8k
        for (int i = 0;
172
41.7M
             i < nBufferLength - static_cast<int>(strlen("/Linearized ")); i++)
173
41.6M
        {
174
41.6M
            if (memcmp(abyBuffer + i, "/Linearized ", strlen("/Linearized ")) ==
175
41.6M
                0)
176
6.34k
            {
177
6.34k
                bFoundLinearizedHint = true;
178
6.34k
                memcpy(abyBuffer + i, "/XXXXXXXXXX ", strlen("/Linearized "));
179
6.34k
                break;
180
6.34k
            }
181
41.6M
        }
182
46.8k
    }
183
184
5.35M
    return TRUE;
185
5.36M
}
186
187
/************************************************************************/
188
/*                                getChar()                             */
189
/************************************************************************/
190
191
/* The unoptimized version performs a bit less since we must go through */
192
/* the whole virtual I/O chain for each character reading. We save a few */
193
/* percent with this extra internal caching */
194
195
int VSIPDFFileStream::getChar()
196
2.53G
{
197
#ifdef unoptimized_version
198
    GByte chRead;
199
    if (bLimited && nCurrentPos >= nStart + nLength)
200
        return EOF;
201
    if (VSIFReadL(&chRead, 1, 1, f) == 0)
202
        return EOF;
203
#else
204
2.53G
    if (nPosInBuffer == nBufferLength)
205
7.51M
    {
206
7.51M
        if (!FillBuffer() || nPosInBuffer >= nBufferLength)
207
2.18M
            return EOF;
208
7.51M
    }
209
210
2.53G
    GByte chRead = abyBuffer[nPosInBuffer];
211
2.53G
    nPosInBuffer++;
212
2.53G
#endif
213
2.53G
    nCurrentPos++;
214
2.53G
    return chRead;
215
2.53G
}
216
217
/************************************************************************/
218
/*                       getUnfilteredChar()                            */
219
/************************************************************************/
220
221
int VSIPDFFileStream::getUnfilteredChar()
222
0
{
223
0
    return getChar();
224
0
}
225
226
/************************************************************************/
227
/*                               lookChar()                             */
228
/************************************************************************/
229
230
int VSIPDFFileStream::lookChar()
231
13.1M
{
232
#ifdef unoptimized_version
233
    int nPosBefore = nCurrentPos;
234
    int chRead = getChar();
235
    if (chRead == EOF)
236
        return EOF;
237
    VSIFSeekL(f, nCurrentPos = nPosBefore, SEEK_SET);
238
    return chRead;
239
#else
240
13.1M
    int chRead = getChar();
241
13.1M
    if (chRead == EOF)
242
20.6k
        return EOF;
243
13.1M
    nPosInBuffer--;
244
13.1M
    nCurrentPos--;
245
13.1M
    return chRead;
246
13.1M
#endif
247
13.1M
}
248
249
/************************************************************************/
250
/*                                reset()                               */
251
/************************************************************************/
252
253
#if POPPLER_MAJOR_VERSION > 25 ||                                              \
254
    (POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 2)
255
bool VSIPDFFileStream::reset()
256
#else
257
void VSIPDFFileStream::reset()
258
#endif
259
2.48M
{
260
2.48M
    nSavedPos = VSIFTellL(f);
261
2.48M
    bHasSavedPos = TRUE;
262
2.48M
    VSIFSeekL(f, nCurrentPos = nStart, SEEK_SET);
263
2.48M
    nPosInBuffer = -1;
264
2.48M
    nBufferLength = -1;
265
#if POPPLER_MAJOR_VERSION > 25 ||                                              \
266
    (POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 2)
267
    return true;
268
#endif
269
2.48M
}
270
271
/************************************************************************/
272
/*                         unfilteredReset()                            */
273
/************************************************************************/
274
275
#if POPPLER_MAJOR_VERSION > 25 ||                                              \
276
    (POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 3)
277
bool VSIPDFFileStream::unfilteredReset()
278
{
279
    return reset();
280
}
281
#else
282
void VSIPDFFileStream::unfilteredReset()
283
0
{
284
0
    reset();
285
0
}
286
#endif
287
288
/************************************************************************/
289
/*                                close()                               */
290
/************************************************************************/
291
292
void VSIPDFFileStream::close()
293
4.87M
{
294
4.87M
    if (bHasSavedPos)
295
2.45M
    {
296
2.45M
        nCurrentPos = nSavedPos;
297
2.45M
        VSIFSeekL(f, nCurrentPos, SEEK_SET);
298
2.45M
    }
299
4.87M
    bHasSavedPos = FALSE;
300
4.87M
    nSavedPos = 0;
301
4.87M
}
302
303
/************************************************************************/
304
/*                               setPos()                               */
305
/************************************************************************/
306
307
// Move the stream position and invalidate the read buffer.
//
// dir >= 0: pos is used as the offset to seek to.
// dir <  0: pos is a distance counted backwards from the end, the end being
//           the end of the file for an unlimited stream and
//           nStart + nLength for a limited one. A distance larger than the
//           end offset is clamped, which results in position 0.
void VSIPDFFileStream::setPos(Goffset pos, int dir)
{
    if (dir >= 0)
    {
        nCurrentPos = pos;
        VSIFSeekL(f, nCurrentPos, SEEK_SET);
    }
    else
    {
        // Go to the end first in order to learn its offset.
        if (bLimited)
            VSIFSeekL(f, nStart + nLength, SEEK_SET);
        else
            VSIFSeekL(f, 0, SEEK_END);
        const vsi_l_offset nEndOffset = VSIFTellL(f);

        const vsi_l_offset nRequested = static_cast<vsi_l_offset>(pos);
        const vsi_l_offset nBackward =
            (nRequested > nEndOffset) ? nEndOffset : nRequested;

        nCurrentPos = nEndOffset - nBackward;
        VSIFSeekL(f, nCurrentPos, SEEK_SET);
    }

    // Whatever was buffered no longer matches the new position.
    nBufferLength = -1;
    nPosInBuffer = -1;
}
332
333
/************************************************************************/
334
/*                            moveStart()                               */
335
/************************************************************************/
336
337
// Shift the start offset of the stream by delta, position the stream and
// the file handle on the new start, and invalidate the read buffer.
void VSIPDFFileStream::moveStart(Goffset delta)
{
    nStart += delta;

    nCurrentPos = nStart;
    VSIFSeekL(f, nCurrentPos, SEEK_SET);

    // Force a refill on the next read.
    nBufferLength = -1;
    nPosInBuffer = -1;
}
345
346
/************************************************************************/
347
/*                          hasGetChars()                               */
348
/************************************************************************/
349
350
// Always true: this class provides its own getChars() implementation.
bool VSIPDFFileStream::hasGetChars()
{
    const bool bSupported = true;
    return bSupported;
}
354
355
/************************************************************************/
356
/*                            getChars()                                */
357
/************************************************************************/
358
359
int VSIPDFFileStream::getChars(int nChars, unsigned char *buffer)
360
504k
{
361
504k
    int nRead = 0;
362
885k
    while (nRead < nChars)
363
548k
    {
364
548k
        int nToRead = nChars - nRead;
365
548k
        if (nPosInBuffer == nBufferLength)
366
194k
        {
367
194k
            if (!bLimited && nToRead > BUFFER_SIZE)
368
0
            {
369
0
                int nJustRead =
370
0
                    static_cast<int>(VSIFReadL(buffer + nRead, 1, nToRead, f));
371
0
                nPosInBuffer = -1;
372
0
                nBufferLength = -1;
373
0
                nCurrentPos += nJustRead;
374
0
                nRead += nJustRead;
375
0
                break;
376
0
            }
377
194k
            else if (!FillBuffer() || nPosInBuffer >= nBufferLength)
378
167k
                break;
379
194k
        }
380
380k
        if (nToRead > nBufferLength - nPosInBuffer)
381
43.6k
            nToRead = nBufferLength - nPosInBuffer;
382
383
380k
        memcpy(buffer + nRead, abyBuffer + nPosInBuffer, nToRead);
384
380k
        nPosInBuffer += nToRead;
385
380k
        nCurrentPos += nToRead;
386
380k
        nRead += nToRead;
387
380k
    }
388
504k
    return nRead;
389
504k
}
390
391
#endif