/src/gdal/frmts/pdf/pdfio.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: PDF driver |
4 | | * Purpose: GDALDataset driver for PDF dataset. |
5 | | * Author: Even Rouault, <even dot rouault at spatialys.com> |
6 | | * |
7 | | ****************************************************************************** |
8 | | * Copyright (c) 2010-2013, Even Rouault <even dot rouault at spatialys.com> |
9 | | * |
10 | | * SPDX-License-Identifier: MIT |
11 | | ****************************************************************************/ |
12 | | |
13 | | #include "gdal_pdf.h" |
14 | | |
15 | | #ifdef HAVE_POPPLER |
16 | | |
17 | | #include "pdfio.h" |
18 | | |
19 | | #include "cpl_vsi.h" |
20 | | |
21 | | static vsi_l_offset VSIPDFFileStreamGetSize(VSILFILE *f) |
22 | 11.5k | { |
23 | 11.5k | VSIFSeekL(f, 0, SEEK_END); |
24 | 11.5k | vsi_l_offset nSize = VSIFTellL(f); |
25 | 11.5k | VSIFSeekL(f, 0, SEEK_SET); |
26 | 11.5k | return nSize; |
27 | 11.5k | } |
28 | | |
29 | | /************************************************************************/ |
30 | | /* VSIPDFFileStream() */ |
31 | | /************************************************************************/ |
32 | | |
33 | | VSIPDFFileStream::VSIPDFFileStream(VSILFILE *fIn, const char *pszFilename, |
34 | | Object &&dictA) |
35 | 11.5k | : BaseStream(std::move(dictA), |
36 | 11.5k | static_cast<Goffset>(VSIPDFFileStreamGetSize(fIn))), |
37 | 11.5k | poParent(nullptr), poFilename(new GooString(pszFilename)), f(fIn) |
38 | 11.5k | { |
39 | 11.5k | } |
40 | | |
41 | | /************************************************************************/ |
42 | | /* VSIPDFFileStream() */ |
43 | | /************************************************************************/ |
44 | | |
45 | | VSIPDFFileStream::VSIPDFFileStream(VSIPDFFileStream *poParentIn, |
46 | | vsi_l_offset startA, bool limitedA, |
47 | | vsi_l_offset lengthA, Object &&dictA) |
48 | 2.42M | : BaseStream(std::move(dictA), static_cast<Goffset>(lengthA)), |
49 | 2.42M | poParent(poParentIn), poFilename(poParentIn->poFilename), |
50 | 2.42M | f(poParentIn->f), nStart(startA), bLimited(limitedA), nLength(lengthA) |
51 | 2.42M | { |
52 | 2.42M | } |
53 | | |
54 | | /************************************************************************/ |
55 | | /* ~VSIPDFFileStream() */ |
56 | | /************************************************************************/ |
57 | | |
58 | | VSIPDFFileStream::~VSIPDFFileStream() |
59 | 2.43M | { |
60 | 2.43M | close(); |
61 | 2.43M | if (poParent == nullptr) |
62 | 11.5k | { |
63 | 11.5k | delete poFilename; |
64 | 11.5k | } |
65 | 2.43M | } |
66 | | |
67 | | /************************************************************************/ |
68 | | /* copy() */ |
69 | | /************************************************************************/ |
70 | | |
71 | | BaseStream *VSIPDFFileStream::copy() |
72 | 0 | { |
73 | 0 | return new VSIPDFFileStream(poParent, nStart, bLimited, nLength, |
74 | 0 | dict.copy()); |
75 | 0 | } |
76 | | |
77 | | /************************************************************************/ |
78 | | /* makeSubStream() */ |
79 | | /************************************************************************/ |
80 | | |
81 | | #if POPPLER_MAJOR_VERSION > 25 || \ |
82 | | (POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 5) |
83 | | std::unique_ptr<Stream> VSIPDFFileStream::makeSubStream(Goffset startA, |
84 | | bool limitedA, |
85 | | Goffset lengthA, |
86 | | Object &&dictA) |
87 | | { |
88 | | return std::make_unique<VSIPDFFileStream>(this, startA, limitedA, lengthA, |
89 | | std::move(dictA)); |
90 | | } |
91 | | #else |
92 | | Stream *VSIPDFFileStream::makeSubStream(Goffset startA, bool limitedA, |
93 | | Goffset lengthA, Object &&dictA) |
94 | 2.42M | { |
95 | 2.42M | return new VSIPDFFileStream(this, startA, limitedA, lengthA, |
96 | 2.42M | std::move(dictA)); |
97 | 2.42M | } |
98 | | #endif |
99 | | |
100 | | /************************************************************************/ |
101 | | /* getPos() */ |
102 | | /************************************************************************/ |
103 | | |
104 | | Goffset VSIPDFFileStream::getPos() |
105 | 122M | { |
106 | 122M | return static_cast<Goffset>(nCurrentPos); |
107 | 122M | } |
108 | | |
109 | | /************************************************************************/ |
110 | | /* getStart() */ |
111 | | /************************************************************************/ |
112 | | |
113 | | Goffset VSIPDFFileStream::getStart() |
114 | 23.2k | { |
115 | 23.2k | return static_cast<Goffset>(nStart); |
116 | 23.2k | } |
117 | | |
118 | | /************************************************************************/ |
119 | | /* getKind() */ |
120 | | /************************************************************************/ |
121 | | |
122 | | StreamKind VSIPDFFileStream::getKind() const |
123 | 30 | { |
124 | 30 | return strFile; |
125 | 30 | } |
126 | | |
127 | | /************************************************************************/ |
128 | | /* getFileName() */ |
129 | | /************************************************************************/ |
130 | | |
131 | | GooString *VSIPDFFileStream::getFileName() |
132 | 23.0k | { |
133 | 23.0k | return poFilename; |
134 | 23.0k | } |
135 | | |
136 | | /************************************************************************/ |
137 | | /* FillBuffer() */ |
138 | | /************************************************************************/ |
139 | | |
140 | | int VSIPDFFileStream::FillBuffer() |
141 | 7.71M | { |
142 | 7.71M | if (nBufferLength == 0) |
143 | 1.01k | return FALSE; |
144 | 7.71M | if (nBufferLength != -1 && nBufferLength < BUFFER_SIZE) |
145 | 2.34M | return FALSE; |
146 | | |
147 | 5.36M | nPosInBuffer = 0; |
148 | 5.36M | int nToRead; |
149 | 5.36M | if (!bLimited) |
150 | 3.27M | nToRead = BUFFER_SIZE; |
151 | 2.08M | else if (nCurrentPos + BUFFER_SIZE > nStart + nLength) |
152 | 1.06M | nToRead = static_cast<int>(nStart + nLength - nCurrentPos); |
153 | 1.02M | else |
154 | 1.02M | nToRead = BUFFER_SIZE; |
155 | 5.36M | if (nToRead < 0) |
156 | 0 | return FALSE; |
157 | 5.36M | nBufferLength = static_cast<int>(VSIFReadL(abyBuffer, 1, nToRead, f)); |
158 | 5.36M | if (nBufferLength == 0) |
159 | 7.19k | return FALSE; |
160 | | |
161 | | // Since we now report a non-zero length (as BaseStream::length member), |
162 | | // PDFDoc::getPage() can go to the linearized mode if the file is |
163 | | // linearized, and thus create a pageCache. If so, in PDFDoc::~PDFDoc(), if |
164 | | // pageCache is not null, it would try to access the stream (str) through |
165 | | // getPageCount(), but we have just freed and nullify str before in |
166 | | // PDFFreeDoc(). So make as if the file is not linearized to avoid those |
167 | | // issues... All this is due to our attempt of avoiding cross-heap issues |
168 | | // with allocation and liberation of VSIPDFFileStream as PDFDoc::str member. |
169 | 5.35M | if (nCurrentPos == 0 || nCurrentPos == VSI_L_OFFSET_MAX) |
170 | 46.8k | { |
171 | 46.8k | for (int i = 0; |
172 | 41.7M | i < nBufferLength - static_cast<int>(strlen("/Linearized ")); i++) |
173 | 41.6M | { |
174 | 41.6M | if (memcmp(abyBuffer + i, "/Linearized ", strlen("/Linearized ")) == |
175 | 41.6M | 0) |
176 | 6.34k | { |
177 | 6.34k | bFoundLinearizedHint = true; |
178 | 6.34k | memcpy(abyBuffer + i, "/XXXXXXXXXX ", strlen("/Linearized ")); |
179 | 6.34k | break; |
180 | 6.34k | } |
181 | 41.6M | } |
182 | 46.8k | } |
183 | | |
184 | 5.35M | return TRUE; |
185 | 5.36M | } |
186 | | |
187 | | /************************************************************************/ |
188 | | /* getChar() */ |
189 | | /************************************************************************/ |
190 | | |
191 | | /* The unoptimized version performs a bit less since we must go through */ |
192 | | /* the whole virtual I/O chain for each character reading. We save a few */ |
193 | | /* percent with this extra internal caching */ |
194 | | |
195 | | int VSIPDFFileStream::getChar() |
196 | 2.53G | { |
197 | | #ifdef unoptimized_version |
198 | | GByte chRead; |
199 | | if (bLimited && nCurrentPos >= nStart + nLength) |
200 | | return EOF; |
201 | | if (VSIFReadL(&chRead, 1, 1, f) == 0) |
202 | | return EOF; |
203 | | #else |
204 | 2.53G | if (nPosInBuffer == nBufferLength) |
205 | 7.51M | { |
206 | 7.51M | if (!FillBuffer() || nPosInBuffer >= nBufferLength) |
207 | 2.18M | return EOF; |
208 | 7.51M | } |
209 | | |
210 | 2.53G | GByte chRead = abyBuffer[nPosInBuffer]; |
211 | 2.53G | nPosInBuffer++; |
212 | 2.53G | #endif |
213 | 2.53G | nCurrentPos++; |
214 | 2.53G | return chRead; |
215 | 2.53G | } |
216 | | |
217 | | /************************************************************************/ |
218 | | /* getUnfilteredChar() */ |
219 | | /************************************************************************/ |
220 | | |
221 | | int VSIPDFFileStream::getUnfilteredChar() |
222 | 0 | { |
223 | 0 | return getChar(); |
224 | 0 | } |
225 | | |
226 | | /************************************************************************/ |
227 | | /* lookChar() */ |
228 | | /************************************************************************/ |
229 | | |
230 | | int VSIPDFFileStream::lookChar() |
231 | 13.1M | { |
232 | | #ifdef unoptimized_version |
233 | | int nPosBefore = nCurrentPos; |
234 | | int chRead = getChar(); |
235 | | if (chRead == EOF) |
236 | | return EOF; |
237 | | VSIFSeekL(f, nCurrentPos = nPosBefore, SEEK_SET); |
238 | | return chRead; |
239 | | #else |
240 | 13.1M | int chRead = getChar(); |
241 | 13.1M | if (chRead == EOF) |
242 | 20.6k | return EOF; |
243 | 13.1M | nPosInBuffer--; |
244 | 13.1M | nCurrentPos--; |
245 | 13.1M | return chRead; |
246 | 13.1M | #endif |
247 | 13.1M | } |
248 | | |
249 | | /************************************************************************/ |
250 | | /* reset() */ |
251 | | /************************************************************************/ |
252 | | |
253 | | #if POPPLER_MAJOR_VERSION > 25 || \ |
254 | | (POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 2) |
255 | | bool VSIPDFFileStream::reset() |
256 | | #else |
257 | | void VSIPDFFileStream::reset() |
258 | | #endif |
259 | 2.48M | { |
260 | 2.48M | nSavedPos = VSIFTellL(f); |
261 | 2.48M | bHasSavedPos = TRUE; |
262 | 2.48M | VSIFSeekL(f, nCurrentPos = nStart, SEEK_SET); |
263 | 2.48M | nPosInBuffer = -1; |
264 | 2.48M | nBufferLength = -1; |
265 | | #if POPPLER_MAJOR_VERSION > 25 || \ |
266 | | (POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 2) |
267 | | return true; |
268 | | #endif |
269 | 2.48M | } |
270 | | |
271 | | /************************************************************************/ |
272 | | /* unfilteredReset() */ |
273 | | /************************************************************************/ |
274 | | |
275 | | #if POPPLER_MAJOR_VERSION > 25 || \ |
276 | | (POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 3) |
277 | | bool VSIPDFFileStream::unfilteredReset() |
278 | | { |
279 | | return reset(); |
280 | | } |
281 | | #else |
282 | | void VSIPDFFileStream::unfilteredReset() |
283 | 0 | { |
284 | 0 | reset(); |
285 | 0 | } |
286 | | #endif |
287 | | |
288 | | /************************************************************************/ |
289 | | /* close() */ |
290 | | /************************************************************************/ |
291 | | |
292 | | void VSIPDFFileStream::close() |
293 | 4.87M | { |
294 | 4.87M | if (bHasSavedPos) |
295 | 2.45M | { |
296 | 2.45M | nCurrentPos = nSavedPos; |
297 | 2.45M | VSIFSeekL(f, nCurrentPos, SEEK_SET); |
298 | 2.45M | } |
299 | 4.87M | bHasSavedPos = FALSE; |
300 | 4.87M | nSavedPos = 0; |
301 | 4.87M | } |
302 | | |
303 | | /************************************************************************/ |
304 | | /* setPos() */ |
305 | | /************************************************************************/ |
306 | | |
307 | | void VSIPDFFileStream::setPos(Goffset pos, int dir) |
308 | 1.11M | { |
309 | 1.11M | if (dir >= 0) |
310 | 1.04M | { |
311 | 1.04M | VSIFSeekL(f, nCurrentPos = pos, SEEK_SET); |
312 | 1.04M | } |
313 | 67.1k | else |
314 | 67.1k | { |
315 | 67.1k | if (bLimited == false) |
316 | 67.1k | { |
317 | 67.1k | VSIFSeekL(f, 0, SEEK_END); |
318 | 67.1k | } |
319 | 0 | else |
320 | 0 | { |
321 | 0 | VSIFSeekL(f, nStart + nLength, SEEK_SET); |
322 | 0 | } |
323 | 67.1k | vsi_l_offset size = VSIFTellL(f); |
324 | 67.1k | vsi_l_offset newpos = static_cast<vsi_l_offset>(pos); |
325 | 67.1k | if (newpos > size) |
326 | 4.17k | newpos = size; |
327 | 67.1k | VSIFSeekL(f, nCurrentPos = size - newpos, SEEK_SET); |
328 | 67.1k | } |
329 | 1.11M | nPosInBuffer = -1; |
330 | 1.11M | nBufferLength = -1; |
331 | 1.11M | } |
332 | | |
333 | | /************************************************************************/ |
334 | | /* moveStart() */ |
335 | | /************************************************************************/ |
336 | | |
337 | | void VSIPDFFileStream::moveStart(Goffset delta) |
338 | 11.2k | { |
339 | 11.2k | nStart += delta; |
340 | 11.2k | nCurrentPos = nStart; |
341 | 11.2k | VSIFSeekL(f, nCurrentPos, SEEK_SET); |
342 | 11.2k | nPosInBuffer = -1; |
343 | 11.2k | nBufferLength = -1; |
344 | 11.2k | } |
345 | | |
346 | | /************************************************************************/ |
347 | | /* hasGetChars() */ |
348 | | /************************************************************************/ |
349 | | |
350 | | bool VSIPDFFileStream::hasGetChars() |
351 | 504k | { |
352 | 504k | return true; |
353 | 504k | } |
354 | | |
355 | | /************************************************************************/ |
356 | | /* getChars() */ |
357 | | /************************************************************************/ |
358 | | |
359 | | int VSIPDFFileStream::getChars(int nChars, unsigned char *buffer) |
360 | 504k | { |
361 | 504k | int nRead = 0; |
362 | 885k | while (nRead < nChars) |
363 | 548k | { |
364 | 548k | int nToRead = nChars - nRead; |
365 | 548k | if (nPosInBuffer == nBufferLength) |
366 | 194k | { |
367 | 194k | if (!bLimited && nToRead > BUFFER_SIZE) |
368 | 0 | { |
369 | 0 | int nJustRead = |
370 | 0 | static_cast<int>(VSIFReadL(buffer + nRead, 1, nToRead, f)); |
371 | 0 | nPosInBuffer = -1; |
372 | 0 | nBufferLength = -1; |
373 | 0 | nCurrentPos += nJustRead; |
374 | 0 | nRead += nJustRead; |
375 | 0 | break; |
376 | 0 | } |
377 | 194k | else if (!FillBuffer() || nPosInBuffer >= nBufferLength) |
378 | 167k | break; |
379 | 194k | } |
380 | 380k | if (nToRead > nBufferLength - nPosInBuffer) |
381 | 43.6k | nToRead = nBufferLength - nPosInBuffer; |
382 | | |
383 | 380k | memcpy(buffer + nRead, abyBuffer + nPosInBuffer, nToRead); |
384 | 380k | nPosInBuffer += nToRead; |
385 | 380k | nCurrentPos += nToRead; |
386 | 380k | nRead += nToRead; |
387 | 380k | } |
388 | 504k | return nRead; |
389 | 504k | } |
390 | | |
391 | | #endif |