/src/gdal/frmts/pdf/pdfio.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: PDF driver |
4 | | * Purpose: GDALDataset driver for PDF dataset. |
5 | | * Author: Even Rouault, <even dot rouault at spatialys.com> |
6 | | * |
7 | | ****************************************************************************** |
8 | | * Copyright (c) 2010-2013, Even Rouault <even dot rouault at spatialys.com> |
9 | | * |
10 | | * SPDX-License-Identifier: MIT |
11 | | ****************************************************************************/ |
12 | | |
13 | | #include "gdal_pdf.h" |
14 | | |
15 | | #ifdef HAVE_POPPLER |
16 | | |
17 | | #include "pdfio.h" |
18 | | |
19 | | #include "cpl_vsi.h" |
20 | | |
21 | | static vsi_l_offset VSIPDFFileStreamGetSize(VSILFILE *f) |
22 | 44.2k | { |
23 | 44.2k | VSIFSeekL(f, 0, SEEK_END); |
24 | 44.2k | vsi_l_offset nSize = VSIFTellL(f); |
25 | 44.2k | VSIFSeekL(f, 0, SEEK_SET); |
26 | 44.2k | return nSize; |
27 | 44.2k | } |
28 | | |
29 | | /************************************************************************/ |
30 | | /* VSIPDFFileStream() */ |
31 | | /************************************************************************/ |
32 | | |
33 | | VSIPDFFileStream::VSIPDFFileStream(VSILFILE *fIn, const char *pszFilename, |
34 | | Object &&dictA) |
35 | 44.2k | : BaseStream(std::move(dictA), |
36 | 44.2k | static_cast<Goffset>(VSIPDFFileStreamGetSize(fIn))), |
37 | 44.2k | poParent(nullptr), poFilename(new GooString(pszFilename)), f(fIn) |
38 | 44.2k | { |
39 | 44.2k | } |
40 | | |
41 | | /************************************************************************/ |
42 | | /* VSIPDFFileStream() */ |
43 | | /************************************************************************/ |
44 | | |
45 | | VSIPDFFileStream::VSIPDFFileStream(VSIPDFFileStream *poParentIn, |
46 | | vsi_l_offset startA, bool limitedA, |
47 | | vsi_l_offset lengthA, Object &&dictA) |
48 | 5.03M | : BaseStream(std::move(dictA), static_cast<Goffset>(lengthA)), |
49 | 5.03M | poParent(poParentIn), poFilename(poParentIn->poFilename), |
50 | 5.03M | f(poParentIn->f), nStart(startA), bLimited(limitedA), nLength(lengthA) |
51 | 5.03M | { |
52 | 5.03M | } |
53 | | |
54 | | /************************************************************************/ |
55 | | /* ~VSIPDFFileStream() */ |
56 | | /************************************************************************/ |
57 | | |
58 | | VSIPDFFileStream::~VSIPDFFileStream() |
59 | 5.07M | { |
60 | 5.07M | close(); |
61 | 5.07M | if (poParent == nullptr) |
62 | 44.2k | { |
63 | 44.2k | delete poFilename; |
64 | 44.2k | } |
65 | 5.07M | } |
66 | | |
67 | | /************************************************************************/ |
68 | | /* copy() */ |
69 | | /************************************************************************/ |
70 | | |
71 | | #if POPPLER_MAJOR_VERSION > 26 || \ |
72 | | (POPPLER_MAJOR_VERSION == 26 && POPPLER_MINOR_VERSION >= 2) |
73 | | std::unique_ptr<BaseStream> VSIPDFFileStream::copy() |
74 | | { |
75 | | return std::make_unique<VSIPDFFileStream>(poParent, nStart, bLimited, |
76 | | nLength, dict.copy()); |
77 | | } |
78 | | #else |
79 | | BaseStream *VSIPDFFileStream::copy() |
80 | 0 | { |
81 | 0 | return new VSIPDFFileStream(poParent, nStart, bLimited, nLength, |
82 | 0 | dict.copy()); |
83 | 0 | } |
84 | | #endif |
85 | | |
86 | | /************************************************************************/ |
87 | | /* makeSubStream() */ |
88 | | /************************************************************************/ |
89 | | |
90 | | #if POPPLER_MAJOR_VERSION > 25 || \ |
91 | | (POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 5) |
92 | | std::unique_ptr<Stream> VSIPDFFileStream::makeSubStream(Goffset startA, |
93 | | bool limitedA, |
94 | | Goffset lengthA, |
95 | | Object &&dictA) |
96 | | { |
97 | | return std::make_unique<VSIPDFFileStream>(this, startA, limitedA, lengthA, |
98 | | std::move(dictA)); |
99 | | } |
100 | | #else |
101 | | Stream *VSIPDFFileStream::makeSubStream(Goffset startA, bool limitedA, |
102 | | Goffset lengthA, Object &&dictA) |
103 | 5.03M | { |
104 | 5.03M | return new VSIPDFFileStream(this, startA, limitedA, lengthA, |
105 | 5.03M | std::move(dictA)); |
106 | 5.03M | } |
107 | | #endif |
108 | | |
109 | | /************************************************************************/ |
110 | | /* getPos() */ |
111 | | /************************************************************************/ |
112 | | |
113 | | Goffset VSIPDFFileStream::getPos() |
114 | 801M | { |
115 | 801M | return static_cast<Goffset>(nCurrentPos); |
116 | 801M | } |
117 | | |
118 | | /************************************************************************/ |
119 | | /* getStart() */ |
120 | | /************************************************************************/ |
121 | | |
122 | | Goffset VSIPDFFileStream::getStart() |
123 | 94.6k | { |
124 | 94.6k | return static_cast<Goffset>(nStart); |
125 | 94.6k | } |
126 | | |
127 | | /************************************************************************/ |
128 | | /* getKind() */ |
129 | | /************************************************************************/ |
130 | | |
131 | | StreamKind VSIPDFFileStream::getKind() const |
132 | 163 | { |
133 | 163 | return strFile; |
134 | 163 | } |
135 | | |
136 | | /************************************************************************/ |
137 | | /* getFileName() */ |
138 | | /************************************************************************/ |
139 | | |
140 | | GooString *VSIPDFFileStream::getFileName() |
141 | 88.4k | { |
142 | 88.4k | return poFilename; |
143 | 88.4k | } |
144 | | |
145 | | /************************************************************************/ |
146 | | /* FillBuffer() */ |
147 | | /************************************************************************/ |
148 | | |
149 | | int VSIPDFFileStream::FillBuffer() |
150 | 57.3M | { |
151 | 57.3M | if (nBufferLength == 0) |
152 | 1.40M | return FALSE; |
153 | 55.9M | if (nBufferLength != -1 && nBufferLength < BUFFER_SIZE) |
154 | 40.6M | return FALSE; |
155 | | |
156 | 15.2M | nPosInBuffer = 0; |
157 | 15.2M | int nToRead; |
158 | 15.2M | if (!bLimited) |
159 | 12.7M | nToRead = BUFFER_SIZE; |
160 | 2.52M | else if (nCurrentPos + BUFFER_SIZE > nStart + nLength) |
161 | 1.25M | nToRead = static_cast<int>(nStart + nLength - nCurrentPos); |
162 | 1.26M | else |
163 | 1.26M | nToRead = BUFFER_SIZE; |
164 | 15.2M | if (nToRead < 0) |
165 | 0 | return FALSE; |
166 | 15.2M | nBufferLength = static_cast<int>(VSIFReadL(abyBuffer, 1, nToRead, f)); |
167 | 15.2M | if (nBufferLength == 0) |
168 | 80.4k | return FALSE; |
169 | | |
170 | | // Since we now report a non-zero length (as BaseStream::length member), |
171 | | // PDFDoc::getPage() can go to the linearized mode if the file is |
172 | | // linearized, and thus create a pageCache. If so, in PDFDoc::~PDFDoc(), if |
173 | | // pageCache is not null, it would try to access the stream (str) through |
174 | | // getPageCount(), but we have just freed and nullify str before in |
175 | | // PDFFreeDoc(). So make as if the file is not linearized to avoid those |
176 | | // issues... All this is due to our attempt of avoiding cross-heap issues |
177 | | // with allocation and liberation of VSIPDFFileStream as PDFDoc::str member. |
178 | 15.1M | if (nCurrentPos == 0 || nCurrentPos == VSI_L_OFFSET_MAX) |
179 | 181k | { |
180 | 181k | for (int i = 0; |
181 | 156M | i < nBufferLength - static_cast<int>(strlen("/Linearized ")); i++) |
182 | 156M | { |
183 | 156M | if (memcmp(abyBuffer + i, "/Linearized ", strlen("/Linearized ")) == |
184 | 156M | 0) |
185 | 21.7k | { |
186 | 21.7k | bFoundLinearizedHint = true; |
187 | 21.7k | memcpy(abyBuffer + i, "/XXXXXXXXXX ", strlen("/Linearized ")); |
188 | 21.7k | break; |
189 | 21.7k | } |
190 | 156M | } |
191 | 181k | } |
192 | | |
193 | 15.1M | return TRUE; |
194 | 15.2M | } |
195 | | |
196 | | /************************************************************************/ |
197 | | /* getChar() */ |
198 | | /************************************************************************/ |
199 | | |
200 | | /* The unoptimized version performs a bit worse since we must go through */ |
201 | | /* the whole virtual I/O chain for each character read. We save a few */ |
202 | | /* percent with this extra internal caching. */ |
203 | | |
204 | | int VSIPDFFileStream::getChar() |
205 | 10.2G | { |
206 | | #ifdef unoptimized_version |
207 | | GByte chRead; |
208 | | if (bLimited && nCurrentPos >= nStart + nLength) |
209 | | return EOF; |
210 | | if (VSIFReadL(&chRead, 1, 1, f) == 0) |
211 | | return EOF; |
212 | | #else |
213 | 10.2G | if (nPosInBuffer == nBufferLength) |
214 | 56.9M | { |
215 | 56.9M | if (!FillBuffer() || nPosInBuffer >= nBufferLength) |
216 | 41.9M | return EOF; |
217 | 56.9M | } |
218 | | |
219 | 10.2G | GByte chRead = abyBuffer[nPosInBuffer]; |
220 | 10.2G | nPosInBuffer++; |
221 | 10.2G | #endif |
222 | 10.2G | nCurrentPos++; |
223 | 10.2G | return chRead; |
224 | 10.2G | } |
225 | | |
226 | | /************************************************************************/ |
227 | | /* getUnfilteredChar() */ |
228 | | /************************************************************************/ |
229 | | |
230 | | int VSIPDFFileStream::getUnfilteredChar() |
231 | 0 | { |
232 | 0 | return getChar(); |
233 | 0 | } |
234 | | |
235 | | /************************************************************************/ |
236 | | /* lookChar() */ |
237 | | /************************************************************************/ |
238 | | |
239 | | int VSIPDFFileStream::lookChar() |
240 | 59.4M | { |
241 | | #ifdef unoptimized_version |
242 | | int nPosBefore = nCurrentPos; |
243 | | int chRead = getChar(); |
244 | | if (chRead == EOF) |
245 | | return EOF; |
246 | | VSIFSeekL(f, nCurrentPos = nPosBefore, SEEK_SET); |
247 | | return chRead; |
248 | | #else |
249 | 59.4M | int chRead = getChar(); |
250 | 59.4M | if (chRead == EOF) |
251 | 80.0k | return EOF; |
252 | 59.3M | nPosInBuffer--; |
253 | 59.3M | nCurrentPos--; |
254 | 59.3M | return chRead; |
255 | 59.4M | #endif |
256 | 59.4M | } |
257 | | |
258 | | /************************************************************************/ |
259 | | /* reset() */ |
260 | | /************************************************************************/ |
261 | | |
262 | | #if POPPLER_MAJOR_VERSION > 25 |
263 | | bool VSIPDFFileStream::rewind() |
264 | | #elif POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 2 |
265 | | bool VSIPDFFileStream::reset() |
266 | | #else |
267 | | void VSIPDFFileStream::reset() |
268 | | #endif |
269 | 5.09M | { |
270 | 5.09M | nSavedPos = VSIFTellL(f); |
271 | 5.09M | bHasSavedPos = TRUE; |
272 | 5.09M | VSIFSeekL(f, nCurrentPos = nStart, SEEK_SET); |
273 | 5.09M | nPosInBuffer = -1; |
274 | 5.09M | nBufferLength = -1; |
275 | | #if POPPLER_MAJOR_VERSION > 25 || \ |
276 | | (POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 2) |
277 | | return true; |
278 | | #endif |
279 | 5.09M | } |
280 | | |
281 | | /************************************************************************/ |
282 | | /* unfilteredReset() */ |
283 | | /************************************************************************/ |
284 | | |
285 | | #if POPPLER_MAJOR_VERSION > 25 |
286 | | bool VSIPDFFileStream::unfilteredRewind() |
287 | | { |
288 | | return rewind(); |
289 | | } |
290 | | #elif POPPLER_MAJOR_VERSION == 25 && POPPLER_MINOR_VERSION >= 3 |
291 | | bool VSIPDFFileStream::unfilteredReset() |
292 | | { |
293 | | return reset(); |
294 | | } |
295 | | #else |
296 | | void VSIPDFFileStream::unfilteredReset() |
297 | 0 | { |
298 | 0 | reset(); |
299 | 0 | } |
300 | | #endif |
301 | | |
302 | | /************************************************************************/ |
303 | | /* close() */ |
304 | | /************************************************************************/ |
305 | | |
306 | | void VSIPDFFileStream::close() |
307 | 9.94M | { |
308 | 9.94M | if (bHasSavedPos) |
309 | 4.95M | { |
310 | 4.95M | nCurrentPos = nSavedPos; |
311 | 4.95M | VSIFSeekL(f, nCurrentPos, SEEK_SET); |
312 | 4.95M | } |
313 | 9.94M | bHasSavedPos = FALSE; |
314 | 9.94M | nSavedPos = 0; |
315 | 9.94M | } |
316 | | |
317 | | /************************************************************************/ |
318 | | /* setPos() */ |
319 | | /************************************************************************/ |
320 | | |
321 | | void VSIPDFFileStream::setPos(Goffset pos, int dir) |
322 | 1.69M | { |
323 | 1.69M | if (dir >= 0) |
324 | 1.37M | { |
325 | 1.37M | VSIFSeekL(f, nCurrentPos = pos, SEEK_SET); |
326 | 1.37M | } |
327 | 322k | else |
328 | 322k | { |
329 | 322k | if (bLimited == false) |
330 | 322k | { |
331 | 322k | VSIFSeekL(f, 0, SEEK_END); |
332 | 322k | } |
333 | 0 | else |
334 | 0 | { |
335 | 0 | VSIFSeekL(f, nStart + nLength, SEEK_SET); |
336 | 0 | } |
337 | 322k | vsi_l_offset size = VSIFTellL(f); |
338 | 322k | vsi_l_offset newpos = static_cast<vsi_l_offset>(pos); |
339 | 322k | if (newpos > size) |
340 | 23.7k | newpos = size; |
341 | 322k | VSIFSeekL(f, nCurrentPos = size - newpos, SEEK_SET); |
342 | 322k | } |
343 | 1.69M | nPosInBuffer = -1; |
344 | 1.69M | nBufferLength = -1; |
345 | 1.69M | } |
346 | | |
347 | | /************************************************************************/ |
348 | | /* moveStart() */ |
349 | | /************************************************************************/ |
350 | | |
351 | | void VSIPDFFileStream::moveStart(Goffset delta) |
352 | 35.6k | { |
353 | 35.6k | nStart += delta; |
354 | 35.6k | nCurrentPos = nStart; |
355 | 35.6k | VSIFSeekL(f, nCurrentPos, SEEK_SET); |
356 | 35.6k | nPosInBuffer = -1; |
357 | 35.6k | nBufferLength = -1; |
358 | 35.6k | } |
359 | | |
360 | | /************************************************************************/ |
361 | | /* hasGetChars() */ |
362 | | /************************************************************************/ |
363 | | |
364 | | bool VSIPDFFileStream::hasGetChars() |
365 | 607k | { |
366 | 607k | return true; |
367 | 607k | } |
368 | | |
369 | | /************************************************************************/ |
370 | | /* getChars() */ |
371 | | /************************************************************************/ |
372 | | |
373 | | int VSIPDFFileStream::getChars(int nChars, unsigned char *buffer) |
374 | 607k | { |
375 | 607k | int nRead = 0; |
376 | 1.11M | while (nRead < nChars) |
377 | 732k | { |
378 | 732k | int nToRead = nChars - nRead; |
379 | 732k | if (nPosInBuffer == nBufferLength) |
380 | 346k | { |
381 | 346k | if (!bLimited && nToRead > BUFFER_SIZE) |
382 | 0 | { |
383 | 0 | int nJustRead = |
384 | 0 | static_cast<int>(VSIFReadL(buffer + nRead, 1, nToRead, f)); |
385 | 0 | nPosInBuffer = -1; |
386 | 0 | nBufferLength = -1; |
387 | 0 | nCurrentPos += nJustRead; |
388 | 0 | nRead += nJustRead; |
389 | 0 | break; |
390 | 0 | } |
391 | 346k | else if (!FillBuffer() || nPosInBuffer >= nBufferLength) |
392 | 223k | break; |
393 | 346k | } |
394 | 508k | if (nToRead > nBufferLength - nPosInBuffer) |
395 | 124k | nToRead = nBufferLength - nPosInBuffer; |
396 | | |
397 | 508k | memcpy(buffer + nRead, abyBuffer + nPosInBuffer, nToRead); |
398 | 508k | nPosInBuffer += nToRead; |
399 | 508k | nCurrentPos += nToRead; |
400 | 508k | nRead += nToRead; |
401 | 508k | } |
402 | 607k | return nRead; |
403 | 607k | } |
404 | | |
405 | | #endif |