/src/xpdf-4.06/xpdf/XRef.cc
Line | Count | Source |
1 | | //======================================================================== |
2 | | // |
3 | | // XRef.cc |
4 | | // |
5 | | // Copyright 1996-2003 Glyph & Cog, LLC |
6 | | // |
7 | | //======================================================================== |
8 | | |
9 | | #include <aconf.h> |
10 | | |
11 | | #include <stdlib.h> |
12 | | #include <stddef.h> |
13 | | #include <string.h> |
14 | | #include <ctype.h> |
15 | | #include <limits.h> |
16 | | #include "gmem.h" |
17 | | #include "gmempp.h" |
18 | | #include "gfile.h" |
19 | | #include "Object.h" |
20 | | #include "Stream.h" |
21 | | #include "Lexer.h" |
22 | | #include "Parser.h" |
23 | | #include "Dict.h" |
24 | | #include "Error.h" |
25 | | #include "ErrorCodes.h" |
26 | | #include "XRef.h" |
27 | | |
28 | | //------------------------------------------------------------------------ |
29 | | |
// Size, in bytes, of the window at the end of the file that
// XRef::getStartXref() reads; its local buffer is declared with one
// extra byte for a terminating NUL.
30 | 43.6k | #define xrefSearchSize 1024 // read this many bytes at end of file |
31 | | // to look for 'startxref' |
32 | | |
33 | | //------------------------------------------------------------------------ |
34 | | // Permission bits |
35 | | //------------------------------------------------------------------------ |
36 | | |
// Single-bit masks (bits 2, 3, 4 and 5 respectively).
37 | 12.8k | #define permPrint (1<<2) |
38 | 12.8k | #define permChange (1<<3) |
39 | 12.8k | #define permCopy (1<<4) |
40 | 12.8k | #define permNotes (1<<5) |
// Initial value of XRef::permFlags, assigned in the XRef constructor.
// 0xfffc has bits 2 through 15 set, so all four masks above are included.
41 | 43.6k | #define defPermFlags 0xfffc |
42 | | |
43 | | //------------------------------------------------------------------------ |
44 | | // XRefPosSet |
45 | | //------------------------------------------------------------------------ |
46 | | |
47 | | class XRefPosSet { |
48 | | public: |
49 | | |
50 | | XRefPosSet(); |
51 | | ~XRefPosSet(); |
52 | | void add(GFileOffset pos); |
53 | | GBool check(GFileOffset pos); |
54 | 935 | int getLength() { return len; } |
55 | 1.04k | GFileOffset get(int idx) { return tab[idx]; } |
56 | | |
57 | | private: |
58 | | |
59 | | int find(GFileOffset pos); |
60 | | |
61 | | GFileOffset *tab; |
62 | | int size; |
63 | | int len; |
64 | | }; |
65 | | |
66 | 937 | XRefPosSet::XRefPosSet() { |
67 | 937 | size = 16; |
68 | 937 | len = 0; |
69 | 937 | tab = (GFileOffset *)gmallocn(size, sizeof(GFileOffset)); |
70 | 937 | } |
71 | | |
72 | 935 | XRefPosSet::~XRefPosSet() { |
73 | 935 | gfree(tab); |
74 | 935 | } |
75 | | |
76 | 1.04k | void XRefPosSet::add(GFileOffset pos) { |
77 | 1.04k | int i; |
78 | | |
79 | 1.04k | i = find(pos); |
80 | 1.04k | if (i < len && tab[i] == pos) { |
81 | 0 | return; |
82 | 0 | } |
83 | 1.04k | if (len == size) { |
84 | 0 | if (size > INT_MAX / 2) { |
85 | 0 | gMemError("Integer overflow in XRefPosSet::add()"); |
86 | 0 | } |
87 | 0 | size *= 2; |
88 | 0 | tab = (GFileOffset *)greallocn(tab, size, sizeof(GFileOffset)); |
89 | 0 | } |
90 | 1.04k | if (i < len) { |
91 | 46 | memmove(&tab[i + 1], &tab[i], (len - i) * sizeof(GFileOffset)); |
92 | 46 | } |
93 | 1.04k | tab[i] = pos; |
94 | 1.04k | ++len; |
95 | 1.04k | } |
96 | | |
97 | 1.05k | GBool XRefPosSet::check(GFileOffset pos) { |
98 | 1.05k | int i; |
99 | | |
100 | 1.05k | i = find(pos); |
101 | 1.05k | return i < len && tab[i] == pos; |
102 | 1.05k | } |
103 | | |
104 | 2.10k | int XRefPosSet::find(GFileOffset pos) { |
105 | 2.10k | int a, b, m; |
106 | | |
107 | 2.10k | a = - 1; |
108 | 2.10k | b = len; |
109 | | // invariant: tab[a] < pos < tab[b] |
110 | 2.38k | while (b - a > 1) { |
111 | 288 | m = (a + b) / 2; |
112 | 288 | if (tab[m] < pos) { |
113 | 144 | a = m; |
114 | 144 | } else if (tab[m] > pos) { |
115 | 136 | b = m; |
116 | 136 | } else { |
117 | 8 | return m; |
118 | 8 | } |
119 | 288 | } |
120 | 2.09k | return b; |
121 | 2.10k | } |
122 | | |
123 | | //------------------------------------------------------------------------ |
124 | | // ObjectStream |
125 | | //------------------------------------------------------------------------ |
126 | | |
127 | | class ObjectStream { |
128 | | public: |
129 | | |
130 | | // Create an object stream, using object number <objStrNum>, |
131 | | // generation 0. |
132 | | ObjectStream(XRef *xref, int objStrNumA, int recursion); |
133 | | |
134 | 47.7k | GBool isOk() { return ok; } |
135 | | |
136 | | ~ObjectStream(); |
137 | | |
138 | | // Return the object number of this object stream. |
139 | 63.4k | int getObjStrNum() { return objStrNum; } |
140 | | |
141 | | // Get the <objIdx>th object from this stream, which should be |
142 | | // object number <objNum>, generation 0. |
143 | | Object *getObject(int objIdx, int objNum, Object *obj); |
144 | | |
145 | | private: |
146 | | |
147 | | int objStrNum; // object number of the object stream |
148 | | int nObjects; // number of objects in the stream |
149 | | Object *objs; // the objects (length = nObjects) |
150 | | int *objNums; // the object numbers (length = nObjects) |
151 | | GBool ok; |
152 | | }; |
153 | | |
154 | 47.7k | ObjectStream::ObjectStream(XRef *xref, int objStrNumA, int recursion) { |
155 | 47.7k | Stream *str; |
156 | 47.7k | Lexer *lexer; |
157 | 47.7k | Parser *parser; |
158 | 47.7k | int *offsets; |
159 | 47.7k | Object objStr, obj1, obj2; |
160 | 47.7k | int first, i; |
161 | | |
162 | 47.7k | objStrNum = objStrNumA; |
163 | 47.7k | nObjects = 0; |
164 | 47.7k | objs = NULL; |
165 | 47.7k | objNums = NULL; |
166 | 47.7k | ok = gFalse; |
167 | | |
168 | 47.7k | if (!xref->fetch(objStrNum, 0, &objStr, recursion)->isStream()) { |
169 | 928 | goto err1; |
170 | 928 | } |
171 | | |
172 | 46.8k | if (!objStr.streamGetDict()->lookup("N", &obj1, recursion)->isInt()) { |
173 | 180 | obj1.free(); |
174 | 180 | goto err1; |
175 | 180 | } |
176 | 46.6k | nObjects = obj1.getInt(); |
177 | 46.6k | obj1.free(); |
178 | 46.6k | if (nObjects <= 0) { |
179 | 98 | goto err1; |
180 | 98 | } |
181 | | |
182 | 46.5k | if (!objStr.streamGetDict()->lookup("First", &obj1, recursion)->isInt()) { |
183 | 34.3k | obj1.free(); |
184 | 34.3k | goto err1; |
185 | 34.3k | } |
186 | 12.1k | first = obj1.getInt(); |
187 | 12.1k | obj1.free(); |
188 | 12.1k | if (first < 0) { |
189 | 86 | goto err1; |
190 | 86 | } |
191 | | |
192 | | // this is an arbitrary limit to avoid integer overflow problems |
193 | | // in the 'new Object[nObjects]' call (Acrobat apparently limits |
194 | | // object streams to 100-200 objects) |
195 | 12.0k | if (nObjects > 1000000) { |
196 | 81 | error(errSyntaxError, -1, "Too many objects in an object stream"); |
197 | 81 | goto err1; |
198 | 81 | } |
199 | 12.0k | objs = new Object[nObjects]; |
200 | 12.0k | objNums = (int *)gmallocn(nObjects, sizeof(int)); |
201 | 12.0k | offsets = (int *)gmallocn(nObjects, sizeof(int)); |
202 | | |
203 | | // parse the header: object numbers and offsets |
204 | 12.0k | objStr.streamReset(); |
205 | 12.0k | obj1.initNull(); |
206 | 12.0k | str = new EmbedStream(objStr.getStream(), &obj1, gTrue, first); |
207 | 12.0k | lexer = new Lexer(xref, str); |
208 | 12.0k | parser = new Parser(xref, lexer, gFalse); |
209 | 236k | for (i = 0; i < nObjects; ++i) { |
210 | 229k | parser->getObj(&obj1, gTrue); |
211 | 229k | parser->getObj(&obj2, gTrue); |
212 | 229k | if (!obj1.isInt() || !obj2.isInt()) { |
213 | 2.05k | obj1.free(); |
214 | 2.05k | obj2.free(); |
215 | 2.05k | delete parser; |
216 | 2.05k | gfree(offsets); |
217 | 2.05k | goto err2; |
218 | 2.05k | } |
219 | 227k | objNums[i] = obj1.getInt(); |
220 | 227k | offsets[i] = obj2.getInt(); |
221 | 227k | obj1.free(); |
222 | 227k | obj2.free(); |
223 | 227k | if (objNums[i] < 0 || offsets[i] < 0 || |
224 | 226k | (i > 0 && offsets[i] < offsets[i-1])) { |
225 | 2.95k | delete parser; |
226 | 2.95k | gfree(offsets); |
227 | 2.95k | goto err2; |
228 | 2.95k | } |
229 | 227k | } |
230 | 6.99k | lexer->skipToEOF(); |
231 | 6.99k | delete parser; |
232 | | |
233 | | // skip to the first object - this generally shouldn't be needed, |
234 | | // because offsets[0] is normally 0, but just in case... |
235 | 6.99k | if (offsets[0] > 0) { |
236 | 62 | objStr.getStream()->discardChars(offsets[0]); |
237 | 62 | } |
238 | | |
239 | | // parse the objects |
240 | 65.6k | for (i = 0; i < nObjects; ++i) { |
241 | 58.6k | obj1.initNull(); |
242 | 58.6k | if (i == nObjects - 1) { |
243 | 6.99k | str = new EmbedStream(objStr.getStream(), &obj1, gFalse, 0); |
244 | 51.6k | } else { |
245 | 51.6k | str = new EmbedStream(objStr.getStream(), &obj1, gTrue, |
246 | 51.6k | offsets[i+1] - offsets[i]); |
247 | 51.6k | } |
248 | 58.6k | lexer = new Lexer(xref, str); |
249 | 58.6k | parser = new Parser(xref, lexer, gFalse); |
250 | 58.6k | parser->getObj(&objs[i]); |
251 | 58.6k | lexer->skipToEOF(); |
252 | 58.6k | delete parser; |
253 | 58.6k | } |
254 | | |
255 | 6.99k | gfree(offsets); |
256 | 6.99k | ok = gTrue; |
257 | | |
258 | 12.0k | err2: |
259 | 12.0k | objStr.streamClose(); |
260 | 47.7k | err1: |
261 | 47.7k | objStr.free(); |
262 | 47.7k | } |
263 | | |
264 | 47.7k | ObjectStream::~ObjectStream() { |
265 | 47.7k | int i; |
266 | | |
267 | 47.7k | if (objs) { |
268 | 362k | for (i = 0; i < nObjects; ++i) { |
269 | 350k | objs[i].free(); |
270 | 350k | } |
271 | 12.0k | delete[] objs; |
272 | 12.0k | } |
273 | 47.7k | gfree(objNums); |
274 | 47.7k | } |
275 | | |
276 | 45.1k | Object *ObjectStream::getObject(int objIdx, int objNum, Object *obj) { |
277 | 45.1k | if (objIdx < 0 || objIdx >= nObjects || objNum != objNums[objIdx]) { |
278 | 142 | obj->initNull(); |
279 | 45.0k | } else { |
280 | 45.0k | objs[objIdx].copy(obj); |
281 | 45.0k | } |
282 | 45.1k | return obj; |
283 | 45.1k | } |
284 | | |
285 | | //------------------------------------------------------------------------ |
286 | | // XRef |
287 | | //------------------------------------------------------------------------ |
288 | | |
289 | 43.6k | XRef::XRef(BaseStream *strA, GBool repair) { |
290 | 43.6k | GFileOffset pos; |
291 | 43.6k | Object obj; |
292 | 43.6k | XRefPosSet *posSet; |
293 | 43.6k | int i; |
294 | | |
295 | 43.6k | ok = gTrue; |
296 | 43.6k | errCode = errNone; |
297 | 43.6k | repaired = gFalse; |
298 | 43.6k | size = 0; |
299 | 43.6k | last = -1; |
300 | 43.6k | entries = NULL; |
301 | 43.6k | lastStartxrefPos = 0; |
302 | 43.6k | xrefTablePos = NULL; |
303 | 43.6k | xrefTablePosLen = 0; |
304 | 43.6k | streamEnds = NULL; |
305 | 43.6k | streamEndsLen = 0; |
306 | 5.62M | for (i = 0; i < objStrCacheSize; ++i) { |
307 | 5.58M | objStrs[i] = NULL; |
308 | 5.58M | objStrLastUse[i] = 0; |
309 | 5.58M | } |
310 | 43.6k | objStrCacheLength = 0; |
311 | 43.6k | objStrTime = 0; |
312 | | |
313 | 43.6k | encrypted = gFalse; |
314 | 43.6k | permFlags = defPermFlags; |
315 | 43.6k | ownerPasswordOk = gFalse; |
316 | | |
317 | 741k | for (i = 0; i < xrefCacheSize; ++i) { |
318 | 697k | cache[i].num = -1; |
319 | 697k | } |
320 | | |
321 | 43.6k | #if MULTITHREADED |
322 | 43.6k | gInitMutex(&objStrsMutex); |
323 | 43.6k | gInitMutex(&cacheMutex); |
324 | 43.6k | #endif |
325 | | |
326 | 43.6k | str = strA; |
327 | 43.6k | start = str->getStart(); |
328 | | |
329 | | // if the 'repair' flag is set, try to reconstruct the xref table |
330 | 43.6k | if (repair) { |
331 | 21.7k | if (!(ok = constructXRef())) { |
332 | 5.81k | errCode = errDamaged; |
333 | 5.81k | return; |
334 | 5.81k | } |
335 | 15.9k | repaired = gTrue; |
336 | | |
337 | | // if the 'repair' flag is not set, read the xref table |
338 | 21.8k | } else { |
339 | | |
340 | | // read the trailer |
341 | 21.8k | pos = getStartXref(); |
342 | 21.8k | if (pos == 0) { |
343 | 20.8k | errCode = errDamaged; |
344 | 20.8k | ok = gFalse; |
345 | 20.8k | return; |
346 | 20.8k | } |
347 | | |
348 | | // read the xref table |
349 | 937 | posSet = new XRefPosSet(); |
350 | 1.04k | while (readXRef(&pos, posSet, gFalse)) ; |
351 | 937 | xrefTablePosLen = posSet->getLength(); |
352 | 937 | xrefTablePos = (GFileOffset *)gmallocn(xrefTablePosLen, |
353 | 937 | sizeof(GFileOffset)); |
354 | 1.98k | for (i = 0; i < xrefTablePosLen; ++i) { |
355 | 1.04k | xrefTablePos[i] = posSet->get(i); |
356 | 1.04k | } |
357 | 937 | delete posSet; |
358 | 937 | if (!ok) { |
359 | 804 | errCode = errDamaged; |
360 | 804 | return; |
361 | 804 | } |
362 | 937 | } |
363 | | |
364 | | // get the root dictionary (catalog) object |
365 | 16.0k | trailerDict.dictLookupNF("Root", &obj); |
366 | 16.0k | if (obj.isRef()) { |
367 | 15.9k | rootNum = obj.getRefNum(); |
368 | 15.9k | rootGen = obj.getRefGen(); |
369 | 15.9k | obj.free(); |
370 | 15.9k | } else { |
371 | 178 | obj.free(); |
372 | 178 | if (!(ok = constructXRef())) { |
373 | 43 | errCode = errDamaged; |
374 | 43 | return; |
375 | 43 | } |
376 | 178 | } |
377 | | |
378 | | // now set the trailer dictionary's xref pointer so we can fetch |
379 | | // indirect objects from it |
380 | 16.0k | trailerDict.getDict()->setXRef(this); |
381 | 16.0k | } |
382 | | |
383 | 43.5k | XRef::~XRef() { |
384 | 43.5k | int i; |
385 | | |
386 | 739k | for (i = 0; i < xrefCacheSize; ++i) { |
387 | 696k | if (cache[i].num >= 0) { |
388 | 150k | cache[i].obj.free(); |
389 | 150k | } |
390 | 696k | } |
391 | 43.5k | gfree(entries); |
392 | 43.5k | trailerDict.free(); |
393 | 43.5k | if (xrefTablePos) { |
394 | 935 | gfree(xrefTablePos); |
395 | 935 | } |
396 | 43.5k | if (streamEnds) { |
397 | 13.2k | gfree(streamEnds); |
398 | 13.2k | } |
399 | 5.61M | for (i = 0; i < objStrCacheSize; ++i) { |
400 | 5.57M | if (objStrs[i]) { |
401 | 6.50k | delete objStrs[i]; |
402 | 6.50k | } |
403 | 5.57M | } |
404 | 43.5k | #if MULTITHREADED |
405 | 43.5k | gDestroyMutex(&objStrsMutex); |
406 | 43.5k | gDestroyMutex(&cacheMutex); |
407 | 43.5k | #endif |
408 | 43.5k | } |
409 | | |
410 | | // Read the 'startxref' position. |
411 | 21.8k | GFileOffset XRef::getStartXref() { |
412 | 21.8k | char buf[xrefSearchSize+1]; |
413 | 21.8k | char *p; |
414 | 21.8k | int n, i; |
415 | | |
416 | | // read last xrefSearchSize bytes |
417 | 21.8k | str->setPos(xrefSearchSize, -1); |
418 | 21.8k | n = str->getBlock(buf, xrefSearchSize); |
419 | 21.8k | buf[n] = '\0'; |
420 | | |
421 | | // find startxref |
422 | 21.1M | for (i = n - 9; i >= 0; --i) { |
423 | 21.1M | if (!strncmp(&buf[i], "startxref", 9)) { |
424 | 1.43k | break; |
425 | 1.43k | } |
426 | 21.1M | } |
427 | 21.8k | if (i < 0) { |
428 | 20.3k | return 0; |
429 | 20.3k | } |
430 | 3.37k | for (p = &buf[i+9]; isspace(*p & 0xff); ++p) ; |
431 | 1.43k | lastXRefPos = strToFileOffset(p); |
432 | 1.43k | lastStartxrefPos = str->getPos() - n + i; |
433 | | |
434 | 1.43k | return lastXRefPos; |
435 | 21.8k | } |
436 | | |
437 | | // Read one xref table section. Also reads the associated trailer |
438 | | // dictionary, and returns the prev pointer (if any). The [hybrid] |
439 | | // flag is true when following the XRefStm link in a hybrid-reference |
440 | | // file. |
441 | 1.05k | GBool XRef::readXRef(GFileOffset *pos, XRefPosSet *posSet, GBool hybrid) { |
442 | 1.05k | Parser *parser; |
443 | 1.05k | Object obj; |
444 | 1.05k | GBool more; |
445 | 1.05k | char buf[100]; |
446 | 1.05k | int n, i; |
447 | | |
448 | | // check for a loop in the xref tables |
449 | 1.05k | if (posSet->check(*pos)) { |
450 | 8 | error(errSyntaxWarning, -1, "Infinite loop in xref table"); |
451 | 8 | return gFalse; |
452 | 8 | } |
453 | 1.04k | posSet->add(*pos); |
454 | | |
455 | | // the xref data should either be "xref ..." (for an xref table) or |
456 | | // "nn gg obj << ... >> stream ..." (for an xref stream); possibly |
457 | | // preceded by whitespace |
458 | 1.04k | str->setPos(start + *pos); |
459 | 1.04k | n = str->getBlock(buf, 100); |
460 | 1.84k | for (i = 0; i < n && Lexer::isSpace(buf[i]); ++i) ; |
461 | | |
462 | | // parse an old-style xref table |
463 | 1.04k | if (!hybrid && |
464 | 1.03k | i + 4 < n && |
465 | 909 | buf[i] == 'x' && buf[i+1] == 'r' && buf[i+2] == 'e' && buf[i+3] == 'f' && |
466 | 67 | Lexer::isSpace(buf[i+4])) { |
467 | 67 | more = readXRefTable(pos, i + 5, posSet); |
468 | | |
469 | | // parse an xref stream |
470 | 979 | } else { |
471 | 979 | obj.initNull(); |
472 | 979 | parser = new Parser(NULL, |
473 | 979 | new Lexer(NULL, |
474 | 979 | str->makeSubStream(start + *pos, gFalse, 0, &obj)), |
475 | 979 | gTrue); |
476 | 979 | if (!parser->getObj(&obj, gTrue)->isInt()) { |
477 | 522 | goto err; |
478 | 522 | } |
479 | 457 | obj.free(); |
480 | 457 | if (!parser->getObj(&obj, gTrue)->isInt()) { |
481 | 117 | goto err; |
482 | 117 | } |
483 | 340 | obj.free(); |
484 | 340 | if (!parser->getObj(&obj, gTrue)->isCmd("obj")) { |
485 | 27 | goto err; |
486 | 27 | } |
487 | 313 | obj.free(); |
488 | 313 | if (!parser->getObj(&obj)->isStream()) { |
489 | 44 | goto err; |
490 | 44 | } |
491 | 269 | more = readXRefStream(obj.getStream(), pos); |
492 | 269 | obj.free(); |
493 | 269 | delete parser; |
494 | 269 | } |
495 | | |
496 | 336 | return more; |
497 | | |
498 | 710 | err: |
499 | 710 | obj.free(); |
500 | 710 | delete parser; |
501 | 710 | if (hybrid) { |
502 | 1 | error(errSyntaxError, -1, "Invalid XRefStm link in trailer"); |
503 | 709 | } else { |
504 | 709 | ok = gFalse; |
505 | 709 | } |
506 | 710 | return gFalse; |
507 | 1.04k | } |
508 | | |
509 | 67 | GBool XRef::readXRefTable(GFileOffset *pos, int offset, XRefPosSet *posSet) { |
510 | 67 | XRefEntry entry; |
511 | 67 | Parser *parser; |
512 | 67 | Object obj, obj2; |
513 | 67 | char buf[6]; |
514 | 67 | GFileOffset off, pos2; |
515 | 67 | GBool more; |
516 | 67 | int first, n, digit, newSize, gen, i, c; |
517 | | |
518 | 67 | str->setPos(start + *pos + offset); |
519 | | |
520 | 468 | while (1) { |
521 | 3.15k | do { |
522 | 3.15k | c = str->getChar(); |
523 | 3.15k | } while (Lexer::isSpace(c)); |
524 | 468 | if (c == 't') { |
525 | 51 | if (str->getBlock(buf, 6) != 6 || memcmp(buf, "railer", 6)) { |
526 | 1 | goto err1; |
527 | 1 | } |
528 | 50 | break; |
529 | 51 | } |
530 | 417 | if (c < '0' || c > '9') { |
531 | 1 | goto err1; |
532 | 1 | } |
533 | 416 | first = 0; |
534 | 4.24k | do { |
535 | 4.24k | digit = c - '0'; |
536 | 4.24k | if (first > (INT_MAX - digit) / 10) { |
537 | 0 | goto err1; |
538 | 0 | } |
539 | 4.24k | first = (first * 10) + digit; |
540 | 4.24k | c = str->getChar(); |
541 | 4.24k | } while (c >= '0' && c <= '9'); |
542 | 416 | if (!Lexer::isSpace(c)) { |
543 | 4 | goto err1; |
544 | 4 | } |
545 | 519 | do { |
546 | 519 | c = str->getChar(); |
547 | 519 | } while (Lexer::isSpace(c)); |
548 | 412 | n = 0; |
549 | 1.22k | do { |
550 | 1.22k | digit = c - '0'; |
551 | 1.22k | if (n > (INT_MAX - digit) / 10) { |
552 | 0 | goto err1; |
553 | 0 | } |
554 | 1.22k | n = (n * 10) + digit; |
555 | 1.22k | c = str->getChar(); |
556 | 1.22k | } while (c >= '0' && c <= '9'); |
557 | 412 | if (!Lexer::isSpace(c)) { |
558 | 1 | goto err1; |
559 | 1 | } |
560 | 411 | if (first > INT_MAX - n) { |
561 | 0 | goto err1; |
562 | 0 | } |
563 | 411 | if (first + n > size) { |
564 | 40 | newSize = size ? size : 512; |
565 | 40 | do { |
566 | 40 | if (newSize > INT_MAX / 2) { |
567 | 0 | goto err1; |
568 | 0 | } |
569 | 40 | newSize <<= 1; |
570 | 40 | } while (first + n > newSize); |
571 | 40 | entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); |
572 | 41.0k | for (i = size; i < newSize; ++i) { |
573 | 40.9k | entries[i].offset = (GFileOffset)-1; |
574 | 40.9k | entries[i].type = xrefEntryFree; |
575 | 40.9k | } |
576 | 40 | size = newSize; |
577 | 40 | } |
578 | 3.08k | for (i = first; i < first + n; ++i) { |
579 | 5.35k | do { |
580 | 5.35k | c = str->getChar(); |
581 | 5.35k | } while (Lexer::isSpace(c)); |
582 | 2.68k | off = 0; |
583 | 26.9k | do { |
584 | 26.9k | off = (off * 10) + (c - '0'); |
585 | 26.9k | c = str->getChar(); |
586 | 26.9k | } while (c >= '0' && c <= '9'); |
587 | 2.68k | if (!Lexer::isSpace(c)) { |
588 | 8 | goto err1; |
589 | 8 | } |
590 | 2.67k | entry.offset = off; |
591 | 2.68k | do { |
592 | 2.68k | c = str->getChar(); |
593 | 2.68k | } while (Lexer::isSpace(c)); |
594 | 2.67k | gen = 0; |
595 | 13.7k | do { |
596 | 13.7k | gen = (gen * 10) + (c - '0'); |
597 | 13.7k | c = str->getChar(); |
598 | 13.7k | } while (c >= '0' && c <= '9'); |
599 | 2.67k | if (!Lexer::isSpace(c)) { |
600 | 2 | goto err1; |
601 | 2 | } |
602 | 2.67k | entry.gen = gen; |
603 | 2.70k | do { |
604 | 2.70k | c = str->getChar(); |
605 | 2.70k | } while (Lexer::isSpace(c)); |
606 | 2.67k | if (c == 'n') { |
607 | 1.34k | entry.type = xrefEntryUncompressed; |
608 | 1.34k | } else if (c == 'f') { |
609 | 1.32k | entry.type = xrefEntryFree; |
610 | 1.32k | } else { |
611 | 0 | goto err1; |
612 | 0 | } |
613 | 2.67k | c = str->getChar(); |
614 | 2.67k | if (!Lexer::isSpace(c)) { |
615 | 0 | goto err1; |
616 | 0 | } |
617 | 2.67k | if (entries[i].offset == (GFileOffset)-1) { |
618 | 938 | entries[i] = entry; |
619 | | // PDF files of patents from the IBM Intellectual Property |
620 | | // Network have a bug: the xref table claims to start at 1 |
621 | | // instead of 0. |
622 | 938 | if (i == 1 && first == 1 && |
623 | 3 | entries[1].offset == 0 && entries[1].gen == 65535 && |
624 | 2 | entries[1].type == xrefEntryFree) { |
625 | 2 | i = first = 0; |
626 | 2 | entries[0] = entries[1]; |
627 | 2 | entries[1].offset = (GFileOffset)-1; |
628 | 2 | } |
629 | 938 | if (i > last) { |
630 | 97 | last = i; |
631 | 97 | } |
632 | 938 | } |
633 | 2.67k | } |
634 | 411 | } |
635 | | |
636 | | // read the trailer dictionary |
637 | 50 | obj.initNull(); |
638 | 50 | parser = new Parser(NULL, |
639 | 50 | new Lexer(NULL, |
640 | 50 | str->makeSubStream(str->getPos(), gFalse, 0, &obj)), |
641 | 50 | gTrue); |
642 | 50 | parser->getObj(&obj); |
643 | 50 | delete parser; |
644 | 50 | if (!obj.isDict()) { |
645 | 0 | obj.free(); |
646 | 0 | goto err1; |
647 | 0 | } |
648 | | |
649 | | // get the 'Prev' pointer |
650 | | //~ this can be a 64-bit int (?) |
651 | 50 | obj.getDict()->lookupNF("Prev", &obj2); |
652 | 50 | if (obj2.isInt()) { |
653 | 15 | *pos = (GFileOffset)(Guint)obj2.getInt(); |
654 | 15 | more = gTrue; |
655 | 35 | } else if (obj2.isRef()) { |
656 | | // certain buggy PDF generators generate "/Prev NNN 0 R" instead |
657 | | // of "/Prev NNN" |
658 | 3 | *pos = (GFileOffset)(Guint)obj2.getRefNum(); |
659 | 3 | more = gTrue; |
660 | 32 | } else { |
661 | 32 | more = gFalse; |
662 | 32 | } |
663 | 50 | obj2.free(); |
664 | | |
665 | | // save the first trailer dictionary |
666 | 50 | if (trailerDict.isNone()) { |
667 | 35 | obj.copy(&trailerDict); |
668 | 35 | } |
669 | | |
670 | | // check for an 'XRefStm' key |
671 | | //~ this can be a 64-bit int (?) |
672 | 50 | if (obj.getDict()->lookup("XRefStm", &obj2)->isInt()) { |
673 | 8 | pos2 = (GFileOffset)(Guint)obj2.getInt(); |
674 | 8 | readXRef(&pos2, posSet, gTrue); |
675 | 8 | if (!ok) { |
676 | 0 | obj2.free(); |
677 | 0 | obj.free(); |
678 | 0 | goto err1; |
679 | 0 | } |
680 | 8 | } |
681 | 50 | obj2.free(); |
682 | | |
683 | 50 | obj.free(); |
684 | 50 | return more; |
685 | | |
686 | 17 | err1: |
687 | 17 | ok = gFalse; |
688 | 17 | return gFalse; |
689 | 50 | } |
690 | | |
691 | 269 | GBool XRef::readXRefStream(Stream *xrefStr, GFileOffset *pos) { |
692 | 269 | Dict *dict; |
693 | 269 | int w[3]; |
694 | 269 | GBool more; |
695 | 269 | Object obj, obj2, idx; |
696 | 269 | int newSize, first, n, i; |
697 | | |
698 | 269 | dict = xrefStr->getDict(); |
699 | | |
700 | 269 | if (!dict->lookupNF("Size", &obj)->isInt()) { |
701 | 15 | goto err1; |
702 | 15 | } |
703 | 254 | newSize = obj.getInt(); |
704 | 254 | obj.free(); |
705 | 254 | if (newSize < 0) { |
706 | 0 | goto err1; |
707 | 0 | } |
708 | 254 | if (newSize > size) { |
709 | 172 | entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); |
710 | 844k | for (i = size; i < newSize; ++i) { |
711 | 844k | entries[i].offset = (GFileOffset)-1; |
712 | 844k | entries[i].type = xrefEntryFree; |
713 | 844k | } |
714 | 172 | size = newSize; |
715 | 172 | } |
716 | | |
717 | 254 | if (!dict->lookupNF("W", &obj)->isArray() || |
718 | 252 | obj.arrayGetLength() < 3) { |
719 | 2 | goto err1; |
720 | 2 | } |
721 | 1.00k | for (i = 0; i < 3; ++i) { |
722 | 756 | if (!obj.arrayGet(i, &obj2)->isInt()) { |
723 | 1 | obj2.free(); |
724 | 1 | goto err1; |
725 | 1 | } |
726 | 755 | w[i] = obj2.getInt(); |
727 | 755 | obj2.free(); |
728 | 755 | } |
729 | 251 | obj.free(); |
730 | 251 | if (w[0] < 0 || w[0] > 8 || |
731 | 250 | w[1] < 0 || w[1] > 8 || |
732 | 249 | w[2] < 0 || w[2] > 8) { |
733 | 2 | goto err0; |
734 | 2 | } |
735 | | |
736 | 249 | xrefStr->reset(); |
737 | 249 | dict->lookupNF("Index", &idx); |
738 | 249 | if (idx.isArray()) { |
739 | 650 | for (i = 0; i+1 < idx.arrayGetLength(); i += 2) { |
740 | 526 | if (!idx.arrayGet(i, &obj)->isInt()) { |
741 | 2 | idx.free(); |
742 | 2 | goto err1; |
743 | 2 | } |
744 | 524 | first = obj.getInt(); |
745 | 524 | obj.free(); |
746 | 524 | if (!idx.arrayGet(i+1, &obj)->isInt()) { |
747 | 0 | idx.free(); |
748 | 0 | goto err1; |
749 | 0 | } |
750 | 524 | n = obj.getInt(); |
751 | 524 | obj.free(); |
752 | 524 | if (first < 0 || n < 0 || |
753 | 522 | !readXRefStreamSection(xrefStr, w, first, n)) { |
754 | 29 | idx.free(); |
755 | 29 | goto err0; |
756 | 29 | } |
757 | 524 | } |
758 | 155 | } else { |
759 | 94 | if (!readXRefStreamSection(xrefStr, w, 0, newSize)) { |
760 | 27 | idx.free(); |
761 | 27 | goto err0; |
762 | 27 | } |
763 | 94 | } |
764 | 191 | idx.free(); |
765 | | |
766 | | //~ this can be a 64-bit int (?) |
767 | 191 | dict->lookupNF("Prev", &obj); |
768 | 191 | if (obj.isInt()) { |
769 | 94 | *pos = (GFileOffset)(Guint)obj.getInt(); |
770 | 94 | more = gTrue; |
771 | 97 | } else { |
772 | 97 | more = gFalse; |
773 | 97 | } |
774 | 191 | obj.free(); |
775 | 191 | if (trailerDict.isNone()) { |
776 | 113 | trailerDict.initDict(dict); |
777 | 113 | } |
778 | | |
779 | 191 | return more; |
780 | | |
781 | 20 | err1: |
782 | 20 | obj.free(); |
783 | 78 | err0: |
784 | 78 | ok = gFalse; |
785 | 78 | return gFalse; |
786 | 20 | } |
787 | | |
788 | 616 | GBool XRef::readXRefStreamSection(Stream *xrefStr, int *w, int first, int n) { |
789 | 616 | long long type, gen, offset; |
790 | 616 | int c, newSize, i, j; |
791 | | |
792 | 616 | if (first + n < 0) { |
793 | 2 | return gFalse; |
794 | 2 | } |
795 | 614 | if (first + n > size) { |
796 | 52 | for (newSize = size ? 2 * size : 1024; |
797 | 237 | first + n > newSize && newSize > 0; |
798 | 185 | newSize <<= 1) ; |
799 | 52 | if (newSize < 0) { |
800 | 2 | return gFalse; |
801 | 2 | } |
802 | 50 | entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); |
803 | 3.12M | for (i = size; i < newSize; ++i) { |
804 | 3.12M | entries[i].offset = (GFileOffset)-1; |
805 | 3.12M | entries[i].type = xrefEntryFree; |
806 | 3.12M | } |
807 | 50 | size = newSize; |
808 | 50 | } |
809 | 9.56k | for (i = first; i < first + n; ++i) { |
810 | 9.00k | if (w[0] == 0) { |
811 | 2.57k | type = 1; |
812 | 6.42k | } else { |
813 | 13.0k | for (type = 0, j = 0; j < w[0]; ++j) { |
814 | 6.63k | if ((c = xrefStr->getChar()) == EOF) { |
815 | 10 | return gFalse; |
816 | 10 | } |
817 | 6.62k | type = (type << 8) + c; |
818 | 6.62k | } |
819 | 6.42k | } |
820 | 26.6k | for (offset = 0, j = 0; j < w[1]; ++j) { |
821 | 17.6k | if ((c = xrefStr->getChar()) == EOF) { |
822 | 10 | return gFalse; |
823 | 10 | } |
824 | 17.6k | offset = (offset << 8) + c; |
825 | 17.6k | } |
826 | 8.98k | if (offset < 0 || offset > GFILEOFFSET_MAX) { |
827 | 1 | return gFalse; |
828 | 1 | } |
829 | 19.0k | for (gen = 0, j = 0; j < w[2]; ++j) { |
830 | 10.1k | if ((c = xrefStr->getChar()) == EOF) { |
831 | 7 | return gFalse; |
832 | 7 | } |
833 | 10.1k | gen = (gen << 8) + c; |
834 | 10.1k | } |
835 | | // some PDF generators include a free entry with gen=0xffffffff |
836 | 8.97k | if ((gen < 0 || gen > INT_MAX) && type != 0) { |
837 | 4 | return gFalse; |
838 | 4 | } |
839 | 8.96k | if (entries[i].offset == (GFileOffset)-1) { |
840 | 8.21k | switch (type) { |
841 | 375 | case 0: |
842 | 375 | entries[i].offset = (GFileOffset)offset; |
843 | 375 | entries[i].gen = (int)gen; |
844 | 375 | entries[i].type = xrefEntryFree; |
845 | 375 | break; |
846 | 3.37k | case 1: |
847 | 3.37k | entries[i].offset = (GFileOffset)offset; |
848 | 3.37k | entries[i].gen = (int)gen; |
849 | 3.37k | entries[i].type = xrefEntryUncompressed; |
850 | 3.37k | break; |
851 | 4.45k | case 2: |
852 | 4.45k | entries[i].offset = (GFileOffset)offset; |
853 | 4.45k | entries[i].gen = (int)gen; |
854 | 4.45k | entries[i].type = xrefEntryCompressed; |
855 | 4.45k | break; |
856 | 18 | default: |
857 | 18 | return gFalse; |
858 | 8.21k | } |
859 | 8.20k | if (i > last) { |
860 | 3.72k | last = i; |
861 | 3.72k | } |
862 | 8.20k | } |
863 | 8.96k | } |
864 | | |
865 | 562 | return gTrue; |
866 | 612 | } |
867 | | |
868 | | // Attempt to construct an xref table for a damaged file. |
869 | 21.8k | GBool XRef::constructXRef() { |
870 | 21.8k | int *streamObjNums = NULL; |
871 | 21.8k | int streamObjNumsLen = 0; |
872 | 21.8k | int streamObjNumsSize = 0; |
873 | 21.8k | int lastObjNum = -1; |
874 | 21.8k | rootNum = -1; |
875 | 21.8k | int streamEndsSize = 0; |
876 | 21.8k | streamEndsLen = 0; |
877 | 21.8k | char buf[4096 + 1]; |
878 | 21.8k | str->reset(); |
879 | 21.8k | GFileOffset bufPos = start; |
880 | 21.8k | char *p = buf; |
881 | 21.8k | char *end = buf; |
882 | 21.8k | GBool startOfLine = gTrue; |
883 | 21.8k | GBool space = gTrue; |
884 | 21.8k | GBool eof = gFalse; |
885 | 228M | while (1) { |
886 | 228M | if (end - p < 256 && !eof) { |
887 | 75.9k | memcpy(buf, p, end - p); |
888 | 75.9k | bufPos += p - buf; |
889 | 75.9k | p = buf + (end - p); |
890 | 75.9k | int n = (int)(buf + 4096 - p); |
891 | 75.9k | int m = str->getBlock(p, n); |
892 | 75.9k | end = p + m; |
893 | 75.9k | *end = '\0'; |
894 | 75.9k | p = buf; |
895 | 75.9k | eof = m < n; |
896 | 75.9k | } |
897 | 228M | if (p == end && eof) { |
898 | 21.8k | break; |
899 | 21.8k | } |
900 | 228M | if (startOfLine && !strncmp(p, "trailer", 7)) { |
901 | 47.8k | constructTrailerDict((GFileOffset)(bufPos + (p + 7 - buf))); |
902 | 47.8k | p += 7; |
903 | 47.8k | startOfLine = gFalse; |
904 | 47.8k | space = gFalse; |
905 | 228M | } else if (startOfLine && !strncmp(p, "endstream", 9)) { |
906 | 92.3k | if (streamEndsLen == streamEndsSize) { |
907 | 13.6k | streamEndsSize += 64; |
908 | 13.6k | streamEnds = (GFileOffset *)greallocn(streamEnds, streamEndsSize, |
909 | 13.6k | sizeof(GFileOffset)); |
910 | 13.6k | } |
911 | 92.3k | streamEnds[streamEndsLen++] = (GFileOffset)(bufPos + (p - buf)); |
912 | 92.3k | p += 9; |
913 | 92.3k | startOfLine = gFalse; |
914 | 92.3k | space = gFalse; |
915 | 228M | } else if (space && *p >= '0' && *p <= '9') { |
916 | 4.00M | p = constructObjectEntry(p, (GFileOffset)(bufPos + (p - buf)), |
917 | 4.00M | &lastObjNum); |
918 | 4.00M | startOfLine = gFalse; |
919 | 4.00M | space = gFalse; |
920 | 224M | } else if (p[0] == '>' && p[1] == '>') { |
921 | 1.09M | p += 2; |
922 | 1.09M | startOfLine = gFalse; |
923 | 1.09M | space = gFalse; |
924 | | // skip any PDF whitespace except for '\0' |
925 | 1.72M | while (*p == '\t' || *p == '\n' || *p == '\x0c' || |
926 | 1.25M | *p == '\r' || *p == ' ') { |
927 | 625k | if (*p == '\n' || *p == '\r') { |
928 | 356k | startOfLine = gTrue; |
929 | 356k | } |
930 | 625k | space = gTrue; |
931 | 625k | ++p; |
932 | 625k | } |
933 | 1.09M | if (!strncmp(p, "stream", 6)) { |
934 | 451k | if (lastObjNum >= 0) { |
935 | 446k | if (streamObjNumsLen == streamObjNumsSize) { |
936 | 24.3k | streamObjNumsSize += 64; |
937 | 24.3k | streamObjNums = (int *)greallocn(streamObjNums, streamObjNumsSize, |
938 | 24.3k | sizeof(int)); |
939 | 24.3k | } |
940 | 446k | streamObjNums[streamObjNumsLen++] = lastObjNum; |
941 | 446k | } |
942 | 451k | p += 6; |
943 | 451k | startOfLine = gFalse; |
944 | 451k | space = gFalse; |
945 | 451k | } |
946 | 223M | } else { |
947 | 223M | if (*p == '\n' || *p == '\r') { |
948 | 5.40M | startOfLine = gTrue; |
949 | 5.40M | space = gTrue; |
950 | 218M | } else if (Lexer::isSpace(*p & 0xff)) { |
951 | 28.6M | space = gTrue; |
952 | 189M | } else { |
953 | 189M | startOfLine = gFalse; |
954 | 189M | space = gFalse; |
955 | 189M | } |
956 | 223M | ++p; |
957 | 223M | } |
958 | 228M | } |
959 | | |
960 | | // read each stream object, check for xref or object stream |
961 | 467k | for (int i = 0; i < streamObjNumsLen; ++i) { |
962 | 445k | Object obj; |
963 | 445k | fetch(streamObjNums[i], entries[streamObjNums[i]].gen, &obj); |
964 | 445k | if (obj.isStream()) { |
965 | 381k | Dict *dict = obj.streamGetDict(); |
966 | 381k | Object type; |
967 | 381k | dict->lookup("Type", &type); |
968 | 381k | if (type.isName("XRef")) { |
969 | 6.20k | saveTrailerDict(dict, gTrue); |
970 | 375k | } else if (type.isName("ObjStm")) { |
971 | 286k | constructObjectStreamEntries(&obj, streamObjNums[i]); |
972 | 286k | } |
973 | 381k | type.free(); |
974 | 381k | } |
975 | 445k | obj.free(); |
976 | 445k | } |
977 | | |
978 | 21.8k | gfree(streamObjNums); |
979 | | |
980 | | // if the file is encrypted, then any objects fetched here will be |
981 | | // incorrect (because decryption is not yet enabled), so clear the |
982 | | // cache to avoid that problem |
983 | 370k | for (int i = 0; i < xrefCacheSize; ++i) { |
984 | 348k | if (cache[i].num >= 0) { |
985 | 85.1k | cache[i].obj.free(); |
986 | 85.1k | cache[i].num = -1; |
987 | 85.1k | } |
988 | 348k | } |
989 | | |
990 | 21.8k | if (rootNum < 0) { |
991 | 5.85k | error(errSyntaxError, -1, "Couldn't find trailer dictionary"); |
992 | 5.85k | return gFalse; |
993 | 5.85k | } |
994 | 16.0k | return gTrue; |
995 | 21.8k | } |
996 | | |
997 | | // Attempt to construct a trailer dict at [pos] in the stream. |
998 | 47.8k | void XRef::constructTrailerDict(GFileOffset pos) { |
999 | 47.8k | Object newTrailerDict, obj; |
1000 | 47.8k | obj.initNull(); |
1001 | 47.8k | Parser *parser = |
1002 | 47.8k | new Parser(NULL, |
1003 | 47.8k | new Lexer(NULL, |
1004 | 47.8k | str->makeSubStream(pos, gFalse, 0, &obj)), |
1005 | 47.8k | gFalse); |
1006 | 47.8k | parser->getObj(&newTrailerDict); |
1007 | 47.8k | if (newTrailerDict.isDict()) { |
1008 | 33.9k | saveTrailerDict(newTrailerDict.getDict(), gFalse); |
1009 | 33.9k | } |
1010 | 47.8k | newTrailerDict.free(); |
1011 | 47.8k | delete parser; |
1012 | 47.8k | } |
1013 | | |
1014 | | // If [dict] "looks like" a trailer dict (i.e., has a Root entry), |
1015 | | // save it as the trailer dict. |
1016 | 40.2k | void XRef::saveTrailerDict(Dict *dict, GBool isXRefStream) { |
1017 | 40.2k | Object obj; |
1018 | 40.2k | dict->lookupNF("Root", &obj); |
1019 | 40.2k | if (obj.isRef()) { |
1020 | 22.9k | int newRootNum = obj.getRefNum(); |
1021 | | // the xref stream scanning code runs after all objects are found, |
1022 | | // so we can check for a valid root object number at that point |
1023 | 22.9k | if (!isXRefStream || newRootNum <= last) { |
1024 | 22.6k | rootNum = newRootNum; |
1025 | 22.6k | rootGen = obj.getRefGen(); |
1026 | 22.6k | if (!trailerDict.isNone()) { |
1027 | 6.78k | trailerDict.free(); |
1028 | 6.78k | } |
1029 | 22.6k | trailerDict.initDict(dict); |
1030 | 22.6k | } |
1031 | 22.9k | } |
1032 | 40.2k | obj.free(); |
1033 | 40.2k | } |
1034 | | |
1035 | | // Look for an object header ("nnn ggg obj") at [p]. The first |
1036 | | // character at *[p] is a digit. [pos] is the position of *[p]. |
1037 | 4.00M | char *XRef::constructObjectEntry(char *p, GFileOffset pos, int *objNum) { |
1038 | | // we look for non-end-of-line space characters here, to deal with |
1039 | | // situations like: |
1040 | | // nnn <-- garbage digits on a line |
1041 | | // nnn nnn obj <-- actual object |
1042 | | // and we also ignore '\0' (because it's used to terminate the |
1043 | | // buffer in this damage-scanning code) |
1044 | 4.00M | int num = 0; |
1045 | 11.8M | do { |
1046 | 11.8M | num = (num * 10) + (*p - '0'); |
1047 | 11.8M | ++p; |
1048 | 11.8M | } while (*p >= '0' && *p <= '9' && num < 100000000); |
1049 | 4.00M | if (*p != '\t' && *p != '\x0c' && *p != ' ') { |
1050 | 1.50M | return p; |
1051 | 1.50M | } |
1052 | 2.58M | do { |
1053 | 2.58M | ++p; |
1054 | 2.58M | } while (*p == '\t' || *p == '\x0c' || *p == ' '); |
1055 | 2.49M | if (!(*p >= '0' && *p <= '9')) { |
1056 | 449k | return p; |
1057 | 449k | } |
1058 | 2.04M | int gen = 0; |
1059 | 3.98M | do { |
1060 | 3.98M | gen = (gen * 10) + (*p - '0'); |
1061 | 3.98M | ++p; |
1062 | 3.98M | } while (*p >= '0' && *p <= '9' && gen < 100000000); |
1063 | 2.04M | if (*p != '\t' && *p != '\x0c' && *p != ' ') { |
1064 | 102k | return p; |
1065 | 102k | } |
1066 | 1.98M | do { |
1067 | 1.98M | ++p; |
1068 | 1.98M | } while (*p == '\t' || *p == '\x0c' || *p == ' '); |
1069 | 1.94M | if (strncmp(p, "obj", 3)) { |
1070 | 1.52M | return p; |
1071 | 1.52M | } |
1072 | | |
1073 | 420k | if (constructXRefEntry(num, gen, pos - start, xrefEntryUncompressed)) { |
1074 | 420k | *objNum = num; |
1075 | 420k | } |
1076 | | |
1077 | 420k | return p; |
1078 | 1.94M | } |
1079 | | |
1080 | | // Read the header from an object stream, and add xref entries for all |
1081 | | // of its objects. |
1082 | 286k | void XRef::constructObjectStreamEntries(Object *objStr, int objStrObjNum) { |
1083 | 286k | Object obj1, obj2; |
1084 | | |
1085 | | // get the object count |
1086 | 286k | if (!objStr->streamGetDict()->lookup("N", &obj1)->isInt()) { |
1087 | 891 | obj1.free(); |
1088 | 891 | return; |
1089 | 891 | } |
1090 | 285k | int nObjects = obj1.getInt(); |
1091 | 285k | obj1.free(); |
1092 | 285k | if (nObjects <= 0 || nObjects > 1000000) { |
1093 | 453 | return; |
1094 | 453 | } |
1095 | | |
1096 | | // parse the header: object numbers and offsets |
1097 | 284k | Parser *parser = new Parser(NULL, |
1098 | 284k | new Lexer(NULL, objStr->getStream()->copy()), |
1099 | 284k | gFalse); |
1100 | 122M | for (int i = 0; i < nObjects; ++i) { |
1101 | 122M | parser->getObj(&obj1, gTrue); |
1102 | 122M | parser->getObj(&obj2, gTrue); |
1103 | 122M | if (obj1.isInt() && obj2.isInt()) { |
1104 | 111k | int num = obj1.getInt(); |
1105 | 111k | if (num >= 0 && num < 1000000) { |
1106 | 109k | constructXRefEntry(num, i, objStrObjNum, xrefEntryCompressed); |
1107 | 109k | } |
1108 | 111k | } |
1109 | 122M | obj2.free(); |
1110 | 122M | obj1.free(); |
1111 | 122M | } |
1112 | 284k | delete parser; |
1113 | 284k | } |
1114 | | |
1115 | | GBool XRef::constructXRefEntry(int num, int gen, GFileOffset pos, |
1116 | 530k | XRefEntryType type) { |
1117 | 530k | if (num >= size) { |
1118 | 23.4k | int newSize = (num + 1 + 255) & ~255; |
1119 | 23.4k | if (newSize < 0) { |
1120 | 0 | return gFalse; |
1121 | 0 | } |
1122 | 23.4k | entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); |
1123 | 810M | for (int i = size; i < newSize; ++i) { |
1124 | 810M | entries[i].offset = (GFileOffset)-1; |
1125 | 810M | entries[i].type = xrefEntryFree; |
1126 | 810M | } |
1127 | 23.4k | size = newSize; |
1128 | 23.4k | } |
1129 | | |
1130 | 530k | if (entries[num].type == xrefEntryFree || |
1131 | 510k | gen >= entries[num].gen) { |
1132 | 510k | entries[num].offset = pos; |
1133 | 510k | entries[num].gen = gen; |
1134 | 510k | entries[num].type = type; |
1135 | 510k | if (num > last) { |
1136 | 190k | last = num; |
1137 | 190k | } |
1138 | 510k | } |
1139 | | |
1140 | 530k | return gTrue; |
1141 | 530k | } |
1142 | | |
1143 | | void XRef::setEncryption(int permFlagsA, GBool ownerPasswordOkA, |
1144 | | Guchar *fileKeyA, int keyLengthA, int encVersionA, |
1145 | 881 | CryptAlgorithm encAlgorithmA) { |
1146 | 881 | int i; |
1147 | | |
1148 | 881 | encrypted = gTrue; |
1149 | 881 | permFlags = permFlagsA; |
1150 | 881 | ownerPasswordOk = ownerPasswordOkA; |
1151 | 881 | if (keyLengthA <= 32) { |
1152 | 881 | keyLength = keyLengthA; |
1153 | 881 | } else { |
1154 | 0 | keyLength = 32; |
1155 | 0 | } |
1156 | 11.5k | for (i = 0; i < keyLength; ++i) { |
1157 | 10.6k | fileKey[i] = fileKeyA[i]; |
1158 | 10.6k | } |
1159 | 881 | encVersion = encVersionA; |
1160 | 881 | encAlgorithm = encAlgorithmA; |
1161 | 881 | } |
1162 | | |
1163 | | GBool XRef::getEncryption(int *permFlagsA, GBool *ownerPasswordOkA, |
1164 | | int *keyLengthA, int *encVersionA, |
1165 | 0 | CryptAlgorithm *encAlgorithmA) { |
1166 | 0 | if (!encrypted) { |
1167 | 0 | return gFalse; |
1168 | 0 | } |
1169 | 0 | *permFlagsA = permFlags; |
1170 | 0 | *ownerPasswordOkA = ownerPasswordOk; |
1171 | 0 | *keyLengthA = keyLength; |
1172 | 0 | *encVersionA = encVersion; |
1173 | 0 | *encAlgorithmA = encAlgorithm; |
1174 | 0 | return gTrue; |
1175 | 0 | } |
1176 | | |
1177 | 12.8k | GBool XRef::okToPrint(GBool ignoreOwnerPW) { |
1178 | 12.8k | return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permPrint); |
1179 | 12.8k | } |
1180 | | |
1181 | 12.8k | GBool XRef::okToChange(GBool ignoreOwnerPW) { |
1182 | 12.8k | return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permChange); |
1183 | 12.8k | } |
1184 | | |
1185 | 12.8k | GBool XRef::okToCopy(GBool ignoreOwnerPW) { |
1186 | 12.8k | return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permCopy); |
1187 | 12.8k | } |
1188 | | |
1189 | 12.8k | GBool XRef::okToAddNotes(GBool ignoreOwnerPW) { |
1190 | 12.8k | return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permNotes); |
1191 | 12.8k | } |
1192 | | |
1193 | 60.7M | Object *XRef::fetch(int num, int gen, Object *obj, int recursion) { |
1194 | 60.7M | XRefEntry *e; |
1195 | 60.7M | Parser *parser; |
1196 | 60.7M | Object obj1, obj2, obj3; |
1197 | 60.7M | XRefCacheEntry tmp; |
1198 | 60.7M | int i, j; |
1199 | | |
1200 | | // check for bogus ref - this can happen in corrupted PDF files |
1201 | 60.7M | if (num < 0 || num >= size) { |
1202 | 112k | goto err; |
1203 | 112k | } |
1204 | | |
1205 | | // check the cache |
1206 | 60.6M | #if MULTITHREADED |
1207 | 60.6M | gLockMutex(&cacheMutex); |
1208 | 60.6M | #endif |
1209 | 60.6M | if (cache[0].num == num && cache[0].gen == gen) { |
1210 | 1.02M | cache[0].obj.copy(obj); |
1211 | 1.02M | #if MULTITHREADED |
1212 | 1.02M | gUnlockMutex(&cacheMutex); |
1213 | 1.02M | #endif |
1214 | 1.02M | return obj; |
1215 | 1.02M | } |
1216 | 945M | for (i = 1; i < xrefCacheSize; ++i) { |
1217 | 886M | if (cache[i].num == num && cache[i].gen == gen) { |
1218 | 578k | tmp = cache[i]; |
1219 | 2.31M | for (j = i; j > 0; --j) { |
1220 | 1.73M | cache[j] = cache[j - 1]; |
1221 | 1.73M | } |
1222 | 578k | cache[0] = tmp; |
1223 | 578k | cache[0].obj.copy(obj); |
1224 | 578k | #if MULTITHREADED |
1225 | 578k | gUnlockMutex(&cacheMutex); |
1226 | 578k | #endif |
1227 | 578k | return obj; |
1228 | 578k | } |
1229 | 886M | } |
1230 | 58.9M | #if MULTITHREADED |
1231 | 58.9M | gUnlockMutex(&cacheMutex); |
1232 | 58.9M | #endif |
1233 | | |
1234 | 58.9M | e = &entries[num]; |
1235 | 58.9M | switch (e->type) { |
1236 | | |
1237 | 418k | case xrefEntryUncompressed: |
1238 | 418k | if (e->gen != gen) { |
1239 | 109k | goto err; |
1240 | 109k | } |
1241 | 309k | obj1.initNull(); |
1242 | 309k | parser = new Parser(this, |
1243 | 309k | new Lexer(this, |
1244 | 309k | str->makeSubStream(start + e->offset, gFalse, 0, &obj1)), |
1245 | 309k | gTrue); |
1246 | 309k | parser->getObj(&obj1, gTrue); |
1247 | 309k | parser->getObj(&obj2, gTrue); |
1248 | 309k | parser->getObj(&obj3, gTrue); |
1249 | 309k | if (!obj1.isInt() || obj1.getInt() != num || |
1250 | 307k | !obj2.isInt() || obj2.getInt() != gen || |
1251 | 307k | !obj3.isCmd("obj")) { |
1252 | 6.58k | obj1.free(); |
1253 | 6.58k | obj2.free(); |
1254 | 6.58k | obj3.free(); |
1255 | 6.58k | delete parser; |
1256 | 6.58k | goto err; |
1257 | 6.58k | } |
1258 | 302k | parser->getObj(obj, gFalse, encrypted ? fileKey : (Guchar *)NULL, |
1259 | 302k | encAlgorithm, keyLength, num, gen, recursion); |
1260 | 302k | obj1.free(); |
1261 | 302k | obj2.free(); |
1262 | 302k | obj3.free(); |
1263 | 302k | delete parser; |
1264 | 302k | break; |
1265 | | |
1266 | 86.9k | case xrefEntryCompressed: |
1267 | | #if 0 // Adobe apparently ignores the generation number on compressed objects |
1268 | | if (gen != 0) { |
1269 | | goto err; |
1270 | | } |
1271 | | #endif |
1272 | 86.9k | if (e->offset >= (GFileOffset)size || |
1273 | 86.5k | entries[e->offset].type != xrefEntryUncompressed) { |
1274 | 988 | error(errSyntaxError, -1, "Invalid object stream"); |
1275 | 988 | goto err; |
1276 | 988 | } |
1277 | 86.0k | if (!getObjectStreamObject((int)e->offset, e->gen, num, obj, recursion)) { |
1278 | 40.8k | goto err; |
1279 | 40.8k | } |
1280 | 45.1k | break; |
1281 | | |
1282 | 58.4M | default: |
1283 | 58.4M | goto err; |
1284 | 58.9M | } |
1285 | | |
1286 | | // put the new object in the cache, throwing away the oldest object |
1287 | | // currently in the cache |
1288 | 348k | #if MULTITHREADED |
1289 | 348k | gLockMutex(&cacheMutex); |
1290 | 348k | #endif |
1291 | 348k | if (cache[xrefCacheSize - 1].num >= 0) { |
1292 | 112k | cache[xrefCacheSize - 1].obj.free(); |
1293 | 112k | } |
1294 | 5.56M | for (i = xrefCacheSize - 1; i > 0; --i) { |
1295 | 5.22M | cache[i] = cache[i - 1]; |
1296 | 5.22M | } |
1297 | 348k | cache[0].num = num; |
1298 | 348k | cache[0].gen = gen; |
1299 | 348k | obj->copy(&cache[0].obj); |
1300 | 348k | #if MULTITHREADED |
1301 | 348k | gUnlockMutex(&cacheMutex); |
1302 | 348k | #endif |
1303 | | |
1304 | 348k | return obj; |
1305 | | |
1306 | 58.7M | err: |
1307 | 58.7M | return obj->initNull(); |
1308 | 58.9M | } |
1309 | | |
1310 | | GBool XRef::getObjectStreamObject(int objStrNum, int objIdx, |
1311 | 86.0k | int objNum, Object *obj, int recursion) { |
1312 | 86.0k | if (recursion >= objectRecursionLimit) { |
1313 | 67 | return gFalse; |
1314 | 67 | } |
1315 | | |
1316 | | // check for a cached ObjectStream |
1317 | 85.9k | #if MULTITHREADED |
1318 | 85.9k | gLockMutex(&objStrsMutex); |
1319 | 85.9k | #endif |
1320 | 85.9k | ObjectStream *objStr = getObjectStreamFromCache(objStrNum); |
1321 | 85.9k | GBool found = gFalse; |
1322 | 85.9k | if (objStr) { |
1323 | 38.1k | objStr->getObject(objIdx, objNum, obj); |
1324 | 38.1k | cleanObjectStreamCache(); |
1325 | 38.1k | found = gTrue; |
1326 | 38.1k | } |
1327 | 85.9k | #if MULTITHREADED |
1328 | 85.9k | gUnlockMutex(&objStrsMutex); |
1329 | 85.9k | #endif |
1330 | 85.9k | if (found) { |
1331 | 38.1k | return gTrue; |
1332 | 38.1k | } |
1333 | | |
1334 | | // load a new ObjectStream |
1335 | 47.7k | objStr = new ObjectStream(this, objStrNum, recursion + 1); |
1336 | 47.7k | if (!objStr->isOk()) { |
1337 | 40.7k | delete objStr; |
1338 | 40.7k | return gFalse; |
1339 | 40.7k | } |
1340 | 6.99k | objStr->getObject(objIdx, objNum, obj); |
1341 | 6.99k | #if MULTITHREADED |
1342 | 6.99k | gLockMutex(&objStrsMutex); |
1343 | 6.99k | #endif |
1344 | 6.99k | addObjectStreamToCache(objStr); |
1345 | 6.99k | cleanObjectStreamCache(); |
1346 | 6.99k | #if MULTITHREADED |
1347 | 6.99k | gUnlockMutex(&objStrsMutex); |
1348 | 6.99k | #endif |
1349 | 6.99k | return gTrue; |
1350 | 47.7k | } |
1351 | | |
1352 | | // NB: objStrsMutex must be locked when calling this function. |
1353 | 85.9k | ObjectStream *XRef::getObjectStreamFromCache(int objStrNum) { |
1354 | | // check the MRU entry in the cache |
1355 | 85.9k | if (objStrs[0] && objStrs[0]->getObjStrNum() == objStrNum) { |
1356 | 36.4k | ObjectStream *objStr = objStrs[0]; |
1357 | 36.4k | objStrLastUse[0] = objStrTime++; |
1358 | 36.4k | return objStr; |
1359 | 36.4k | } |
1360 | | |
1361 | | // check the rest of the cache |
1362 | 65.4k | for (int i = 1; i < objStrCacheLength; ++i) { |
1363 | 17.7k | if (objStrs[i] && objStrs[i]->getObjStrNum() == objStrNum) { |
1364 | 1.73k | ObjectStream *objStr = objStrs[i]; |
1365 | 5.19k | for (int j = i; j > 0; --j) { |
1366 | 3.45k | objStrs[j] = objStrs[j - 1]; |
1367 | 3.45k | objStrLastUse[j] = objStrLastUse[j - 1]; |
1368 | 3.45k | } |
1369 | 1.73k | objStrs[0] = objStr; |
1370 | 1.73k | objStrLastUse[0] = objStrTime++; |
1371 | 1.73k | return objStr; |
1372 | 1.73k | } |
1373 | 17.7k | } |
1374 | | |
1375 | 47.7k | return NULL; |
1376 | 49.4k | } |
1377 | | |
1378 | | // NB: objStrsMutex must be locked when calling this function. |
1379 | 6.99k | void XRef::addObjectStreamToCache(ObjectStream *objStr) { |
1380 | | // add to the cache |
1381 | 6.99k | if (objStrCacheLength == objStrCacheSize) { |
1382 | 491 | delete objStrs[objStrCacheSize - 1]; |
1383 | 491 | --objStrCacheLength; |
1384 | 491 | } |
1385 | 106k | for (int j = objStrCacheLength; j > 0; --j) { |
1386 | 99.8k | objStrs[j] = objStrs[j - 1]; |
1387 | 99.8k | objStrLastUse[j] = objStrLastUse[j - 1]; |
1388 | 99.8k | } |
1389 | 6.99k | ++objStrCacheLength; |
1390 | 6.99k | objStrs[0] = objStr; |
1391 | 6.99k | objStrLastUse[0] = objStrTime++; |
1392 | 6.99k | } |
1393 | | |
1394 | | // If the oldest (least recently used) entry in the object stream |
1395 | | // cache is more than objStrCacheTimeout accesses old (hasn't been |
1396 | | // used in the last objStrCacheTimeout accesses), eject it from the |
1397 | | // cache. |
1398 | | // NB: objStrsMutex must be locked when calling this function. |
1399 | 45.1k | void XRef::cleanObjectStreamCache() { |
1400 | | // NB: objStrTime and objStrLastUse[] are unsigned ints, so the |
1401 | | // mod-2^32 arithmetic makes the subtraction work out, even if the |
1402 | | // time wraps around. |
1403 | 45.1k | if (objStrCacheLength > 1 && |
1404 | 35.4k | objStrTime - objStrLastUse[objStrCacheLength - 1] |
1405 | 35.4k | > objStrCacheTimeout) { |
1406 | 0 | delete objStrs[objStrCacheLength - 1]; |
1407 | 0 | objStrs[objStrCacheLength - 1] = NULL; |
1408 | 0 | --objStrCacheLength; |
1409 | 0 | } |
1410 | 45.1k | } |
1411 | | |
1412 | 12.8k | Object *XRef::getDocInfo(Object *obj) { |
1413 | 12.8k | return trailerDict.dictLookup("Info", obj); |
1414 | 12.8k | } |
1415 | | |
1416 | | // Added for the pdftex project. |
1417 | 0 | Object *XRef::getDocInfoNF(Object *obj) { |
1418 | 0 | return trailerDict.dictLookupNF("Info", obj); |
1419 | 0 | } |
1420 | | |
1421 | 230k | GBool XRef::getStreamEnd(GFileOffset streamStart, GFileOffset *streamEnd) { |
1422 | 230k | int a, b, m; |
1423 | | |
1424 | 230k | if (streamEndsLen == 0 || |
1425 | 197k | streamStart > streamEnds[streamEndsLen - 1]) { |
1426 | 65.1k | return gFalse; |
1427 | 65.1k | } |
1428 | | |
1429 | 165k | a = -1; |
1430 | 165k | b = streamEndsLen - 1; |
1431 | | // invariant: streamEnds[a] < streamStart <= streamEnds[b] |
1432 | 813k | while (b - a > 1) { |
1433 | 647k | m = (a + b) / 2; |
1434 | 647k | if (streamStart <= streamEnds[m]) { |
1435 | 296k | b = m; |
1436 | 350k | } else { |
1437 | 350k | a = m; |
1438 | 350k | } |
1439 | 647k | } |
1440 | 165k | *streamEnd = streamEnds[b]; |
1441 | 165k | return gTrue; |
1442 | 230k | } |
1443 | | |
1444 | 1.43k | GFileOffset XRef::strToFileOffset(char *s) { |
1445 | 1.43k | GFileOffset x, d; |
1446 | 1.43k | char *p; |
1447 | | |
1448 | 1.43k | x = 0; |
1449 | 5.68k | for (p = s; *p && isdigit(*p & 0xff); ++p) { |
1450 | 4.25k | d = *p - '0'; |
1451 | 4.25k | if (x > (GFILEOFFSET_MAX - d) / 10) { |
1452 | 10 | break; |
1453 | 10 | } |
1454 | 4.24k | x = 10 * x + d; |
1455 | 4.24k | } |
1456 | 1.43k | return x; |
1457 | 1.43k | } |