/src/xpdf-4.05/xpdf/XRef.cc
Line | Count | Source (jump to first uncovered line) |
1 | | //======================================================================== |
2 | | // |
3 | | // XRef.cc |
4 | | // |
5 | | // Copyright 1996-2003 Glyph & Cog, LLC |
6 | | // |
7 | | //======================================================================== |
8 | | |
9 | | #include <aconf.h> |
10 | | |
11 | | #include <stdlib.h> |
12 | | #include <stddef.h> |
13 | | #include <string.h> |
14 | | #include <ctype.h> |
15 | | #include <limits.h> |
16 | | #include "gmem.h" |
17 | | #include "gmempp.h" |
18 | | #include "gfile.h" |
19 | | #include "Object.h" |
20 | | #include "Stream.h" |
21 | | #include "Lexer.h" |
22 | | #include "Parser.h" |
23 | | #include "Dict.h" |
24 | | #include "Error.h" |
25 | | #include "ErrorCodes.h" |
26 | | #include "XRef.h" |
27 | | |
28 | | //------------------------------------------------------------------------ |
29 | | |
30 | 5.17k | #define xrefSearchSize 1024 // read this many bytes at end of file |
31 | | // to look for 'startxref' |
32 | | |
33 | | //------------------------------------------------------------------------ |
34 | | // Permission bits |
35 | | //------------------------------------------------------------------------ |
36 | | |
37 | 0 | #define permPrint (1<<2) |
38 | 0 | #define permChange (1<<3) |
39 | 0 | #define permCopy (1<<4) |
40 | 0 | #define permNotes (1<<5) |
41 | 5.14k | #define defPermFlags 0xfffc |
42 | | |
43 | | //------------------------------------------------------------------------ |
44 | | // XRefPosSet |
45 | | //------------------------------------------------------------------------ |
46 | | |
47 | | class XRefPosSet { |
48 | | public: |
49 | | |
50 | | XRefPosSet(); |
51 | | ~XRefPosSet(); |
52 | | void add(GFileOffset pos); |
53 | | GBool check(GFileOffset pos); |
54 | 122 | int getLength() { return len; } |
55 | 178 | GFileOffset get(int idx) { return tab[idx]; } |
56 | | |
57 | | private: |
58 | | |
59 | | int find(GFileOffset pos); |
60 | | |
61 | | GFileOffset *tab; |
62 | | int size; |
63 | | int len; |
64 | | }; |
65 | | |
66 | 122 | XRefPosSet::XRefPosSet() { |
67 | 122 | size = 16; |
68 | 122 | len = 0; |
69 | 122 | tab = (GFileOffset *)gmallocn(size, sizeof(GFileOffset)); |
70 | 122 | } |
71 | | |
72 | 122 | XRefPosSet::~XRefPosSet() { |
73 | 122 | gfree(tab); |
74 | 122 | } |
75 | | |
76 | 178 | void XRefPosSet::add(GFileOffset pos) { |
77 | 178 | int i; |
78 | | |
79 | 178 | i = find(pos); |
80 | 178 | if (i < len && tab[i] == pos) { |
81 | 0 | return; |
82 | 0 | } |
83 | 178 | if (len == size) { |
84 | 0 | if (size > INT_MAX / 2) { |
85 | 0 | gMemError("Integer overflow in XRefPosSet::add()"); |
86 | 0 | } |
87 | 0 | size *= 2; |
88 | 0 | tab = (GFileOffset *)greallocn(tab, size, sizeof(GFileOffset)); |
89 | 0 | } |
90 | 178 | if (i < len) { |
91 | 25 | memmove(&tab[i + 1], &tab[i], (len - i) * sizeof(GFileOffset)); |
92 | 25 | } |
93 | 178 | tab[i] = pos; |
94 | 178 | ++len; |
95 | 178 | } |
96 | | |
97 | 178 | GBool XRefPosSet::check(GFileOffset pos) { |
98 | 178 | int i; |
99 | | |
100 | 178 | i = find(pos); |
101 | 178 | return i < len && tab[i] == pos; |
102 | 178 | } |
103 | | |
104 | 356 | int XRefPosSet::find(GFileOffset pos) { |
105 | 356 | int a, b, m; |
106 | | |
107 | 356 | a = - 1; |
108 | 356 | b = len; |
109 | | // invariant: tab[a] < pos < tab[b] |
110 | 494 | while (b - a > 1) { |
111 | 138 | m = (a + b) / 2; |
112 | 138 | if (tab[m] < pos) { |
113 | 70 | a = m; |
114 | 70 | } else if (tab[m] > pos) { |
115 | 68 | b = m; |
116 | 68 | } else { |
117 | 0 | return m; |
118 | 0 | } |
119 | 138 | } |
120 | 356 | return b; |
121 | 356 | } |
122 | | |
123 | | //------------------------------------------------------------------------ |
124 | | // ObjectStream |
125 | | //------------------------------------------------------------------------ |
126 | | |
127 | | class ObjectStream { |
128 | | public: |
129 | | |
130 | | // Create an object stream, using object number <objStrNum>, |
131 | | // generation 0. |
132 | | ObjectStream(XRef *xref, int objStrNumA, int recursion); |
133 | | |
134 | 13.6k | GBool isOk() { return ok; } |
135 | | |
136 | | ~ObjectStream(); |
137 | | |
138 | | // Return the object number of this object stream. |
139 | 48.7k | int getObjStrNum() { return objStrNum; } |
140 | | |
141 | | // Get the <objIdx>th object from this stream, which should be |
142 | | // object number <objNum>, generation 0. |
143 | | Object *getObject(int objIdx, int objNum, Object *obj); |
144 | | |
145 | | private: |
146 | | |
147 | | int objStrNum; // object number of the object stream |
148 | | int nObjects; // number of objects in the stream |
149 | | Object *objs; // the objects (length = nObjects) |
150 | | int *objNums; // the object numbers (length = nObjects) |
151 | | GBool ok; |
152 | | }; |
153 | | |
154 | 13.6k | ObjectStream::ObjectStream(XRef *xref, int objStrNumA, int recursion) { |
155 | 13.6k | Stream *str; |
156 | 13.6k | Lexer *lexer; |
157 | 13.6k | Parser *parser; |
158 | 13.6k | int *offsets; |
159 | 13.6k | Object objStr, obj1, obj2; |
160 | 13.6k | int first, i; |
161 | | |
162 | 13.6k | objStrNum = objStrNumA; |
163 | 13.6k | nObjects = 0; |
164 | 13.6k | objs = NULL; |
165 | 13.6k | objNums = NULL; |
166 | 13.6k | ok = gFalse; |
167 | | |
168 | 13.6k | if (!xref->fetch(objStrNum, 0, &objStr, recursion)->isStream()) { |
169 | 1.54k | goto err1; |
170 | 1.54k | } |
171 | | |
172 | 12.1k | if (!objStr.streamGetDict()->lookup("N", &obj1)->isInt()) { |
173 | 36 | obj1.free(); |
174 | 36 | goto err1; |
175 | 36 | } |
176 | 12.0k | nObjects = obj1.getInt(); |
177 | 12.0k | obj1.free(); |
178 | 12.0k | if (nObjects <= 0) { |
179 | 22 | goto err1; |
180 | 22 | } |
181 | | |
182 | 12.0k | if (!objStr.streamGetDict()->lookup("First", &obj1)->isInt()) { |
183 | 719 | obj1.free(); |
184 | 719 | goto err1; |
185 | 719 | } |
186 | 11.3k | first = obj1.getInt(); |
187 | 11.3k | obj1.free(); |
188 | 11.3k | if (first < 0) { |
189 | 264 | goto err1; |
190 | 264 | } |
191 | | |
192 | | // this is an arbitrary limit to avoid integer overflow problems |
193 | | // in the 'new Object[nObjects]' call (Acrobat apparently limits |
194 | | // object streams to 100-200 objects) |
195 | 11.0k | if (nObjects > 1000000) { |
196 | 23 | error(errSyntaxError, -1, "Too many objects in an object stream"); |
197 | 23 | goto err1; |
198 | 23 | } |
199 | 11.0k | objs = new Object[nObjects]; |
200 | 11.0k | objNums = (int *)gmallocn(nObjects, sizeof(int)); |
201 | 11.0k | offsets = (int *)gmallocn(nObjects, sizeof(int)); |
202 | | |
203 | | // parse the header: object numbers and offsets |
204 | 11.0k | objStr.streamReset(); |
205 | 11.0k | obj1.initNull(); |
206 | 11.0k | str = new EmbedStream(objStr.getStream(), &obj1, gTrue, first); |
207 | 11.0k | lexer = new Lexer(xref, str); |
208 | 11.0k | parser = new Parser(xref, lexer, gFalse); |
209 | 323k | for (i = 0; i < nObjects; ++i) { |
210 | 321k | parser->getObj(&obj1, gTrue); |
211 | 321k | parser->getObj(&obj2, gTrue); |
212 | 321k | if (!obj1.isInt() || !obj2.isInt()) { |
213 | 2.99k | obj1.free(); |
214 | 2.99k | obj2.free(); |
215 | 2.99k | delete parser; |
216 | 2.99k | gfree(offsets); |
217 | 2.99k | goto err2; |
218 | 2.99k | } |
219 | 318k | objNums[i] = obj1.getInt(); |
220 | 318k | offsets[i] = obj2.getInt(); |
221 | 318k | obj1.free(); |
222 | 318k | obj2.free(); |
223 | 318k | if (objNums[i] < 0 || offsets[i] < 0 || |
224 | 318k | (i > 0 && offsets[i] < offsets[i-1])) { |
225 | 6.15k | delete parser; |
226 | 6.15k | gfree(offsets); |
227 | 6.15k | goto err2; |
228 | 6.15k | } |
229 | 318k | } |
230 | 1.90k | lexer->skipToEOF(); |
231 | 1.90k | delete parser; |
232 | | |
233 | | // skip to the first object - this generally shouldn't be needed, |
234 | | // because offsets[0] is normally 0, but just in case... |
235 | 1.90k | if (offsets[0] > 0) { |
236 | 7 | objStr.getStream()->discardChars(offsets[0]); |
237 | 7 | } |
238 | | |
239 | | // parse the objects |
240 | 27.2k | for (i = 0; i < nObjects; ++i) { |
241 | 25.3k | obj1.initNull(); |
242 | 25.3k | if (i == nObjects - 1) { |
243 | 1.90k | str = new EmbedStream(objStr.getStream(), &obj1, gFalse, 0); |
244 | 23.4k | } else { |
245 | 23.4k | str = new EmbedStream(objStr.getStream(), &obj1, gTrue, |
246 | 23.4k | offsets[i+1] - offsets[i]); |
247 | 23.4k | } |
248 | 25.3k | lexer = new Lexer(xref, str); |
249 | 25.3k | parser = new Parser(xref, lexer, gFalse); |
250 | 25.3k | parser->getObj(&objs[i]); |
251 | 25.3k | lexer->skipToEOF(); |
252 | 25.3k | delete parser; |
253 | 25.3k | } |
254 | | |
255 | 1.90k | gfree(offsets); |
256 | 1.90k | ok = gTrue; |
257 | | |
258 | 11.0k | err2: |
259 | 11.0k | objStr.streamClose(); |
260 | 13.6k | err1: |
261 | 13.6k | objStr.free(); |
262 | 13.6k | } |
263 | | |
264 | 13.6k | ObjectStream::~ObjectStream() { |
265 | 13.6k | int i; |
266 | | |
267 | 13.6k | if (objs) { |
268 | 748k | for (i = 0; i < nObjects; ++i) { |
269 | 737k | objs[i].free(); |
270 | 737k | } |
271 | 11.0k | delete[] objs; |
272 | 11.0k | } |
273 | 13.6k | gfree(objNums); |
274 | 13.6k | } |
275 | | |
276 | 28.3k | Object *ObjectStream::getObject(int objIdx, int objNum, Object *obj) { |
277 | 28.3k | if (objIdx < 0 || objIdx >= nObjects || objNum != objNums[objIdx]) { |
278 | 137 | obj->initNull(); |
279 | 28.2k | } else { |
280 | 28.2k | objs[objIdx].copy(obj); |
281 | 28.2k | } |
282 | 28.3k | return obj; |
283 | 28.3k | } |
284 | | |
285 | | //------------------------------------------------------------------------ |
286 | | // XRef |
287 | | //------------------------------------------------------------------------ |
288 | | |
289 | 5.14k | XRef::XRef(BaseStream *strA, GBool repair) { |
290 | 5.14k | GFileOffset pos; |
291 | 5.14k | Object obj; |
292 | 5.14k | XRefPosSet *posSet; |
293 | 5.14k | int i; |
294 | | |
295 | 5.14k | ok = gTrue; |
296 | 5.14k | errCode = errNone; |
297 | 5.14k | repaired = gFalse; |
298 | 5.14k | size = 0; |
299 | 5.14k | last = -1; |
300 | 5.14k | entries = NULL; |
301 | 5.14k | lastStartxrefPos = 0; |
302 | 5.14k | xrefTablePos = NULL; |
303 | 5.14k | xrefTablePosLen = 0; |
304 | 5.14k | streamEnds = NULL; |
305 | 5.14k | streamEndsLen = 0; |
306 | 663k | for (i = 0; i < objStrCacheSize; ++i) { |
307 | 658k | objStrs[i] = NULL; |
308 | 658k | objStrLastUse[i] = 0; |
309 | 658k | } |
310 | 5.14k | objStrCacheLength = 0; |
311 | 5.14k | objStrTime = 0; |
312 | | |
313 | 5.14k | encrypted = gFalse; |
314 | 5.14k | permFlags = defPermFlags; |
315 | 5.14k | ownerPasswordOk = gFalse; |
316 | | |
317 | 87.4k | for (i = 0; i < xrefCacheSize; ++i) { |
318 | 82.3k | cache[i].num = -1; |
319 | 82.3k | } |
320 | | |
321 | 5.14k | #if MULTITHREADED |
322 | 5.14k | gInitMutex(&objStrsMutex); |
323 | 5.14k | gInitMutex(&cacheMutex); |
324 | 5.14k | #endif |
325 | | |
326 | 5.14k | str = strA; |
327 | 5.14k | start = str->getStart(); |
328 | | |
329 | | // if the 'repair' flag is set, try to reconstruct the xref table |
330 | 5.14k | if (repair) { |
331 | 2.55k | if (!(ok = constructXRef())) { |
332 | 1.09k | errCode = errDamaged; |
333 | 1.09k | return; |
334 | 1.09k | } |
335 | 1.46k | repaired = gTrue; |
336 | | |
337 | | // if the 'repair' flag is not set, read the xref table |
338 | 2.58k | } else { |
339 | | |
340 | | // read the trailer |
341 | 2.58k | pos = getStartXref(); |
342 | 2.58k | if (pos == 0) { |
343 | 2.46k | errCode = errDamaged; |
344 | 2.46k | ok = gFalse; |
345 | 2.46k | return; |
346 | 2.46k | } |
347 | | |
348 | | // read the xref table |
349 | 122 | posSet = new XRefPosSet(); |
350 | 174 | while (readXRef(&pos, posSet, gFalse)) ; |
351 | 122 | xrefTablePosLen = posSet->getLength(); |
352 | 122 | xrefTablePos = (GFileOffset *)gmallocn(xrefTablePosLen, |
353 | 122 | sizeof(GFileOffset)); |
354 | 300 | for (i = 0; i < xrefTablePosLen; ++i) { |
355 | 178 | xrefTablePos[i] = posSet->get(i); |
356 | 178 | } |
357 | 122 | delete posSet; |
358 | 122 | if (!ok) { |
359 | 80 | errCode = errDamaged; |
360 | 80 | return; |
361 | 80 | } |
362 | 122 | } |
363 | | |
364 | | // get the root dictionary (catalog) object |
365 | 1.50k | trailerDict.dictLookupNF("Root", &obj); |
366 | 1.50k | if (obj.isRef()) { |
367 | 1.44k | rootNum = obj.getRefNum(); |
368 | 1.44k | rootGen = obj.getRefGen(); |
369 | 1.44k | obj.free(); |
370 | 1.44k | } else { |
371 | 58 | obj.free(); |
372 | 58 | if (!(ok = constructXRef())) { |
373 | 7 | errCode = errDamaged; |
374 | 7 | return; |
375 | 7 | } |
376 | 58 | } |
377 | | |
378 | | // now set the trailer dictionary's xref pointer so we can fetch |
379 | | // indirect objects from it |
380 | 1.49k | trailerDict.getDict()->setXRef(this); |
381 | 1.49k | } |
382 | | |
383 | 5.12k | XRef::~XRef() { |
384 | 5.12k | int i; |
385 | | |
386 | 87.1k | for (i = 0; i < xrefCacheSize; ++i) { |
387 | 82.0k | if (cache[i].num >= 0) { |
388 | 18.8k | cache[i].obj.free(); |
389 | 18.8k | } |
390 | 82.0k | } |
391 | 5.12k | gfree(entries); |
392 | 5.12k | trailerDict.free(); |
393 | 5.12k | if (xrefTablePos) { |
394 | 122 | gfree(xrefTablePos); |
395 | 122 | } |
396 | 5.12k | if (streamEnds) { |
397 | 1.44k | gfree(streamEnds); |
398 | 1.44k | } |
399 | 661k | for (i = 0; i < objStrCacheSize; ++i) { |
400 | 656k | if (objStrs[i]) { |
401 | 1.90k | delete objStrs[i]; |
402 | 1.90k | } |
403 | 656k | } |
404 | 5.12k | #if MULTITHREADED |
405 | 5.12k | gDestroyMutex(&objStrsMutex); |
406 | 5.12k | gDestroyMutex(&cacheMutex); |
407 | 5.12k | #endif |
408 | 5.12k | } |
409 | | |
410 | | // Read the 'startxref' position. |
411 | 2.58k | GFileOffset XRef::getStartXref() { |
412 | 2.58k | char buf[xrefSearchSize+1]; |
413 | 2.58k | char *p; |
414 | 2.58k | int n, i; |
415 | | |
416 | | // read last xrefSearchSize bytes |
417 | 2.58k | str->setPos(xrefSearchSize, -1); |
418 | 2.58k | n = str->getBlock(buf, xrefSearchSize); |
419 | 2.58k | buf[n] = '\0'; |
420 | | |
421 | | // find startxref |
422 | 2.50M | for (i = n - 9; i >= 0; --i) { |
423 | 2.50M | if (!strncmp(&buf[i], "startxref", 9)) { |
424 | 187 | break; |
425 | 187 | } |
426 | 2.50M | } |
427 | 2.58k | if (i < 0) { |
428 | 2.40k | return 0; |
429 | 2.40k | } |
430 | 423 | for (p = &buf[i+9]; isspace(*p & 0xff); ++p) ; |
431 | 187 | lastXRefPos = strToFileOffset(p); |
432 | 187 | lastStartxrefPos = str->getPos() - n + i; |
433 | | |
434 | 187 | return lastXRefPos; |
435 | 2.58k | } |
436 | | |
437 | | // Read one xref table section. Also reads the associated trailer |
438 | | // dictionary, and returns the prev pointer (if any). The [hybrid] |
439 | | // flag is true when following the XRefStm link in a hybrid-reference |
440 | | // file. |
441 | 178 | GBool XRef::readXRef(GFileOffset *pos, XRefPosSet *posSet, GBool hybrid) { |
442 | 178 | Parser *parser; |
443 | 178 | Object obj; |
444 | 178 | GBool more; |
445 | 178 | char buf[100]; |
446 | 178 | int n, i; |
447 | | |
448 | | // check for a loop in the xref tables |
449 | 178 | if (posSet->check(*pos)) { |
450 | 0 | error(errSyntaxWarning, -1, "Infinite loop in xref table"); |
451 | 0 | return gFalse; |
452 | 0 | } |
453 | 178 | posSet->add(*pos); |
454 | | |
455 | | // the xref data should either be "xref ..." (for an xref table) or |
456 | | // "nn gg obj << ... >> stream ..." (for an xref stream); possibly |
457 | | // preceded by whitespace |
458 | 178 | str->setPos(start + *pos); |
459 | 178 | n = str->getBlock(buf, 100); |
460 | 341 | for (i = 0; i < n && Lexer::isSpace(buf[i]); ++i) ; |
461 | | |
462 | | // parse an old-style xref table |
463 | 178 | if (!hybrid && |
464 | 178 | i + 4 < n && |
465 | 178 | buf[i] == 'x' && buf[i+1] == 'r' && buf[i+2] == 'e' && buf[i+3] == 'f' && |
466 | 178 | Lexer::isSpace(buf[i+4])) { |
467 | 21 | more = readXRefTable(pos, i + 5, posSet); |
468 | | |
469 | | // parse an xref stream |
470 | 157 | } else { |
471 | 157 | obj.initNull(); |
472 | 157 | parser = new Parser(NULL, |
473 | 157 | new Lexer(NULL, |
474 | 157 | str->makeSubStream(start + *pos, gFalse, 0, &obj)), |
475 | 157 | gTrue); |
476 | 157 | if (!parser->getObj(&obj, gTrue)->isInt()) { |
477 | 37 | goto err; |
478 | 37 | } |
479 | 120 | obj.free(); |
480 | 120 | if (!parser->getObj(&obj, gTrue)->isInt()) { |
481 | 14 | goto err; |
482 | 14 | } |
483 | 106 | obj.free(); |
484 | 106 | if (!parser->getObj(&obj, gTrue)->isCmd("obj")) { |
485 | 8 | goto err; |
486 | 8 | } |
487 | 98 | obj.free(); |
488 | 98 | if (!parser->getObj(&obj)->isStream()) { |
489 | 4 | goto err; |
490 | 4 | } |
491 | 94 | more = readXRefStream(obj.getStream(), pos, hybrid); |
492 | 94 | obj.free(); |
493 | 94 | delete parser; |
494 | 94 | } |
495 | | |
496 | 115 | return more; |
497 | | |
498 | 63 | err: |
499 | 63 | obj.free(); |
500 | 63 | delete parser; |
501 | 63 | ok = gFalse; |
502 | 63 | return gFalse; |
503 | 178 | } |
504 | | |
505 | 21 | GBool XRef::readXRefTable(GFileOffset *pos, int offset, XRefPosSet *posSet) { |
506 | 21 | XRefEntry entry; |
507 | 21 | Parser *parser; |
508 | 21 | Object obj, obj2; |
509 | 21 | char buf[6]; |
510 | 21 | GFileOffset off, pos2; |
511 | 21 | GBool more; |
512 | 21 | int first, n, digit, newSize, gen, i, c; |
513 | | |
514 | 21 | str->setPos(start + *pos + offset); |
515 | | |
516 | 46 | while (1) { |
517 | 94 | do { |
518 | 94 | c = str->getChar(); |
519 | 94 | } while (Lexer::isSpace(c)); |
520 | 46 | if (c == 't') { |
521 | 13 | if (str->getBlock(buf, 6) != 6 || memcmp(buf, "railer", 6)) { |
522 | 0 | goto err1; |
523 | 0 | } |
524 | 13 | break; |
525 | 13 | } |
526 | 33 | if (c < '0' || c > '9') { |
527 | 0 | goto err1; |
528 | 0 | } |
529 | 33 | first = 0; |
530 | 78 | do { |
531 | 78 | digit = c - '0'; |
532 | 78 | if (first > (INT_MAX - digit) / 10) { |
533 | 0 | goto err1; |
534 | 0 | } |
535 | 78 | first = (first * 10) + digit; |
536 | 78 | c = str->getChar(); |
537 | 78 | } while (c >= '0' && c <= '9'); |
538 | 33 | if (!Lexer::isSpace(c)) { |
539 | 0 | goto err1; |
540 | 0 | } |
541 | 33 | do { |
542 | 33 | c = str->getChar(); |
543 | 33 | } while (Lexer::isSpace(c)); |
544 | 33 | n = 0; |
545 | 44 | do { |
546 | 44 | digit = c - '0'; |
547 | 44 | if (n > (INT_MAX - digit) / 10) { |
548 | 0 | goto err1; |
549 | 0 | } |
550 | 44 | n = (n * 10) + digit; |
551 | 44 | c = str->getChar(); |
552 | 44 | } while (c >= '0' && c <= '9'); |
553 | 33 | if (!Lexer::isSpace(c)) { |
554 | 0 | goto err1; |
555 | 0 | } |
556 | 33 | if (first > INT_MAX - n) { |
557 | 0 | goto err1; |
558 | 0 | } |
559 | 33 | if (first + n > size) { |
560 | 9 | for (newSize = size ? 2 * size : 1024; |
561 | 15 | first + n > newSize && newSize > 0; |
562 | 9 | newSize <<= 1) ; |
563 | 9 | if (newSize < 0) { |
564 | 0 | goto err1; |
565 | 0 | } |
566 | 9 | entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); |
567 | 16.3k | for (i = size; i < newSize; ++i) { |
568 | 16.3k | entries[i].offset = (GFileOffset)-1; |
569 | 16.3k | entries[i].type = xrefEntryFree; |
570 | 16.3k | } |
571 | 9 | size = newSize; |
572 | 9 | } |
573 | 378 | for (i = first; i < first + n; ++i) { |
574 | 703 | do { |
575 | 703 | c = str->getChar(); |
576 | 703 | } while (Lexer::isSpace(c)); |
577 | 353 | off = 0; |
578 | 3.49k | do { |
579 | 3.49k | off = (off * 10) + (c - '0'); |
580 | 3.49k | c = str->getChar(); |
581 | 3.49k | } while (c >= '0' && c <= '9'); |
582 | 353 | if (!Lexer::isSpace(c)) { |
583 | 4 | goto err1; |
584 | 4 | } |
585 | 349 | entry.offset = off; |
586 | 351 | do { |
587 | 351 | c = str->getChar(); |
588 | 351 | } while (Lexer::isSpace(c)); |
589 | 349 | gen = 0; |
590 | 1.74k | do { |
591 | 1.74k | gen = (gen * 10) + (c - '0'); |
592 | 1.74k | c = str->getChar(); |
593 | 1.74k | } while (c >= '0' && c <= '9'); |
594 | 349 | if (!Lexer::isSpace(c)) { |
595 | 2 | goto err1; |
596 | 2 | } |
597 | 347 | entry.gen = gen; |
598 | 348 | do { |
599 | 348 | c = str->getChar(); |
600 | 348 | } while (Lexer::isSpace(c)); |
601 | 347 | if (c == 'n') { |
602 | 337 | entry.type = xrefEntryUncompressed; |
603 | 337 | } else if (c == 'f') { |
604 | 8 | entry.type = xrefEntryFree; |
605 | 8 | } else { |
606 | 2 | goto err1; |
607 | 2 | } |
608 | 345 | c = str->getChar(); |
609 | 345 | if (!Lexer::isSpace(c)) { |
610 | 0 | goto err1; |
611 | 0 | } |
612 | 345 | if (entries[i].offset == (GFileOffset)-1) { |
613 | 338 | entries[i] = entry; |
614 | | // PDF files of patents from the IBM Intellectual Property |
615 | | // Network have a bug: the xref table claims to start at 1 |
616 | | // instead of 0. |
617 | 338 | if (i == 1 && first == 1 && |
618 | 338 | entries[1].offset == 0 && entries[1].gen == 65535 && |
619 | 338 | entries[1].type == xrefEntryFree) { |
620 | 0 | i = first = 0; |
621 | 0 | entries[0] = entries[1]; |
622 | 0 | entries[1].offset = (GFileOffset)-1; |
623 | 0 | } |
624 | 338 | if (i > last) { |
625 | 307 | last = i; |
626 | 307 | } |
627 | 338 | } |
628 | 345 | } |
629 | 33 | } |
630 | | |
631 | | // read the trailer dictionary |
632 | 13 | obj.initNull(); |
633 | 13 | parser = new Parser(NULL, |
634 | 13 | new Lexer(NULL, |
635 | 13 | str->makeSubStream(str->getPos(), gFalse, 0, &obj)), |
636 | 13 | gTrue); |
637 | 13 | parser->getObj(&obj); |
638 | 13 | delete parser; |
639 | 13 | if (!obj.isDict()) { |
640 | 0 | obj.free(); |
641 | 0 | goto err1; |
642 | 0 | } |
643 | | |
644 | | // get the 'Prev' pointer |
645 | | //~ this can be a 64-bit int (?) |
646 | 13 | obj.getDict()->lookupNF("Prev", &obj2); |
647 | 13 | if (obj2.isInt()) { |
648 | 11 | *pos = (GFileOffset)(Guint)obj2.getInt(); |
649 | 11 | more = gTrue; |
650 | 11 | } else if (obj2.isRef()) { |
651 | | // certain buggy PDF generators generate "/Prev NNN 0 R" instead |
652 | | // of "/Prev NNN" |
653 | 0 | *pos = (GFileOffset)(Guint)obj2.getRefNum(); |
654 | 0 | more = gTrue; |
655 | 2 | } else { |
656 | 2 | more = gFalse; |
657 | 2 | } |
658 | 13 | obj2.free(); |
659 | | |
660 | | // save the first trailer dictionary |
661 | 13 | if (trailerDict.isNone()) { |
662 | 5 | obj.copy(&trailerDict); |
663 | 5 | } |
664 | | |
665 | | // check for an 'XRefStm' key |
666 | | //~ this can be a 64-bit int (?) |
667 | 13 | if (obj.getDict()->lookup("XRefStm", &obj2)->isInt()) { |
668 | 4 | pos2 = (GFileOffset)(Guint)obj2.getInt(); |
669 | 4 | readXRef(&pos2, posSet, gTrue); |
670 | 4 | if (!ok) { |
671 | 1 | obj2.free(); |
672 | 1 | obj.free(); |
673 | 1 | goto err1; |
674 | 1 | } |
675 | 4 | } |
676 | 12 | obj2.free(); |
677 | | |
678 | 12 | obj.free(); |
679 | 12 | return more; |
680 | | |
681 | 9 | err1: |
682 | 9 | ok = gFalse; |
683 | 9 | return gFalse; |
684 | 13 | } |
685 | | |
686 | 94 | GBool XRef::readXRefStream(Stream *xrefStr, GFileOffset *pos, GBool hybrid) { |
687 | 94 | Dict *dict; |
688 | 94 | int w[3]; |
689 | 94 | GBool more; |
690 | 94 | Object obj, obj2, idx; |
691 | 94 | int newSize, first, n, i; |
692 | | |
693 | 94 | dict = xrefStr->getDict(); |
694 | | |
695 | 94 | if (!dict->lookupNF("Size", &obj)->isInt()) { |
696 | 3 | goto err1; |
697 | 3 | } |
698 | 91 | newSize = obj.getInt(); |
699 | 91 | obj.free(); |
700 | 91 | if (newSize < 0) { |
701 | 0 | goto err1; |
702 | 0 | } |
703 | 91 | if (newSize > size) { |
704 | 57 | entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); |
705 | 8.83k | for (i = size; i < newSize; ++i) { |
706 | 8.78k | entries[i].offset = (GFileOffset)-1; |
707 | 8.78k | entries[i].type = xrefEntryFree; |
708 | 8.78k | } |
709 | 57 | size = newSize; |
710 | 57 | } |
711 | | |
712 | 91 | if (!dict->lookupNF("W", &obj)->isArray() || |
713 | 91 | obj.arrayGetLength() < 3) { |
714 | 0 | goto err1; |
715 | 0 | } |
716 | 364 | for (i = 0; i < 3; ++i) { |
717 | 273 | if (!obj.arrayGet(i, &obj2)->isInt()) { |
718 | 0 | obj2.free(); |
719 | 0 | goto err1; |
720 | 0 | } |
721 | 273 | w[i] = obj2.getInt(); |
722 | 273 | obj2.free(); |
723 | 273 | } |
724 | 91 | obj.free(); |
725 | 91 | if (w[0] < 0 || w[0] > 8 || |
726 | 91 | w[1] < 0 || w[1] > 8 || |
727 | 91 | w[2] < 0 || w[2] > 8) { |
728 | 0 | goto err0; |
729 | 0 | } |
730 | | |
731 | 91 | xrefStr->reset(); |
732 | 91 | dict->lookupNF("Index", &idx); |
733 | 91 | if (idx.isArray()) { |
734 | 195 | for (i = 0; i+1 < idx.arrayGetLength(); i += 2) { |
735 | 137 | if (!idx.arrayGet(i, &obj)->isInt()) { |
736 | 0 | idx.free(); |
737 | 0 | goto err1; |
738 | 0 | } |
739 | 137 | first = obj.getInt(); |
740 | 137 | obj.free(); |
741 | 137 | if (!idx.arrayGet(i+1, &obj)->isInt()) { |
742 | 1 | idx.free(); |
743 | 1 | goto err1; |
744 | 1 | } |
745 | 136 | n = obj.getInt(); |
746 | 136 | obj.free(); |
747 | 136 | if (first < 0 || n < 0 || |
748 | 136 | !readXRefStreamSection(xrefStr, w, first, n)) { |
749 | 1 | idx.free(); |
750 | 1 | goto err0; |
751 | 1 | } |
752 | 136 | } |
753 | 60 | } else { |
754 | 31 | if (!readXRefStreamSection(xrefStr, w, 0, newSize)) { |
755 | 4 | idx.free(); |
756 | 4 | goto err0; |
757 | 4 | } |
758 | 31 | } |
759 | 85 | idx.free(); |
760 | | |
761 | | //~ this can be a 64-bit int (?) |
762 | 85 | dict->lookupNF("Prev", &obj); |
763 | 85 | if (obj.isInt()) { |
764 | 45 | *pos = (GFileOffset)(Guint)obj.getInt(); |
765 | 45 | more = gTrue; |
766 | 45 | } else { |
767 | 40 | more = gFalse; |
768 | 40 | } |
769 | 85 | obj.free(); |
770 | 85 | if (trailerDict.isNone()) { |
771 | 54 | trailerDict.initDict(dict); |
772 | 54 | } |
773 | | |
774 | 85 | return more; |
775 | | |
776 | 4 | err1: |
777 | 4 | obj.free(); |
778 | 9 | err0: |
779 | 9 | ok = gFalse; |
780 | 9 | return gFalse; |
781 | 4 | } |
782 | | |
783 | 167 | GBool XRef::readXRefStreamSection(Stream *xrefStr, int *w, int first, int n) { |
784 | 167 | long long type, gen, offset; |
785 | 167 | int c, newSize, i, j; |
786 | | |
787 | 167 | if (first + n < 0) { |
788 | 0 | return gFalse; |
789 | 0 | } |
790 | 167 | if (first + n > size) { |
791 | 1 | for (newSize = size ? 2 * size : 1024; |
792 | 4 | first + n > newSize && newSize > 0; |
793 | 3 | newSize <<= 1) ; |
794 | 1 | if (newSize < 0) { |
795 | 0 | return gFalse; |
796 | 0 | } |
797 | 1 | entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); |
798 | 76 | for (i = size; i < newSize; ++i) { |
799 | 75 | entries[i].offset = (GFileOffset)-1; |
800 | 75 | entries[i].type = xrefEntryFree; |
801 | 75 | } |
802 | 1 | size = newSize; |
803 | 1 | } |
804 | 2.21k | for (i = first; i < first + n; ++i) { |
805 | 2.05k | if (w[0] == 0) { |
806 | 328 | type = 1; |
807 | 1.72k | } else { |
808 | 3.45k | for (type = 0, j = 0; j < w[0]; ++j) { |
809 | 1.73k | if ((c = xrefStr->getChar()) == EOF) { |
810 | 0 | return gFalse; |
811 | 0 | } |
812 | 1.73k | type = (type << 8) + c; |
813 | 1.73k | } |
814 | 1.72k | } |
815 | 5.96k | for (offset = 0, j = 0; j < w[1]; ++j) { |
816 | 3.91k | if ((c = xrefStr->getChar()) == EOF) { |
817 | 0 | return gFalse; |
818 | 0 | } |
819 | 3.91k | offset = (offset << 8) + c; |
820 | 3.91k | } |
821 | 2.05k | if (offset < 0 || offset > GFILEOFFSET_MAX) { |
822 | 0 | return gFalse; |
823 | 0 | } |
824 | 4.20k | for (gen = 0, j = 0; j < w[2]; ++j) { |
825 | 2.14k | if ((c = xrefStr->getChar()) == EOF) { |
826 | 0 | return gFalse; |
827 | 0 | } |
828 | 2.14k | gen = (gen << 8) + c; |
829 | 2.14k | } |
830 | | // some PDF generators include a free entry with gen=0xffffffff |
831 | 2.05k | if ((gen < 0 || gen > INT_MAX) && type != 0) { |
832 | 0 | return gFalse; |
833 | 0 | } |
834 | 2.05k | if (entries[i].offset == (GFileOffset)-1) { |
835 | 2.01k | switch (type) { |
836 | 115 | case 0: |
837 | 115 | entries[i].offset = (GFileOffset)offset; |
838 | 115 | entries[i].gen = (int)gen; |
839 | 115 | entries[i].type = xrefEntryFree; |
840 | 115 | break; |
841 | 957 | case 1: |
842 | 957 | entries[i].offset = (GFileOffset)offset; |
843 | 957 | entries[i].gen = (int)gen; |
844 | 957 | entries[i].type = xrefEntryUncompressed; |
845 | 957 | break; |
846 | 940 | case 2: |
847 | 940 | entries[i].offset = (GFileOffset)offset; |
848 | 940 | entries[i].gen = (int)gen; |
849 | 940 | entries[i].type = xrefEntryCompressed; |
850 | 940 | break; |
851 | 5 | default: |
852 | 5 | return gFalse; |
853 | 2.01k | } |
854 | 2.01k | if (i > last) { |
855 | 1.16k | last = i; |
856 | 1.16k | } |
857 | 2.01k | } |
858 | 2.05k | } |
859 | | |
860 | 162 | return gTrue; |
861 | 167 | } |
862 | | |
863 | | // Attempt to construct an xref table for a damaged file. |
864 | 2.59k | GBool XRef::constructXRef() { |
865 | 2.59k | int *streamObjNums = NULL; |
866 | 2.59k | int streamObjNumsLen = 0; |
867 | 2.59k | int streamObjNumsSize = 0; |
868 | 2.59k | int lastObjNum = -1; |
869 | 2.59k | rootNum = -1; |
870 | 2.59k | int streamEndsSize = 0; |
871 | 2.59k | streamEndsLen = 0; |
872 | 2.59k | char buf[4096 + 1]; |
873 | 2.59k | str->reset(); |
874 | 2.59k | GFileOffset bufPos = start; |
875 | 2.59k | char *p = buf; |
876 | 2.59k | char *end = buf; |
877 | 2.59k | GBool startOfLine = gTrue; |
878 | 2.59k | GBool space = gTrue; |
879 | 2.59k | GBool eof = gFalse; |
880 | 39.7M | while (1) { |
881 | 39.7M | if (end - p < 256 && !eof) { |
882 | 12.7k | memcpy(buf, p, end - p); |
883 | 12.7k | bufPos += p - buf; |
884 | 12.7k | p = buf + (end - p); |
885 | 12.7k | int n = (int)(buf + 4096 - p); |
886 | 12.7k | int m = str->getBlock(p, n); |
887 | 12.7k | end = p + m; |
888 | 12.7k | *end = '\0'; |
889 | 12.7k | p = buf; |
890 | 12.7k | eof = m < n; |
891 | 12.7k | } |
892 | 39.7M | if (p == end && eof) { |
893 | 2.59k | break; |
894 | 2.59k | } |
895 | 39.7M | if (startOfLine && !strncmp(p, "trailer", 7)) { |
896 | 22.0k | constructTrailerDict((GFileOffset)(bufPos + (p + 7 - buf))); |
897 | 22.0k | p += 7; |
898 | 22.0k | startOfLine = gFalse; |
899 | 22.0k | space = gFalse; |
900 | 39.7M | } else if (startOfLine && !strncmp(p, "endstream", 9)) { |
901 | 20.9k | if (streamEndsLen == streamEndsSize) { |
902 | 1.54k | streamEndsSize += 64; |
903 | 1.54k | streamEnds = (GFileOffset *)greallocn(streamEnds, streamEndsSize, |
904 | 1.54k | sizeof(GFileOffset)); |
905 | 1.54k | } |
906 | 20.9k | streamEnds[streamEndsLen++] = (GFileOffset)(bufPos + (p - buf)); |
907 | 20.9k | p += 9; |
908 | 20.9k | startOfLine = gFalse; |
909 | 20.9k | space = gFalse; |
910 | 39.7M | } else if (space && *p >= '0' && *p <= '9') { |
911 | 830k | p = constructObjectEntry(p, (GFileOffset)(bufPos + (p - buf)), |
912 | 830k | &lastObjNum); |
913 | 830k | startOfLine = gFalse; |
914 | 830k | space = gFalse; |
915 | 38.8M | } else if (p[0] == '>' && p[1] == '>') { |
916 | 392k | p += 2; |
917 | 392k | startOfLine = gFalse; |
918 | 392k | space = gFalse; |
919 | | // skip any PDF whitespace except for '\0' |
920 | 600k | while (*p == '\t' || *p == '\n' || *p == '\x0c' || |
921 | 600k | *p == '\r' || *p == ' ') { |
922 | 208k | if (*p == '\n' || *p == '\r') { |
923 | 53.0k | startOfLine = gTrue; |
924 | 53.0k | } |
925 | 208k | space = gTrue; |
926 | 208k | ++p; |
927 | 208k | } |
928 | 392k | if (!strncmp(p, "stream", 6)) { |
929 | 207k | if (lastObjNum >= 0) { |
930 | 206k | if (streamObjNumsLen == streamObjNumsSize) { |
931 | 4.84k | streamObjNumsSize += 64; |
932 | 4.84k | streamObjNums = (int *)greallocn(streamObjNums, streamObjNumsSize, |
933 | 4.84k | sizeof(int)); |
934 | 4.84k | } |
935 | 206k | streamObjNums[streamObjNumsLen++] = lastObjNum; |
936 | 206k | } |
937 | 207k | p += 6; |
938 | 207k | startOfLine = gFalse; |
939 | 207k | space = gFalse; |
940 | 207k | } |
941 | 38.4M | } else { |
942 | 38.4M | if (*p == '\n' || *p == '\r') { |
943 | 997k | startOfLine = gTrue; |
944 | 997k | space = gTrue; |
945 | 37.4M | } else if (Lexer::isSpace(*p & 0xff)) { |
946 | 11.0M | space = gTrue; |
947 | 26.3M | } else { |
948 | 26.3M | startOfLine = gFalse; |
949 | 26.3M | space = gFalse; |
950 | 26.3M | } |
951 | 38.4M | ++p; |
952 | 38.4M | } |
953 | 39.7M | } |
954 | | |
955 | | // read each stream object, check for xref or object stream |
956 | 208k | for (int i = 0; i < streamObjNumsLen; ++i) { |
957 | 206k | Object obj; |
958 | 206k | fetch(streamObjNums[i], entries[streamObjNums[i]].gen, &obj); |
959 | 206k | if (obj.isStream()) { |
960 | 164k | Dict *dict = obj.streamGetDict(); |
961 | 164k | Object type; |
962 | 164k | dict->lookup("Type", &type); |
963 | 164k | if (type.isName("XRef")) { |
964 | 2.13k | saveTrailerDict(dict, gTrue); |
965 | 162k | } else if (type.isName("ObjStm")) { |
966 | 141k | constructObjectStreamEntries(&obj, streamObjNums[i]); |
967 | 141k | } |
968 | 164k | type.free(); |
969 | 164k | } |
970 | 206k | obj.free(); |
971 | 206k | } |
972 | | |
973 | 2.59k | gfree(streamObjNums); |
974 | | |
975 | | // if the file is encrypted, then any objects fetched here will be |
976 | | // incorrect (because decryption is not yet enabled), so clear the |
977 | | // cache to avoid that problem |
978 | 43.9k | for (int i = 0; i < xrefCacheSize; ++i) { |
979 | 41.3k | if (cache[i].num >= 0) { |
980 | 16.3k | cache[i].obj.free(); |
981 | 16.3k | cache[i].num = -1; |
982 | 16.3k | } |
983 | 41.3k | } |
984 | | |
985 | 2.59k | if (rootNum < 0) { |
986 | 1.10k | error(errSyntaxError, -1, "Couldn't find trailer dictionary"); |
987 | 1.10k | return gFalse; |
988 | 1.10k | } |
989 | 1.49k | return gTrue; |
990 | 2.59k | } |
991 | | |
992 | | // Attempt to construct a trailer dict at [pos] in the stream. |
993 | 22.0k | void XRef::constructTrailerDict(GFileOffset pos) { |
994 | 22.0k | Object newTrailerDict, obj; |
995 | 22.0k | obj.initNull(); |
996 | 22.0k | Parser *parser = |
997 | 22.0k | new Parser(NULL, |
998 | 22.0k | new Lexer(NULL, |
999 | 22.0k | str->makeSubStream(pos, gFalse, 0, &obj)), |
1000 | 22.0k | gFalse); |
1001 | 22.0k | parser->getObj(&newTrailerDict); |
1002 | 22.0k | if (newTrailerDict.isDict()) { |
1003 | 15.6k | saveTrailerDict(newTrailerDict.getDict(), gFalse); |
1004 | 15.6k | } |
1005 | 22.0k | newTrailerDict.free(); |
1006 | 22.0k | delete parser; |
1007 | 22.0k | } |
1008 | | |
1009 | | // If [dict] "looks like" a trailer dict (i.e., has a Root entry), |
1010 | | // save it as the trailer dict. |
1011 | 17.8k | void XRef::saveTrailerDict(Dict *dict, GBool isXRefStream) { |
1012 | 17.8k | Object obj; |
1013 | 17.8k | dict->lookupNF("Root", &obj); |
1014 | 17.8k | if (obj.isRef()) { |
1015 | 3.53k | int newRootNum = obj.getRefNum(); |
1016 | | // the xref stream scanning code runs after all objects are found, |
1017 | | // so we can check for a valid root object number at that point |
1018 | 3.53k | if (!isXRefStream || newRootNum <= last) { |
1019 | 3.53k | rootNum = newRootNum; |
1020 | 3.53k | rootGen = obj.getRefGen(); |
1021 | 3.53k | if (!trailerDict.isNone()) { |
1022 | 2.08k | trailerDict.free(); |
1023 | 2.08k | } |
1024 | 3.53k | trailerDict.initDict(dict); |
1025 | 3.53k | } |
1026 | 3.53k | } |
1027 | 17.8k | obj.free(); |
1028 | 17.8k | } |
1029 | | |
1030 | | // Look for an object header ("nnn ggg obj") at [p]. The first |
1031 | | // character at *[p] is a digit. [pos] is the position of *[p]. |
1032 | 830k | char *XRef::constructObjectEntry(char *p, GFileOffset pos, int *objNum) { |
1033 | | // we look for non-end-of-line space characters here, to deal with |
1034 | | // situations like: |
1035 | | // nnn <-- garbage digits on a line |
1036 | | // nnn nnn obj <-- actual object |
1037 | | // and we also ignore '\0' (because it's used to terminate the |
1038 | | // buffer in this damage-scanning code) |
1039 | 830k | int num = 0; |
1040 | 2.15M | do { |
1041 | 2.15M | num = (num * 10) + (*p - '0'); |
1042 | 2.15M | ++p; |
1043 | 2.15M | } while (*p >= '0' && *p <= '9' && num < 100000000); |
1044 | 830k | if (*p != '\t' && *p != '\x0c' && *p != ' ') { |
1045 | 415k | return p; |
1046 | 415k | } |
1047 | 441k | do { |
1048 | 441k | ++p; |
1049 | 441k | } while (*p == '\t' || *p == '\x0c' || *p == ' '); |
1050 | 415k | if (!(*p >= '0' && *p <= '9')) { |
1051 | 113k | return p; |
1052 | 113k | } |
1053 | 301k | int gen = 0; |
1054 | 564k | do { |
1055 | 564k | gen = (gen * 10) + (*p - '0'); |
1056 | 564k | ++p; |
1057 | 564k | } while (*p >= '0' && *p <= '9' && gen < 100000000); |
1058 | 301k | if (*p != '\t' && *p != '\x0c' && *p != ' ') { |
1059 | 17.1k | return p; |
1060 | 17.1k | } |
1061 | 292k | do { |
1062 | 292k | ++p; |
1063 | 292k | } while (*p == '\t' || *p == '\x0c' || *p == ' '); |
1064 | 284k | if (strncmp(p, "obj", 3)) { |
1065 | 217k | return p; |
1066 | 217k | } |
1067 | | |
1068 | 67.2k | if (constructXRefEntry(num, gen, pos - start, xrefEntryUncompressed)) { |
1069 | 67.2k | *objNum = num; |
1070 | 67.2k | } |
1071 | | |
1072 | 67.2k | return p; |
1073 | 284k | } |
1074 | | |
1075 | | // Read the header from an object stream, and add xref entries for all |
1076 | | // of its objects. |
1077 | 141k | void XRef::constructObjectStreamEntries(Object *objStr, int objStrObjNum) { |
1078 | 141k | Object obj1, obj2; |
1079 | | |
1080 | | // get the object count |
1081 | 141k | if (!objStr->streamGetDict()->lookup("N", &obj1)->isInt()) { |
1082 | 632 | obj1.free(); |
1083 | 632 | return; |
1084 | 632 | } |
1085 | 140k | int nObjects = obj1.getInt(); |
1086 | 140k | obj1.free(); |
1087 | 140k | if (nObjects <= 0 || nObjects > 1000000) { |
1088 | 234 | return; |
1089 | 234 | } |
1090 | | |
1091 | | // parse the header: object numbers and offsets |
1092 | 140k | Parser *parser = new Parser(NULL, |
1093 | 140k | new Lexer(NULL, objStr->getStream()->copy()), |
1094 | 140k | gFalse); |
1095 | 65.1M | for (int i = 0; i < nObjects; ++i) { |
1096 | 65.0M | parser->getObj(&obj1, gTrue); |
1097 | 65.0M | parser->getObj(&obj2, gTrue); |
1098 | 65.0M | if (obj1.isInt() && obj2.isInt()) { |
1099 | 55.3k | int num = obj1.getInt(); |
1100 | 55.3k | if (num >= 0 && num < 1000000) { |
1101 | 54.5k | constructXRefEntry(num, i, objStrObjNum, xrefEntryCompressed); |
1102 | 54.5k | } |
1103 | 55.3k | } |
1104 | 65.0M | obj2.free(); |
1105 | 65.0M | obj1.free(); |
1106 | 65.0M | } |
1107 | 140k | delete parser; |
1108 | 140k | } |
1109 | | |
1110 | | GBool XRef::constructXRefEntry(int num, int gen, GFileOffset pos, |
1111 | 121k | XRefEntryType type) { |
1112 | 121k | if (num >= size) { |
1113 | 3.60k | int newSize = (num + 1 + 255) & ~255; |
1114 | 3.60k | if (newSize < 0) { |
1115 | 0 | return gFalse; |
1116 | 0 | } |
1117 | 3.60k | entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); |
1118 | 136M | for (int i = size; i < newSize; ++i) { |
1119 | 136M | entries[i].offset = (GFileOffset)-1; |
1120 | 136M | entries[i].type = xrefEntryFree; |
1121 | 136M | } |
1122 | 3.60k | size = newSize; |
1123 | 3.60k | } |
1124 | | |
1125 | 121k | if (entries[num].type == xrefEntryFree || |
1126 | 121k | gen >= entries[num].gen) { |
1127 | 118k | entries[num].offset = pos; |
1128 | 118k | entries[num].gen = gen; |
1129 | 118k | entries[num].type = type; |
1130 | 118k | if (num > last) { |
1131 | 17.8k | last = num; |
1132 | 17.8k | } |
1133 | 118k | } |
1134 | | |
1135 | 121k | return gTrue; |
1136 | 121k | } |
1137 | | |
1138 | | void XRef::setEncryption(int permFlagsA, GBool ownerPasswordOkA, |
1139 | | Guchar *fileKeyA, int keyLengthA, int encVersionA, |
1140 | 131 | CryptAlgorithm encAlgorithmA) { |
1141 | 131 | int i; |
1142 | | |
1143 | 131 | encrypted = gTrue; |
1144 | 131 | permFlags = permFlagsA; |
1145 | 131 | ownerPasswordOk = ownerPasswordOkA; |
1146 | 131 | if (keyLengthA <= 32) { |
1147 | 131 | keyLength = keyLengthA; |
1148 | 131 | } else { |
1149 | 0 | keyLength = 32; |
1150 | 0 | } |
1151 | 3.51k | for (i = 0; i < keyLength; ++i) { |
1152 | 3.38k | fileKey[i] = fileKeyA[i]; |
1153 | 3.38k | } |
1154 | 131 | encVersion = encVersionA; |
1155 | 131 | encAlgorithm = encAlgorithmA; |
1156 | 131 | } |
1157 | | |
1158 | | GBool XRef::getEncryption(int *permFlagsA, GBool *ownerPasswordOkA, |
1159 | | int *keyLengthA, int *encVersionA, |
1160 | 0 | CryptAlgorithm *encAlgorithmA) { |
1161 | 0 | if (!encrypted) { |
1162 | 0 | return gFalse; |
1163 | 0 | } |
1164 | 0 | *permFlagsA = permFlags; |
1165 | 0 | *ownerPasswordOkA = ownerPasswordOk; |
1166 | 0 | *keyLengthA = keyLength; |
1167 | 0 | *encVersionA = encVersion; |
1168 | 0 | *encAlgorithmA = encAlgorithm; |
1169 | 0 | return gTrue; |
1170 | 0 | } |
1171 | | |
1172 | 0 | GBool XRef::okToPrint(GBool ignoreOwnerPW) { |
1173 | 0 | return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permPrint); |
1174 | 0 | } |
1175 | | |
1176 | 0 | GBool XRef::okToChange(GBool ignoreOwnerPW) { |
1177 | 0 | return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permChange); |
1178 | 0 | } |
1179 | | |
1180 | 0 | GBool XRef::okToCopy(GBool ignoreOwnerPW) { |
1181 | 0 | return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permCopy); |
1182 | 0 | } |
1183 | | |
1184 | 0 | GBool XRef::okToAddNotes(GBool ignoreOwnerPW) { |
1185 | 0 | return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permNotes); |
1186 | 0 | } |
1187 | | |
1188 | 60.2M | Object *XRef::fetch(int num, int gen, Object *obj, int recursion) { |
1189 | 60.2M | XRefEntry *e; |
1190 | 60.2M | Parser *parser; |
1191 | 60.2M | Object obj1, obj2, obj3; |
1192 | 60.2M | XRefCacheEntry tmp; |
1193 | 60.2M | int i, j; |
1194 | | |
1195 | | // check for bogus ref - this can happen in corrupted PDF files |
1196 | 60.2M | if (num < 0 || num >= size) { |
1197 | 289k | goto err; |
1198 | 289k | } |
1199 | | |
1200 | | // check the cache |
1201 | 59.9M | #if MULTITHREADED |
1202 | 59.9M | gLockMutex(&cacheMutex); |
1203 | 59.9M | #endif |
1204 | 59.9M | if (cache[0].num == num && cache[0].gen == gen) { |
1205 | 316k | cache[0].obj.copy(obj); |
1206 | 316k | #if MULTITHREADED |
1207 | 316k | gUnlockMutex(&cacheMutex); |
1208 | 316k | #endif |
1209 | 316k | return obj; |
1210 | 316k | } |
1211 | 952M | for (i = 1; i < xrefCacheSize; ++i) { |
1212 | 892M | if (cache[i].num == num && cache[i].gen == gen) { |
1213 | 177k | tmp = cache[i]; |
1214 | 1.13M | for (j = i; j > 0; --j) { |
1215 | 962k | cache[j] = cache[j - 1]; |
1216 | 962k | } |
1217 | 177k | cache[0] = tmp; |
1218 | 177k | cache[0].obj.copy(obj); |
1219 | 177k | #if MULTITHREADED |
1220 | 177k | gUnlockMutex(&cacheMutex); |
1221 | 177k | #endif |
1222 | 177k | return obj; |
1223 | 177k | } |
1224 | 892M | } |
1225 | 59.4M | #if MULTITHREADED |
1226 | 59.4M | gUnlockMutex(&cacheMutex); |
1227 | 59.4M | #endif |
1228 | | |
1229 | 59.4M | e = &entries[num]; |
1230 | 59.4M | switch (e->type) { |
1231 | | |
1232 | 254k | case xrefEntryUncompressed: |
1233 | 254k | if (e->gen != gen) { |
1234 | 151k | goto err; |
1235 | 151k | } |
1236 | 102k | obj1.initNull(); |
1237 | 102k | parser = new Parser(this, |
1238 | 102k | new Lexer(this, |
1239 | 102k | str->makeSubStream(start + e->offset, gFalse, 0, &obj1)), |
1240 | 102k | gTrue); |
1241 | 102k | parser->getObj(&obj1, gTrue); |
1242 | 102k | parser->getObj(&obj2, gTrue); |
1243 | 102k | parser->getObj(&obj3, gTrue); |
1244 | 102k | if (!obj1.isInt() || obj1.getInt() != num || |
1245 | 102k | !obj2.isInt() || obj2.getInt() != gen || |
1246 | 102k | !obj3.isCmd("obj")) { |
1247 | 4.33k | obj1.free(); |
1248 | 4.33k | obj2.free(); |
1249 | 4.33k | obj3.free(); |
1250 | 4.33k | delete parser; |
1251 | 4.33k | goto err; |
1252 | 4.33k | } |
1253 | 98.3k | parser->getObj(obj, gFalse, encrypted ? fileKey : (Guchar *)NULL, |
1254 | 98.3k | encAlgorithm, keyLength, num, gen, recursion); |
1255 | 98.3k | obj1.free(); |
1256 | 98.3k | obj2.free(); |
1257 | 98.3k | obj3.free(); |
1258 | 98.3k | delete parser; |
1259 | 98.3k | break; |
1260 | | |
1261 | 44.3k | case xrefEntryCompressed: |
1262 | | #if 0 // Adobe apparently ignores the generation number on compressed objects |
1263 | | if (gen != 0) { |
1264 | | goto err; |
1265 | | } |
1266 | | #endif |
1267 | 44.3k | if (e->offset >= (GFileOffset)size || |
1268 | 44.3k | entries[e->offset].type != xrefEntryUncompressed) { |
1269 | 4.20k | error(errSyntaxError, -1, "Invalid object stream"); |
1270 | 4.20k | goto err; |
1271 | 4.20k | } |
1272 | 40.1k | if (!getObjectStreamObject((int)e->offset, e->gen, num, obj, recursion)) { |
1273 | 11.7k | goto err; |
1274 | 11.7k | } |
1275 | 28.3k | break; |
1276 | | |
1277 | 59.1M | default: |
1278 | 59.1M | goto err; |
1279 | 59.4M | } |
1280 | | |
1281 | | // put the new object in the cache, throwing away the oldest object |
1282 | | // currently in the cache |
1283 | 126k | #if MULTITHREADED |
1284 | 126k | gLockMutex(&cacheMutex); |
1285 | 126k | #endif |
1286 | 126k | if (cache[xrefCacheSize - 1].num >= 0) { |
1287 | 91.5k | cache[xrefCacheSize - 1].obj.free(); |
1288 | 91.5k | } |
1289 | 2.02M | for (i = xrefCacheSize - 1; i > 0; --i) { |
1290 | 1.90M | cache[i] = cache[i - 1]; |
1291 | 1.90M | } |
1292 | 126k | cache[0].num = num; |
1293 | 126k | cache[0].gen = gen; |
1294 | 126k | obj->copy(&cache[0].obj); |
1295 | 126k | #if MULTITHREADED |
1296 | 126k | gUnlockMutex(&cacheMutex); |
1297 | 126k | #endif |
1298 | | |
1299 | 126k | return obj; |
1300 | | |
1301 | 59.6M | err: |
1302 | 59.6M | return obj->initNull(); |
1303 | 59.4M | } |
1304 | | |
1305 | | GBool XRef::getObjectStreamObject(int objStrNum, int objIdx, |
1306 | 40.1k | int objNum, Object *obj, int recursion) { |
1307 | | // check for a cached ObjectStream |
1308 | 40.1k | #if MULTITHREADED |
1309 | 40.1k | gLockMutex(&objStrsMutex); |
1310 | 40.1k | #endif |
1311 | 40.1k | ObjectStream *objStr = getObjectStreamFromCache(objStrNum); |
1312 | 40.1k | GBool found = gFalse; |
1313 | 40.1k | if (objStr) { |
1314 | 26.4k | objStr->getObject(objIdx, objNum, obj); |
1315 | 26.4k | cleanObjectStreamCache(); |
1316 | 26.4k | found = gTrue; |
1317 | 26.4k | } |
1318 | 40.1k | #if MULTITHREADED |
1319 | 40.1k | gUnlockMutex(&objStrsMutex); |
1320 | 40.1k | #endif |
1321 | 40.1k | if (found) { |
1322 | 26.4k | return gTrue; |
1323 | 26.4k | } |
1324 | | |
1325 | | // load a new ObjectStream |
1326 | 13.6k | objStr = new ObjectStream(this, objStrNum, recursion); |
1327 | 13.6k | if (!objStr->isOk()) { |
1328 | 11.7k | delete objStr; |
1329 | 11.7k | return gFalse; |
1330 | 11.7k | } |
1331 | 1.90k | objStr->getObject(objIdx, objNum, obj); |
1332 | 1.90k | #if MULTITHREADED |
1333 | 1.90k | gLockMutex(&objStrsMutex); |
1334 | 1.90k | #endif |
1335 | 1.90k | addObjectStreamToCache(objStr); |
1336 | 1.90k | cleanObjectStreamCache(); |
1337 | 1.90k | #if MULTITHREADED |
1338 | 1.90k | gUnlockMutex(&objStrsMutex); |
1339 | 1.90k | #endif |
1340 | 1.90k | return gTrue; |
1341 | 13.6k | } |
1342 | | |
1343 | | // NB: objStrsMutex must be locked when calling this function. |
1344 | 40.1k | ObjectStream *XRef::getObjectStreamFromCache(int objStrNum) { |
1345 | | // check the MRU entry in the cache |
1346 | 40.1k | if (objStrs[0] && objStrs[0]->getObjStrNum() == objStrNum) { |
1347 | 25.3k | ObjectStream *objStr = objStrs[0]; |
1348 | 25.3k | objStrLastUse[0] = objStrTime++; |
1349 | 25.3k | return objStr; |
1350 | 25.3k | } |
1351 | | |
1352 | | // check the rest of the cache |
1353 | 29.3k | for (int i = 1; i < objStrCacheLength; ++i) { |
1354 | 15.7k | if (objStrs[i] && objStrs[i]->getObjStrNum() == objStrNum) { |
1355 | 1.11k | ObjectStream *objStr = objStrs[i]; |
1356 | 2.98k | for (int j = i; j > 0; --j) { |
1357 | 1.87k | objStrs[j] = objStrs[j - 1]; |
1358 | 1.87k | objStrLastUse[j] = objStrLastUse[j - 1]; |
1359 | 1.87k | } |
1360 | 1.11k | objStrs[0] = objStr; |
1361 | 1.11k | objStrLastUse[0] = objStrTime++; |
1362 | 1.11k | return objStr; |
1363 | 1.11k | } |
1364 | 15.7k | } |
1365 | | |
1366 | 13.6k | return NULL; |
1367 | 14.7k | } |
1368 | | |
1369 | | // NB: objStrsMutex must be locked when calling this function. |
1370 | 1.90k | void XRef::addObjectStreamToCache(ObjectStream *objStr) { |
1371 | | // add to the cache |
1372 | 1.90k | if (objStrCacheLength == objStrCacheSize) { |
1373 | 0 | delete objStrs[objStrCacheSize - 1]; |
1374 | 0 | --objStrCacheLength; |
1375 | 0 | } |
1376 | 3.90k | for (int j = objStrCacheLength; j > 0; --j) { |
1377 | 1.99k | objStrs[j] = objStrs[j - 1]; |
1378 | 1.99k | objStrLastUse[j] = objStrLastUse[j - 1]; |
1379 | 1.99k | } |
1380 | 1.90k | ++objStrCacheLength; |
1381 | 1.90k | objStrs[0] = objStr; |
1382 | 1.90k | objStrLastUse[0] = objStrTime++; |
1383 | 1.90k | } |
1384 | | |
1385 | | // If the oldest (least recently used) entry in the object stream |
1386 | | // cache is more than objStrCacheTimeout accesses old (hasn't been |
1387 | | // used in the last objStrCacheTimeout accesses), eject it from the |
1388 | | // cache. |
1389 | | // NB: objStrsMutex must be locked when calling this function. |
1390 | 28.3k | void XRef::cleanObjectStreamCache() { |
1391 | | // NB: objStrTime and objStrLastUse[] are unsigned ints, so the |
1392 | | // mod-2^32 arithmetic makes the subtraction work out, even if the |
1393 | | // time wraps around. |
1394 | 28.3k | if (objStrCacheLength > 1 && |
1395 | 28.3k | objStrTime - objStrLastUse[objStrCacheLength - 1] |
1396 | 25.0k | > objStrCacheTimeout) { |
1397 | 5 | delete objStrs[objStrCacheLength - 1]; |
1398 | 5 | objStrs[objStrCacheLength - 1] = NULL; |
1399 | 5 | --objStrCacheLength; |
1400 | 5 | } |
1401 | 28.3k | } |
1402 | | |
1403 | 0 | Object *XRef::getDocInfo(Object *obj) { |
1404 | 0 | return trailerDict.dictLookup("Info", obj); |
1405 | 0 | } |
1406 | | |
1407 | | // Added for the pdftex project. |
1408 | 0 | Object *XRef::getDocInfoNF(Object *obj) { |
1409 | 0 | return trailerDict.dictLookupNF("Info", obj); |
1410 | 0 | } |
1411 | | |
1412 | 129k | GBool XRef::getStreamEnd(GFileOffset streamStart, GFileOffset *streamEnd) { |
1413 | 129k | int a, b, m; |
1414 | | |
1415 | 129k | if (streamEndsLen == 0 || |
1416 | 129k | streamStart > streamEnds[streamEndsLen - 1]) { |
1417 | 24.4k | return gFalse; |
1418 | 24.4k | } |
1419 | | |
1420 | 104k | a = -1; |
1421 | 104k | b = streamEndsLen - 1; |
1422 | | // invariant: streamEnds[a] < streamStart <= streamEnds[b] |
1423 | 571k | while (b - a > 1) { |
1424 | 467k | m = (a + b) / 2; |
1425 | 467k | if (streamStart <= streamEnds[m]) { |
1426 | 187k | b = m; |
1427 | 279k | } else { |
1428 | 279k | a = m; |
1429 | 279k | } |
1430 | 467k | } |
1431 | 104k | *streamEnd = streamEnds[b]; |
1432 | 104k | return gTrue; |
1433 | 129k | } |
1434 | | |
1435 | 187 | GFileOffset XRef::strToFileOffset(char *s) { |
1436 | 187 | GFileOffset x, d; |
1437 | 187 | char *p; |
1438 | | |
1439 | 187 | x = 0; |
1440 | 525 | for (p = s; *p && isdigit(*p & 0xff); ++p) { |
1441 | 338 | d = *p - '0'; |
1442 | 338 | if (x > (GFILEOFFSET_MAX - d) / 10) { |
1443 | 0 | break; |
1444 | 0 | } |
1445 | 338 | x = 10 * x + d; |
1446 | 338 | } |
1447 | 187 | return x; |
1448 | 187 | } |