/src/xpdf-4.06/xpdf/XRef.cc
Line | Count | Source |
1 | | //======================================================================== |
2 | | // |
3 | | // XRef.cc |
4 | | // |
5 | | // Copyright 1996-2003 Glyph & Cog, LLC |
6 | | // |
7 | | //======================================================================== |
8 | | |
9 | | #include <aconf.h> |
10 | | |
11 | | #include <stdlib.h> |
12 | | #include <stddef.h> |
13 | | #include <string.h> |
14 | | #include <ctype.h> |
15 | | #include <limits.h> |
16 | | #include "gmem.h" |
17 | | #include "gmempp.h" |
18 | | #include "gfile.h" |
19 | | #include "Object.h" |
20 | | #include "Stream.h" |
21 | | #include "Lexer.h" |
22 | | #include "Parser.h" |
23 | | #include "Dict.h" |
24 | | #include "Error.h" |
25 | | #include "ErrorCodes.h" |
26 | | #include "XRef.h" |
27 | | |
28 | | //------------------------------------------------------------------------ |
29 | | |
// Number of bytes read from the end of the file when searching for
// the 'startxref' keyword.
#define xrefSearchSize 1024
32 | | |
33 | | //------------------------------------------------------------------------ |
34 | | // Permission bits |
35 | | //------------------------------------------------------------------------ |
36 | | |
// Individual permission bits.
#define permPrint    (1 << 2)
#define permChange   (1 << 3)
#define permCopy     (1 << 4)
#define permNotes    (1 << 5)
// Permission flags assigned by the XRef constructor (every bit from
// bit 2 through bit 15 set).
#define defPermFlags 0xfffc
42 | | |
43 | | //------------------------------------------------------------------------ |
44 | | // XRefPosSet |
45 | | //------------------------------------------------------------------------ |
46 | | |
47 | | class XRefPosSet { |
48 | | public: |
49 | | |
50 | | XRefPosSet(); |
51 | | ~XRefPosSet(); |
52 | | void add(GFileOffset pos); |
53 | | GBool check(GFileOffset pos); |
54 | 432 | int getLength() { return len; } |
55 | 466 | GFileOffset get(int idx) { return tab[idx]; } |
56 | | |
57 | | private: |
58 | | |
59 | | int find(GFileOffset pos); |
60 | | |
61 | | GFileOffset *tab; |
62 | | int size; |
63 | | int len; |
64 | | }; |
65 | | |
66 | 432 | XRefPosSet::XRefPosSet() { |
67 | 432 | size = 16; |
68 | 432 | len = 0; |
69 | 432 | tab = (GFileOffset *)gmallocn(size, sizeof(GFileOffset)); |
70 | 432 | } |
71 | | |
72 | 432 | XRefPosSet::~XRefPosSet() { |
73 | 432 | gfree(tab); |
74 | 432 | } |
75 | | |
76 | 466 | void XRefPosSet::add(GFileOffset pos) { |
77 | 466 | int i; |
78 | | |
79 | 466 | i = find(pos); |
80 | 466 | if (i < len && tab[i] == pos) { |
81 | 0 | return; |
82 | 0 | } |
83 | 466 | if (len == size) { |
84 | 0 | if (size > INT_MAX / 2) { |
85 | 0 | gMemError("Integer overflow in XRefPosSet::add()"); |
86 | 0 | } |
87 | 0 | size *= 2; |
88 | 0 | tab = (GFileOffset *)greallocn(tab, size, sizeof(GFileOffset)); |
89 | 0 | } |
90 | 466 | if (i < len) { |
91 | 16 | memmove(&tab[i + 1], &tab[i], (len - i) * sizeof(GFileOffset)); |
92 | 16 | } |
93 | 466 | tab[i] = pos; |
94 | 466 | ++len; |
95 | 466 | } |
96 | | |
97 | 466 | GBool XRefPosSet::check(GFileOffset pos) { |
98 | 466 | int i; |
99 | | |
100 | 466 | i = find(pos); |
101 | 466 | return i < len && tab[i] == pos; |
102 | 466 | } |
103 | | |
104 | 932 | int XRefPosSet::find(GFileOffset pos) { |
105 | 932 | int a, b, m; |
106 | | |
107 | 932 | a = - 1; |
108 | 932 | b = len; |
109 | | // invariant: tab[a] < pos < tab[b] |
110 | 1.03k | while (b - a > 1) { |
111 | 98 | m = (a + b) / 2; |
112 | 98 | if (tab[m] < pos) { |
113 | 46 | a = m; |
114 | 52 | } else if (tab[m] > pos) { |
115 | 52 | b = m; |
116 | 52 | } else { |
117 | 0 | return m; |
118 | 0 | } |
119 | 98 | } |
120 | 932 | return b; |
121 | 932 | } |
122 | | |
123 | | //------------------------------------------------------------------------ |
124 | | // ObjectStream |
125 | | //------------------------------------------------------------------------ |
126 | | |
127 | | class ObjectStream { |
128 | | public: |
129 | | |
130 | | // Create an object stream, using object number <objStrNum>, |
131 | | // generation 0. |
132 | | ObjectStream(XRef *xref, int objStrNumA, int recursion); |
133 | | |
134 | 4.07k | GBool isOk() { return ok; } |
135 | | |
136 | | ~ObjectStream(); |
137 | | |
138 | | // Return the object number of this object stream. |
139 | 19.3k | int getObjStrNum() { return objStrNum; } |
140 | | |
141 | | // Get the <objIdx>th object from this stream, which should be |
142 | | // object number <objNum>, generation 0. |
143 | | Object *getObject(int objIdx, int objNum, Object *obj); |
144 | | |
145 | | private: |
146 | | |
147 | | int objStrNum; // object number of the object stream |
148 | | int nObjects; // number of objects in the stream |
149 | | Object *objs; // the objects (length = nObjects) |
150 | | int *objNums; // the object numbers (length = nObjects) |
151 | | GBool ok; |
152 | | }; |
153 | | |
154 | 4.07k | ObjectStream::ObjectStream(XRef *xref, int objStrNumA, int recursion) { |
155 | 4.07k | Stream *str; |
156 | 4.07k | Lexer *lexer; |
157 | 4.07k | Parser *parser; |
158 | 4.07k | int *offsets; |
159 | 4.07k | Object objStr, obj1, obj2; |
160 | 4.07k | int first, i; |
161 | | |
162 | 4.07k | objStrNum = objStrNumA; |
163 | 4.07k | nObjects = 0; |
164 | 4.07k | objs = NULL; |
165 | 4.07k | objNums = NULL; |
166 | 4.07k | ok = gFalse; |
167 | | |
168 | 4.07k | if (!xref->fetch(objStrNum, 0, &objStr, recursion)->isStream()) { |
169 | 40 | goto err1; |
170 | 40 | } |
171 | | |
172 | 4.03k | if (!objStr.streamGetDict()->lookup("N", &obj1, recursion)->isInt()) { |
173 | 1 | obj1.free(); |
174 | 1 | goto err1; |
175 | 1 | } |
176 | 4.03k | nObjects = obj1.getInt(); |
177 | 4.03k | obj1.free(); |
178 | 4.03k | if (nObjects <= 0) { |
179 | 13 | goto err1; |
180 | 13 | } |
181 | | |
182 | 4.02k | if (!objStr.streamGetDict()->lookup("First", &obj1, recursion)->isInt()) { |
183 | 78 | obj1.free(); |
184 | 78 | goto err1; |
185 | 78 | } |
186 | 3.94k | first = obj1.getInt(); |
187 | 3.94k | obj1.free(); |
188 | 3.94k | if (first < 0) { |
189 | 39 | goto err1; |
190 | 39 | } |
191 | | |
192 | | // this is an arbitrary limit to avoid integer overflow problems |
193 | | // in the 'new Object[nObjects]' call (Acrobat apparently limits |
194 | | // object streams to 100-200 objects) |
195 | 3.90k | if (nObjects > 1000000) { |
196 | 0 | error(errSyntaxError, -1, "Too many objects in an object stream"); |
197 | 0 | goto err1; |
198 | 0 | } |
199 | 3.90k | objs = new Object[nObjects]; |
200 | 3.90k | objNums = (int *)gmallocn(nObjects, sizeof(int)); |
201 | 3.90k | offsets = (int *)gmallocn(nObjects, sizeof(int)); |
202 | | |
203 | | // parse the header: object numbers and offsets |
204 | 3.90k | objStr.streamReset(); |
205 | 3.90k | obj1.initNull(); |
206 | 3.90k | str = new EmbedStream(objStr.getStream(), &obj1, gTrue, first); |
207 | 3.90k | lexer = new Lexer(xref, str); |
208 | 3.90k | parser = new Parser(xref, lexer, gFalse); |
209 | 38.0k | for (i = 0; i < nObjects; ++i) { |
210 | 34.8k | parser->getObj(&obj1, gTrue); |
211 | 34.8k | parser->getObj(&obj2, gTrue); |
212 | 34.8k | if (!obj1.isInt() || !obj2.isInt()) { |
213 | 309 | obj1.free(); |
214 | 309 | obj2.free(); |
215 | 309 | delete parser; |
216 | 309 | gfree(offsets); |
217 | 309 | goto err2; |
218 | 309 | } |
219 | 34.4k | objNums[i] = obj1.getInt(); |
220 | 34.4k | offsets[i] = obj2.getInt(); |
221 | 34.4k | obj1.free(); |
222 | 34.4k | obj2.free(); |
223 | 34.4k | if (objNums[i] < 0 || offsets[i] < 0 || |
224 | 34.3k | (i > 0 && offsets[i] < offsets[i-1])) { |
225 | 348 | delete parser; |
226 | 348 | gfree(offsets); |
227 | 348 | goto err2; |
228 | 348 | } |
229 | 34.4k | } |
230 | 3.24k | lexer->skipToEOF(); |
231 | 3.24k | delete parser; |
232 | | |
233 | | // skip to the first object - this generally shouldn't be needed, |
234 | | // because offsets[0] is normally 0, but just in case... |
235 | 3.24k | if (offsets[0] > 0) { |
236 | 32 | objStr.getStream()->discardChars(offsets[0]); |
237 | 32 | } |
238 | | |
239 | | // parse the objects |
240 | 27.9k | for (i = 0; i < nObjects; ++i) { |
241 | 24.7k | obj1.initNull(); |
242 | 24.7k | if (i == nObjects - 1) { |
243 | 3.24k | str = new EmbedStream(objStr.getStream(), &obj1, gFalse, 0); |
244 | 21.4k | } else { |
245 | 21.4k | str = new EmbedStream(objStr.getStream(), &obj1, gTrue, |
246 | 21.4k | offsets[i+1] - offsets[i]); |
247 | 21.4k | } |
248 | 24.7k | lexer = new Lexer(xref, str); |
249 | 24.7k | parser = new Parser(xref, lexer, gFalse); |
250 | 24.7k | parser->getObj(&objs[i]); |
251 | 24.7k | lexer->skipToEOF(); |
252 | 24.7k | delete parser; |
253 | 24.7k | } |
254 | | |
255 | 3.24k | gfree(offsets); |
256 | 3.24k | ok = gTrue; |
257 | | |
258 | 3.90k | err2: |
259 | 3.90k | objStr.streamClose(); |
260 | 4.07k | err1: |
261 | 4.07k | objStr.free(); |
262 | 4.07k | } |
263 | | |
264 | 4.07k | ObjectStream::~ObjectStream() { |
265 | 4.07k | int i; |
266 | | |
267 | 4.07k | if (objs) { |
268 | 51.8k | for (i = 0; i < nObjects; ++i) { |
269 | 47.9k | objs[i].free(); |
270 | 47.9k | } |
271 | 3.90k | delete[] objs; |
272 | 3.90k | } |
273 | 4.07k | gfree(objNums); |
274 | 4.07k | } |
275 | | |
276 | 13.3k | Object *ObjectStream::getObject(int objIdx, int objNum, Object *obj) { |
277 | 13.3k | if (objIdx < 0 || objIdx >= nObjects || objNum != objNums[objIdx]) { |
278 | 16 | obj->initNull(); |
279 | 13.3k | } else { |
280 | 13.3k | objs[objIdx].copy(obj); |
281 | 13.3k | } |
282 | 13.3k | return obj; |
283 | 13.3k | } |
284 | | |
285 | | //------------------------------------------------------------------------ |
286 | | // XRef |
287 | | //------------------------------------------------------------------------ |
288 | | |
289 | 22.2k | XRef::XRef(BaseStream *strA, GBool repair) { |
290 | 22.2k | GFileOffset pos; |
291 | 22.2k | Object obj; |
292 | 22.2k | XRefPosSet *posSet; |
293 | 22.2k | int i; |
294 | | |
295 | 22.2k | ok = gTrue; |
296 | 22.2k | errCode = errNone; |
297 | 22.2k | repaired = gFalse; |
298 | 22.2k | size = 0; |
299 | 22.2k | last = -1; |
300 | 22.2k | entries = NULL; |
301 | 22.2k | lastStartxrefPos = 0; |
302 | 22.2k | xrefTablePos = NULL; |
303 | 22.2k | xrefTablePosLen = 0; |
304 | 22.2k | streamEnds = NULL; |
305 | 22.2k | streamEndsLen = 0; |
306 | 2.86M | for (i = 0; i < objStrCacheSize; ++i) { |
307 | 2.84M | objStrs[i] = NULL; |
308 | 2.84M | objStrLastUse[i] = 0; |
309 | 2.84M | } |
310 | 22.2k | objStrCacheLength = 0; |
311 | 22.2k | objStrTime = 0; |
312 | | |
313 | 22.2k | encrypted = gFalse; |
314 | 22.2k | permFlags = defPermFlags; |
315 | 22.2k | ownerPasswordOk = gFalse; |
316 | | |
317 | 377k | for (i = 0; i < xrefCacheSize; ++i) { |
318 | 355k | cache[i].num = -1; |
319 | 355k | } |
320 | | |
321 | 22.2k | #if MULTITHREADED |
322 | 22.2k | gInitMutex(&objStrsMutex); |
323 | 22.2k | gInitMutex(&cacheMutex); |
324 | 22.2k | #endif |
325 | | |
326 | 22.2k | str = strA; |
327 | 22.2k | start = str->getStart(); |
328 | | |
329 | | // if the 'repair' flag is set, try to reconstruct the xref table |
330 | 22.2k | if (repair) { |
331 | 11.0k | if (!(ok = constructXRef())) { |
332 | 691 | errCode = errDamaged; |
333 | 691 | return; |
334 | 691 | } |
335 | 10.4k | repaired = gTrue; |
336 | | |
337 | | // if the 'repair' flag is not set, read the xref table |
338 | 11.1k | } else { |
339 | | |
340 | | // read the trailer |
341 | 11.1k | pos = getStartXref(); |
342 | 11.1k | if (pos == 0) { |
343 | 10.6k | errCode = errDamaged; |
344 | 10.6k | ok = gFalse; |
345 | 10.6k | return; |
346 | 10.6k | } |
347 | | |
348 | | // read the xref table |
349 | 432 | posSet = new XRefPosSet(); |
350 | 463 | while (readXRef(&pos, posSet, gFalse)) ; |
351 | 432 | xrefTablePosLen = posSet->getLength(); |
352 | 432 | xrefTablePos = (GFileOffset *)gmallocn(xrefTablePosLen, |
353 | 432 | sizeof(GFileOffset)); |
354 | 898 | for (i = 0; i < xrefTablePosLen; ++i) { |
355 | 466 | xrefTablePos[i] = posSet->get(i); |
356 | 466 | } |
357 | 432 | delete posSet; |
358 | 432 | if (!ok) { |
359 | 409 | errCode = errDamaged; |
360 | 409 | return; |
361 | 409 | } |
362 | 432 | } |
363 | | |
364 | | // get the root dictionary (catalog) object |
365 | 10.4k | trailerDict.dictLookupNF("Root", &obj); |
366 | 10.4k | if (obj.isRef()) { |
367 | 10.4k | rootNum = obj.getRefNum(); |
368 | 10.4k | rootGen = obj.getRefGen(); |
369 | 10.4k | obj.free(); |
370 | 10.4k | } else { |
371 | 18 | obj.free(); |
372 | 18 | if (!(ok = constructXRef())) { |
373 | 11 | errCode = errDamaged; |
374 | 11 | return; |
375 | 11 | } |
376 | 18 | } |
377 | | |
378 | | // now set the trailer dictionary's xref pointer so we can fetch |
379 | | // indirect objects from it |
380 | 10.4k | trailerDict.getDict()->setXRef(this); |
381 | 10.4k | } |
382 | | |
383 | 22.2k | XRef::~XRef() { |
384 | 22.2k | int i; |
385 | | |
386 | 377k | for (i = 0; i < xrefCacheSize; ++i) { |
387 | 355k | if (cache[i].num >= 0) { |
388 | 100k | cache[i].obj.free(); |
389 | 100k | } |
390 | 355k | } |
391 | 22.2k | gfree(entries); |
392 | 22.2k | trailerDict.free(); |
393 | 22.2k | if (xrefTablePos) { |
394 | 432 | gfree(xrefTablePos); |
395 | 432 | } |
396 | 22.2k | if (streamEnds) { |
397 | 9.32k | gfree(streamEnds); |
398 | 9.32k | } |
399 | 2.86M | for (i = 0; i < objStrCacheSize; ++i) { |
400 | 2.84M | if (objStrs[i]) { |
401 | 3.24k | delete objStrs[i]; |
402 | 3.24k | } |
403 | 2.84M | } |
404 | 22.2k | #if MULTITHREADED |
405 | 22.2k | gDestroyMutex(&objStrsMutex); |
406 | 22.2k | gDestroyMutex(&cacheMutex); |
407 | 22.2k | #endif |
408 | 22.2k | } |
409 | | |
410 | | // Read the 'startxref' position. |
411 | 11.1k | GFileOffset XRef::getStartXref() { |
412 | 11.1k | char buf[xrefSearchSize+1]; |
413 | 11.1k | char *p; |
414 | 11.1k | int n, i; |
415 | | |
416 | | // read last xrefSearchSize bytes |
417 | 11.1k | str->setPos(xrefSearchSize, -1); |
418 | 11.1k | n = str->getBlock(buf, xrefSearchSize); |
419 | 11.1k | buf[n] = '\0'; |
420 | | |
421 | | // find startxref |
422 | 10.9M | for (i = n - 9; i >= 0; --i) { |
423 | 10.9M | if (!strncmp(&buf[i], "startxref", 9)) { |
424 | 553 | break; |
425 | 553 | } |
426 | 10.9M | } |
427 | 11.1k | if (i < 0) { |
428 | 10.5k | return 0; |
429 | 10.5k | } |
430 | 1.44k | for (p = &buf[i+9]; isspace(*p & 0xff); ++p) ; |
431 | 553 | lastXRefPos = strToFileOffset(p); |
432 | 553 | lastStartxrefPos = str->getPos() - n + i; |
433 | | |
434 | 553 | return lastXRefPos; |
435 | 11.1k | } |
436 | | |
437 | | // Read one xref table section. Also reads the associated trailer |
438 | | // dictionary, and returns the prev pointer (if any). The [hybrid] |
439 | | // flag is true when following the XRefStm link in a hybrid-reference |
440 | | // file. |
441 | 466 | GBool XRef::readXRef(GFileOffset *pos, XRefPosSet *posSet, GBool hybrid) { |
442 | 466 | Parser *parser; |
443 | 466 | Object obj; |
444 | 466 | GBool more; |
445 | 466 | char buf[100]; |
446 | 466 | int n, i; |
447 | | |
448 | | // check for a loop in the xref tables |
449 | 466 | if (posSet->check(*pos)) { |
450 | 0 | error(errSyntaxWarning, -1, "Infinite loop in xref table"); |
451 | 0 | return gFalse; |
452 | 0 | } |
453 | 466 | posSet->add(*pos); |
454 | | |
455 | | // the xref data should either be "xref ..." (for an xref table) or |
456 | | // "nn gg obj << ... >> stream ..." (for an xref stream); possibly |
457 | | // preceded by whitespace |
458 | 466 | str->setPos(start + *pos); |
459 | 466 | n = str->getBlock(buf, 100); |
460 | 1.28k | for (i = 0; i < n && Lexer::isSpace(buf[i]); ++i) ; |
461 | | |
462 | | // parse an old-style xref table |
463 | 466 | if (!hybrid && |
464 | 463 | i + 4 < n && |
465 | 411 | buf[i] == 'x' && buf[i+1] == 'r' && buf[i+2] == 'e' && buf[i+3] == 'f' && |
466 | 12 | Lexer::isSpace(buf[i+4])) { |
467 | 12 | more = readXRefTable(pos, i + 5, posSet); |
468 | | |
469 | | // parse an xref stream |
470 | 454 | } else { |
471 | 454 | obj.initNull(); |
472 | 454 | parser = new Parser(NULL, |
473 | 454 | new Lexer(NULL, |
474 | 454 | str->makeSubStream(start + *pos, gFalse, 0, &obj)), |
475 | 454 | gTrue); |
476 | 454 | if (!parser->getObj(&obj, gTrue)->isInt()) { |
477 | 284 | goto err; |
478 | 284 | } |
479 | 170 | obj.free(); |
480 | 170 | if (!parser->getObj(&obj, gTrue)->isInt()) { |
481 | 94 | goto err; |
482 | 94 | } |
483 | 76 | obj.free(); |
484 | 76 | if (!parser->getObj(&obj, gTrue)->isCmd("obj")) { |
485 | 7 | goto err; |
486 | 7 | } |
487 | 69 | obj.free(); |
488 | 69 | if (!parser->getObj(&obj)->isStream()) { |
489 | 9 | goto err; |
490 | 9 | } |
491 | 60 | more = readXRefStream(obj.getStream(), pos); |
492 | 60 | obj.free(); |
493 | 60 | delete parser; |
494 | 60 | } |
495 | | |
496 | 72 | return more; |
497 | | |
498 | 394 | err: |
499 | 394 | obj.free(); |
500 | 394 | delete parser; |
501 | 394 | if (hybrid) { |
502 | 0 | error(errSyntaxError, -1, "Invalid XRefStm link in trailer"); |
503 | 394 | } else { |
504 | 394 | ok = gFalse; |
505 | 394 | } |
506 | 394 | return gFalse; |
507 | 466 | } |
508 | | |
509 | 12 | GBool XRef::readXRefTable(GFileOffset *pos, int offset, XRefPosSet *posSet) { |
510 | 12 | XRefEntry entry; |
511 | 12 | Parser *parser; |
512 | 12 | Object obj, obj2; |
513 | 12 | char buf[6]; |
514 | 12 | GFileOffset off, pos2; |
515 | 12 | GBool more; |
516 | 12 | int first, n, digit, newSize, gen, i, c; |
517 | | |
518 | 12 | str->setPos(start + *pos + offset); |
519 | | |
520 | 71 | while (1) { |
521 | 2.14k | do { |
522 | 2.14k | c = str->getChar(); |
523 | 2.14k | } while (Lexer::isSpace(c)); |
524 | 71 | if (c == 't') { |
525 | 12 | if (str->getBlock(buf, 6) != 6 || memcmp(buf, "railer", 6)) { |
526 | 0 | goto err1; |
527 | 0 | } |
528 | 12 | break; |
529 | 12 | } |
530 | 59 | if (c < '0' || c > '9') { |
531 | 0 | goto err1; |
532 | 0 | } |
533 | 59 | first = 0; |
534 | 177 | do { |
535 | 177 | digit = c - '0'; |
536 | 177 | if (first > (INT_MAX - digit) / 10) { |
537 | 0 | goto err1; |
538 | 0 | } |
539 | 177 | first = (first * 10) + digit; |
540 | 177 | c = str->getChar(); |
541 | 177 | } while (c >= '0' && c <= '9'); |
542 | 59 | if (!Lexer::isSpace(c)) { |
543 | 0 | goto err1; |
544 | 0 | } |
545 | 135 | do { |
546 | 135 | c = str->getChar(); |
547 | 135 | } while (Lexer::isSpace(c)); |
548 | 59 | n = 0; |
549 | 123 | do { |
550 | 123 | digit = c - '0'; |
551 | 123 | if (n > (INT_MAX - digit) / 10) { |
552 | 0 | goto err1; |
553 | 0 | } |
554 | 123 | n = (n * 10) + digit; |
555 | 123 | c = str->getChar(); |
556 | 123 | } while (c >= '0' && c <= '9'); |
557 | 59 | if (!Lexer::isSpace(c)) { |
558 | 0 | goto err1; |
559 | 0 | } |
560 | 59 | if (first > INT_MAX - n) { |
561 | 0 | goto err1; |
562 | 0 | } |
563 | 59 | if (first + n > size) { |
564 | 4 | newSize = size ? size : 512; |
565 | 4 | do { |
566 | 4 | if (newSize > INT_MAX / 2) { |
567 | 0 | goto err1; |
568 | 0 | } |
569 | 4 | newSize <<= 1; |
570 | 4 | } while (first + n > newSize); |
571 | 4 | entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); |
572 | 4.10k | for (i = size; i < newSize; ++i) { |
573 | 4.09k | entries[i].offset = (GFileOffset)-1; |
574 | 4.09k | entries[i].type = xrefEntryFree; |
575 | 4.09k | } |
576 | 4 | size = newSize; |
577 | 4 | } |
578 | 539 | for (i = first; i < first + n; ++i) { |
579 | 960 | do { |
580 | 960 | c = str->getChar(); |
581 | 960 | } while (Lexer::isSpace(c)); |
582 | 480 | off = 0; |
583 | 4.80k | do { |
584 | 4.80k | off = (off * 10) + (c - '0'); |
585 | 4.80k | c = str->getChar(); |
586 | 4.80k | } while (c >= '0' && c <= '9'); |
587 | 480 | if (!Lexer::isSpace(c)) { |
588 | 0 | goto err1; |
589 | 0 | } |
590 | 480 | entry.offset = off; |
591 | 480 | do { |
592 | 480 | c = str->getChar(); |
593 | 480 | } while (Lexer::isSpace(c)); |
594 | 480 | gen = 0; |
595 | 2.40k | do { |
596 | 2.40k | gen = (gen * 10) + (c - '0'); |
597 | 2.40k | c = str->getChar(); |
598 | 2.40k | } while (c >= '0' && c <= '9'); |
599 | 480 | if (!Lexer::isSpace(c)) { |
600 | 0 | goto err1; |
601 | 0 | } |
602 | 480 | entry.gen = gen; |
603 | 480 | do { |
604 | 480 | c = str->getChar(); |
605 | 480 | } while (Lexer::isSpace(c)); |
606 | 480 | if (c == 'n') { |
607 | 479 | entry.type = xrefEntryUncompressed; |
608 | 479 | } else if (c == 'f') { |
609 | 1 | entry.type = xrefEntryFree; |
610 | 1 | } else { |
611 | 0 | goto err1; |
612 | 0 | } |
613 | 480 | c = str->getChar(); |
614 | 480 | if (!Lexer::isSpace(c)) { |
615 | 0 | goto err1; |
616 | 0 | } |
617 | 480 | if (entries[i].offset == (GFileOffset)-1) { |
618 | 347 | entries[i] = entry; |
619 | | // PDF files of patents from the IBM Intellectual Property |
620 | | // Network have a bug: the xref table claims to start at 1 |
621 | | // instead of 0. |
622 | 347 | if (i == 1 && first == 1 && |
623 | 0 | entries[1].offset == 0 && entries[1].gen == 65535 && |
624 | 0 | entries[1].type == xrefEntryFree) { |
625 | 0 | i = first = 0; |
626 | 0 | entries[0] = entries[1]; |
627 | 0 | entries[1].offset = (GFileOffset)-1; |
628 | 0 | } |
629 | 347 | if (i > last) { |
630 | 0 | last = i; |
631 | 0 | } |
632 | 347 | } |
633 | 480 | } |
634 | 59 | } |
635 | | |
636 | | // read the trailer dictionary |
637 | 12 | obj.initNull(); |
638 | 12 | parser = new Parser(NULL, |
639 | 12 | new Lexer(NULL, |
640 | 12 | str->makeSubStream(str->getPos(), gFalse, 0, &obj)), |
641 | 12 | gTrue); |
642 | 12 | parser->getObj(&obj); |
643 | 12 | delete parser; |
644 | 12 | if (!obj.isDict()) { |
645 | 0 | obj.free(); |
646 | 0 | goto err1; |
647 | 0 | } |
648 | | |
649 | | // get the 'Prev' pointer |
650 | | //~ this can be a 64-bit int (?) |
651 | 12 | obj.getDict()->lookupNF("Prev", &obj2); |
652 | 12 | if (obj2.isInt()) { |
653 | 5 | *pos = (GFileOffset)(Guint)obj2.getInt(); |
654 | 5 | more = gTrue; |
655 | 7 | } else if (obj2.isRef()) { |
656 | | // certain buggy PDF generators generate "/Prev NNN 0 R" instead |
657 | | // of "/Prev NNN" |
658 | 0 | *pos = (GFileOffset)(Guint)obj2.getRefNum(); |
659 | 0 | more = gTrue; |
660 | 7 | } else { |
661 | 7 | more = gFalse; |
662 | 7 | } |
663 | 12 | obj2.free(); |
664 | | |
665 | | // save the first trailer dictionary |
666 | 12 | if (trailerDict.isNone()) { |
667 | 7 | obj.copy(&trailerDict); |
668 | 7 | } |
669 | | |
670 | | // check for an 'XRefStm' key |
671 | | //~ this can be a 64-bit int (?) |
672 | 12 | if (obj.getDict()->lookup("XRefStm", &obj2)->isInt()) { |
673 | 3 | pos2 = (GFileOffset)(Guint)obj2.getInt(); |
674 | 3 | readXRef(&pos2, posSet, gTrue); |
675 | 3 | if (!ok) { |
676 | 0 | obj2.free(); |
677 | 0 | obj.free(); |
678 | 0 | goto err1; |
679 | 0 | } |
680 | 3 | } |
681 | 12 | obj2.free(); |
682 | | |
683 | 12 | obj.free(); |
684 | 12 | return more; |
685 | | |
686 | 0 | err1: |
687 | 0 | ok = gFalse; |
688 | 0 | return gFalse; |
689 | 12 | } |
690 | | |
691 | 60 | GBool XRef::readXRefStream(Stream *xrefStr, GFileOffset *pos) { |
692 | 60 | Dict *dict; |
693 | 60 | int w[3]; |
694 | 60 | GBool more; |
695 | 60 | Object obj, obj2, idx; |
696 | 60 | int newSize, first, n, i; |
697 | | |
698 | 60 | dict = xrefStr->getDict(); |
699 | | |
700 | 60 | if (!dict->lookupNF("Size", &obj)->isInt()) { |
701 | 2 | goto err1; |
702 | 2 | } |
703 | 58 | newSize = obj.getInt(); |
704 | 58 | obj.free(); |
705 | 58 | if (newSize < 0) { |
706 | 0 | goto err1; |
707 | 0 | } |
708 | 58 | if (newSize > size) { |
709 | 37 | entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); |
710 | 3.39k | for (i = size; i < newSize; ++i) { |
711 | 3.36k | entries[i].offset = (GFileOffset)-1; |
712 | 3.36k | entries[i].type = xrefEntryFree; |
713 | 3.36k | } |
714 | 37 | size = newSize; |
715 | 37 | } |
716 | | |
717 | 58 | if (!dict->lookupNF("W", &obj)->isArray() || |
718 | 58 | obj.arrayGetLength() < 3) { |
719 | 0 | goto err1; |
720 | 0 | } |
721 | 232 | for (i = 0; i < 3; ++i) { |
722 | 174 | if (!obj.arrayGet(i, &obj2)->isInt()) { |
723 | 0 | obj2.free(); |
724 | 0 | goto err1; |
725 | 0 | } |
726 | 174 | w[i] = obj2.getInt(); |
727 | 174 | obj2.free(); |
728 | 174 | } |
729 | 58 | obj.free(); |
730 | 58 | if (w[0] < 0 || w[0] > 8 || |
731 | 58 | w[1] < 0 || w[1] > 8 || |
732 | 58 | w[2] < 0 || w[2] > 8) { |
733 | 0 | goto err0; |
734 | 0 | } |
735 | | |
736 | 58 | xrefStr->reset(); |
737 | 58 | dict->lookupNF("Index", &idx); |
738 | 58 | if (idx.isArray()) { |
739 | 92 | for (i = 0; i+1 < idx.arrayGetLength(); i += 2) { |
740 | 64 | if (!idx.arrayGet(i, &obj)->isInt()) { |
741 | 0 | idx.free(); |
742 | 0 | goto err1; |
743 | 0 | } |
744 | 64 | first = obj.getInt(); |
745 | 64 | obj.free(); |
746 | 64 | if (!idx.arrayGet(i+1, &obj)->isInt()) { |
747 | 0 | idx.free(); |
748 | 0 | goto err1; |
749 | 0 | } |
750 | 64 | n = obj.getInt(); |
751 | 64 | obj.free(); |
752 | 64 | if (first < 0 || n < 0 || |
753 | 63 | !readXRefStreamSection(xrefStr, w, first, n)) { |
754 | 4 | idx.free(); |
755 | 4 | goto err0; |
756 | 4 | } |
757 | 64 | } |
758 | 32 | } else { |
759 | 26 | if (!readXRefStreamSection(xrefStr, w, 0, newSize)) { |
760 | 9 | idx.free(); |
761 | 9 | goto err0; |
762 | 9 | } |
763 | 26 | } |
764 | 45 | idx.free(); |
765 | | |
766 | | //~ this can be a 64-bit int (?) |
767 | 45 | dict->lookupNF("Prev", &obj); |
768 | 45 | if (obj.isInt()) { |
769 | 27 | *pos = (GFileOffset)(Guint)obj.getInt(); |
770 | 27 | more = gTrue; |
771 | 27 | } else { |
772 | 18 | more = gFalse; |
773 | 18 | } |
774 | 45 | obj.free(); |
775 | 45 | if (trailerDict.isNone()) { |
776 | 28 | trailerDict.initDict(dict); |
777 | 28 | } |
778 | | |
779 | 45 | return more; |
780 | | |
781 | 2 | err1: |
782 | 2 | obj.free(); |
783 | 15 | err0: |
784 | 15 | ok = gFalse; |
785 | 15 | return gFalse; |
786 | 2 | } |
787 | | |
788 | 89 | GBool XRef::readXRefStreamSection(Stream *xrefStr, int *w, int first, int n) { |
789 | 89 | long long type, gen, offset; |
790 | 89 | int c, newSize, i, j; |
791 | | |
792 | 89 | if (first + n < 0) { |
793 | 0 | return gFalse; |
794 | 0 | } |
795 | 89 | if (first + n > size) { |
796 | 2 | for (newSize = size ? 2 * size : 1024; |
797 | 21 | first + n > newSize && newSize > 0; |
798 | 19 | newSize <<= 1) ; |
799 | 2 | if (newSize < 0) { |
800 | 0 | return gFalse; |
801 | 0 | } |
802 | 2 | entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); |
803 | 2.88M | for (i = size; i < newSize; ++i) { |
804 | 2.88M | entries[i].offset = (GFileOffset)-1; |
805 | 2.88M | entries[i].type = xrefEntryFree; |
806 | 2.88M | } |
807 | 2 | size = newSize; |
808 | 2 | } |
809 | 2.55k | for (i = first; i < first + n; ++i) { |
810 | 2.47k | if (w[0] == 0) { |
811 | 42 | type = 1; |
812 | 2.43k | } else { |
813 | 4.89k | for (type = 0, j = 0; j < w[0]; ++j) { |
814 | 2.46k | if ((c = xrefStr->getChar()) == EOF) { |
815 | 7 | return gFalse; |
816 | 7 | } |
817 | 2.45k | type = (type << 8) + c; |
818 | 2.45k | } |
819 | 2.43k | } |
820 | 6.06k | for (offset = 0, j = 0; j < w[1]; ++j) { |
821 | 3.59k | if ((c = xrefStr->getChar()) == EOF) { |
822 | 0 | return gFalse; |
823 | 0 | } |
824 | 3.59k | offset = (offset << 8) + c; |
825 | 3.59k | } |
826 | 2.47k | if (offset < 0 || offset > GFILEOFFSET_MAX) { |
827 | 0 | return gFalse; |
828 | 0 | } |
829 | 4.94k | for (gen = 0, j = 0; j < w[2]; ++j) { |
830 | 2.47k | if ((c = xrefStr->getChar()) == EOF) { |
831 | 1 | return gFalse; |
832 | 1 | } |
833 | 2.47k | gen = (gen << 8) + c; |
834 | 2.47k | } |
835 | | // some PDF generators include a free entry with gen=0xffffffff |
836 | 2.47k | if ((gen < 0 || gen > INT_MAX) && type != 0) { |
837 | 0 | return gFalse; |
838 | 0 | } |
839 | 2.47k | if (entries[i].offset == (GFileOffset)-1) { |
840 | 2.44k | switch (type) { |
841 | 102 | case 0: |
842 | 102 | entries[i].offset = (GFileOffset)offset; |
843 | 102 | entries[i].gen = (int)gen; |
844 | 102 | entries[i].type = xrefEntryFree; |
845 | 102 | break; |
846 | 495 | case 1: |
847 | 495 | entries[i].offset = (GFileOffset)offset; |
848 | 495 | entries[i].gen = (int)gen; |
849 | 495 | entries[i].type = xrefEntryUncompressed; |
850 | 495 | break; |
851 | 1.84k | case 2: |
852 | 1.84k | entries[i].offset = (GFileOffset)offset; |
853 | 1.84k | entries[i].gen = (int)gen; |
854 | 1.84k | entries[i].type = xrefEntryCompressed; |
855 | 1.84k | break; |
856 | 4 | default: |
857 | 4 | return gFalse; |
858 | 2.44k | } |
859 | 2.44k | if (i > last) { |
860 | 896 | last = i; |
861 | 896 | } |
862 | 2.44k | } |
863 | 2.47k | } |
864 | | |
865 | 77 | return gTrue; |
866 | 89 | } |
867 | | |
868 | | // Attempt to construct an xref table for a damaged file. |
869 | 11.1k | GBool XRef::constructXRef() { |
870 | 11.1k | int *streamObjNums = NULL; |
871 | 11.1k | int streamObjNumsLen = 0; |
872 | 11.1k | int streamObjNumsSize = 0; |
873 | 11.1k | int lastObjNum = -1; |
874 | 11.1k | rootNum = -1; |
875 | 11.1k | int streamEndsSize = 0; |
876 | 11.1k | streamEndsLen = 0; |
877 | 11.1k | char buf[4096 + 1]; |
878 | 11.1k | str->reset(); |
879 | 11.1k | GFileOffset bufPos = start; |
880 | 11.1k | char *p = buf; |
881 | 11.1k | char *end = buf; |
882 | 11.1k | GBool startOfLine = gTrue; |
883 | 11.1k | GBool space = gTrue; |
884 | 11.1k | GBool eof = gFalse; |
885 | 158M | while (1) { |
886 | 158M | if (end - p < 256 && !eof) { |
887 | 49.5k | memcpy(buf, p, end - p); |
888 | 49.5k | bufPos += p - buf; |
889 | 49.5k | p = buf + (end - p); |
890 | 49.5k | int n = (int)(buf + 4096 - p); |
891 | 49.5k | int m = str->getBlock(p, n); |
892 | 49.5k | end = p + m; |
893 | 49.5k | *end = '\0'; |
894 | 49.5k | p = buf; |
895 | 49.5k | eof = m < n; |
896 | 49.5k | } |
897 | 158M | if (p == end && eof) { |
898 | 11.1k | break; |
899 | 11.1k | } |
900 | 158M | if (startOfLine && !strncmp(p, "trailer", 7)) { |
901 | 20.6k | constructTrailerDict((GFileOffset)(bufPos + (p + 7 - buf))); |
902 | 20.6k | p += 7; |
903 | 20.6k | startOfLine = gFalse; |
904 | 20.6k | space = gFalse; |
905 | 158M | } else if (startOfLine && !strncmp(p, "endstream", 9)) { |
906 | 60.3k | if (streamEndsLen == streamEndsSize) { |
907 | 9.48k | streamEndsSize += 64; |
908 | 9.48k | streamEnds = (GFileOffset *)greallocn(streamEnds, streamEndsSize, |
909 | 9.48k | sizeof(GFileOffset)); |
910 | 9.48k | } |
911 | 60.3k | streamEnds[streamEndsLen++] = (GFileOffset)(bufPos + (p - buf)); |
912 | 60.3k | p += 9; |
913 | 60.3k | startOfLine = gFalse; |
914 | 60.3k | space = gFalse; |
915 | 158M | } else if (space && *p >= '0' && *p <= '9') { |
916 | 2.52M | p = constructObjectEntry(p, (GFileOffset)(bufPos + (p - buf)), |
917 | 2.52M | &lastObjNum); |
918 | 2.52M | startOfLine = gFalse; |
919 | 2.52M | space = gFalse; |
920 | 155M | } else if (p[0] == '>' && p[1] == '>') { |
921 | 489k | p += 2; |
922 | 489k | startOfLine = gFalse; |
923 | 489k | space = gFalse; |
924 | | // skip any PDF whitespace except for '\0' |
925 | 829k | while (*p == '\t' || *p == '\n' || *p == '\x0c' || |
926 | 602k | *p == '\r' || *p == ' ') { |
927 | 339k | if (*p == '\n' || *p == '\r') { |
928 | 257k | startOfLine = gTrue; |
929 | 257k | } |
930 | 339k | space = gTrue; |
931 | 339k | ++p; |
932 | 339k | } |
933 | 489k | if (!strncmp(p, "stream", 6)) { |
934 | 144k | if (lastObjNum >= 0) { |
935 | 143k | if (streamObjNumsLen == streamObjNumsSize) { |
936 | 11.6k | streamObjNumsSize += 64; |
937 | 11.6k | streamObjNums = (int *)greallocn(streamObjNums, streamObjNumsSize, |
938 | 11.6k | sizeof(int)); |
939 | 11.6k | } |
940 | 143k | streamObjNums[streamObjNumsLen++] = lastObjNum; |
941 | 143k | } |
942 | 144k | p += 6; |
943 | 144k | startOfLine = gFalse; |
944 | 144k | space = gFalse; |
945 | 144k | } |
946 | 155M | } else { |
947 | 155M | if (*p == '\n' || *p == '\r') { |
948 | 3.51M | startOfLine = gTrue; |
949 | 3.51M | space = gTrue; |
950 | 151M | } else if (Lexer::isSpace(*p & 0xff)) { |
951 | 11.6M | space = gTrue; |
952 | 139M | } else { |
953 | 139M | startOfLine = gFalse; |
954 | 139M | space = gFalse; |
955 | 139M | } |
956 | 155M | ++p; |
957 | 155M | } |
958 | 158M | } |
959 | | |
960 | | // read each stream object, check for xref or object stream |
961 | 154k | for (int i = 0; i < streamObjNumsLen; ++i) { |
962 | 143k | Object obj; |
963 | 143k | fetch(streamObjNums[i], entries[streamObjNums[i]].gen, &obj); |
964 | 143k | if (obj.isStream()) { |
965 | 122k | Dict *dict = obj.streamGetDict(); |
966 | 122k | Object type; |
967 | 122k | dict->lookup("Type", &type); |
968 | 122k | if (type.isName("XRef")) { |
969 | 2.13k | saveTrailerDict(dict, gTrue); |
970 | 120k | } else if (type.isName("ObjStm")) { |
971 | 61.6k | constructObjectStreamEntries(&obj, streamObjNums[i]); |
972 | 61.6k | } |
973 | 122k | type.free(); |
974 | 122k | } |
975 | 143k | obj.free(); |
976 | 143k | } |
977 | | |
978 | 11.1k | gfree(streamObjNums); |
979 | | |
980 | | // if the file is encrypted, then any objects fetched here will be |
981 | | // incorrect (because decryption is not yet enabled), so clear the |
982 | | // cache to avoid that problem |
983 | 188k | for (int i = 0; i < xrefCacheSize; ++i) { |
984 | 177k | if (cache[i].num >= 0) { |
985 | 52.1k | cache[i].obj.free(); |
986 | 52.1k | cache[i].num = -1; |
987 | 52.1k | } |
988 | 177k | } |
989 | | |
990 | 11.1k | if (rootNum < 0) { |
991 | 702 | error(errSyntaxError, -1, "Couldn't find trailer dictionary"); |
992 | 702 | return gFalse; |
993 | 702 | } |
994 | 10.4k | return gTrue; |
995 | 11.1k | } |
996 | | |
997 | | // Attempt to construct a trailer dict at [pos] in the stream. |
998 | 20.6k | void XRef::constructTrailerDict(GFileOffset pos) { |
999 | 20.6k | Object newTrailerDict, obj; |
1000 | 20.6k | obj.initNull(); |
1001 | 20.6k | Parser *parser = |
1002 | 20.6k | new Parser(NULL, |
1003 | 20.6k | new Lexer(NULL, |
1004 | 20.6k | str->makeSubStream(pos, gFalse, 0, &obj)), |
1005 | 20.6k | gFalse); |
1006 | 20.6k | parser->getObj(&newTrailerDict); |
1007 | 20.6k | if (newTrailerDict.isDict()) { |
1008 | 14.4k | saveTrailerDict(newTrailerDict.getDict(), gFalse); |
1009 | 14.4k | } |
1010 | 20.6k | newTrailerDict.free(); |
1011 | 20.6k | delete parser; |
1012 | 20.6k | } |
1013 | | |
1014 | | // If [dict] "looks like" a trailer dict (i.e., has a Root entry), |
1015 | | // save it as the trailer dict. |
1016 | 16.5k | void XRef::saveTrailerDict(Dict *dict, GBool isXRefStream) { |
1017 | 16.5k | Object obj; |
1018 | 16.5k | dict->lookupNF("Root", &obj); |
1019 | 16.5k | if (obj.isRef()) { |
1020 | 14.2k | int newRootNum = obj.getRefNum(); |
1021 | | // the xref stream scanning code runs after all objects are found, |
1022 | | // so we can check for a valid root object number at that point |
1023 | 14.2k | if (!isXRefStream || newRootNum <= last) { |
1024 | 14.2k | rootNum = newRootNum; |
1025 | 14.2k | rootGen = obj.getRefGen(); |
1026 | 14.2k | if (!trailerDict.isNone()) { |
1027 | 3.85k | trailerDict.free(); |
1028 | 3.85k | } |
1029 | 14.2k | trailerDict.initDict(dict); |
1030 | 14.2k | } |
1031 | 14.2k | } |
1032 | 16.5k | obj.free(); |
1033 | 16.5k | } |
1034 | | |
1035 | | // Look for an object header ("nnn ggg obj") at [p]. The first |
1036 | | // character at *[p] is a digit. [pos] is the position of *[p]. |
1037 | 2.52M | char *XRef::constructObjectEntry(char *p, GFileOffset pos, int *objNum) { |
1038 | | // we look for non-end-of-line space characters here, to deal with |
1039 | | // situations like: |
1040 | | // nnn <-- garbage digits on a line |
1041 | | // nnn nnn obj <-- actual object |
1042 | | // and we also ignore '\0' (because it's used to terminate the |
1043 | | // buffer in this damage-scanning code) |
1044 | 2.52M | int num = 0; |
1045 | 7.36M | do { |
1046 | 7.36M | num = (num * 10) + (*p - '0'); |
1047 | 7.36M | ++p; |
1048 | 7.36M | } while (*p >= '0' && *p <= '9' && num < 100000000); |
1049 | 2.52M | if (*p != '\t' && *p != '\x0c' && *p != ' ') { |
1050 | 859k | return p; |
1051 | 859k | } |
1052 | 1.72M | do { |
1053 | 1.72M | ++p; |
1054 | 1.72M | } while (*p == '\t' || *p == '\x0c' || *p == ' '); |
1055 | 1.66M | if (!(*p >= '0' && *p <= '9')) { |
1056 | 272k | return p; |
1057 | 272k | } |
1058 | 1.39M | int gen = 0; |
1059 | 2.74M | do { |
1060 | 2.74M | gen = (gen * 10) + (*p - '0'); |
1061 | 2.74M | ++p; |
1062 | 2.74M | } while (*p >= '0' && *p <= '9' && gen < 100000000); |
1063 | 1.39M | if (*p != '\t' && *p != '\x0c' && *p != ' ') { |
1064 | 72.0k | return p; |
1065 | 72.0k | } |
1066 | 1.34M | do { |
1067 | 1.34M | ++p; |
1068 | 1.34M | } while (*p == '\t' || *p == '\x0c' || *p == ' '); |
1069 | 1.32M | if (strncmp(p, "obj", 3)) { |
1070 | 1.03M | return p; |
1071 | 1.03M | } |
1072 | | |
1073 | 283k | if (constructXRefEntry(num, gen, pos - start, xrefEntryUncompressed)) { |
1074 | 283k | *objNum = num; |
1075 | 283k | } |
1076 | | |
1077 | 283k | return p; |
1078 | 1.32M | } |
1079 | | |
1080 | | // Read the header from an object stream, and add xref entries for all |
1081 | | // of its objects. |
1082 | 61.6k | void XRef::constructObjectStreamEntries(Object *objStr, int objStrObjNum) { |
1083 | 61.6k | Object obj1, obj2; |
1084 | | |
1085 | | // get the object count |
1086 | 61.6k | if (!objStr->streamGetDict()->lookup("N", &obj1)->isInt()) { |
1087 | 464 | obj1.free(); |
1088 | 464 | return; |
1089 | 464 | } |
1090 | 61.2k | int nObjects = obj1.getInt(); |
1091 | 61.2k | obj1.free(); |
1092 | 61.2k | if (nObjects <= 0 || nObjects > 1000000) { |
1093 | 83 | return; |
1094 | 83 | } |
1095 | | |
1096 | | // parse the header: object numbers and offsets |
1097 | 61.1k | Parser *parser = new Parser(NULL, |
1098 | 61.1k | new Lexer(NULL, objStr->getStream()->copy()), |
1099 | 61.1k | gFalse); |
1100 | 20.7M | for (int i = 0; i < nObjects; ++i) { |
1101 | 20.7M | parser->getObj(&obj1, gTrue); |
1102 | 20.7M | parser->getObj(&obj2, gTrue); |
1103 | 20.7M | if (obj1.isInt() && obj2.isInt()) { |
1104 | 43.0k | int num = obj1.getInt(); |
1105 | 43.0k | if (num >= 0 && num < 1000000) { |
1106 | 42.9k | constructXRefEntry(num, i, objStrObjNum, xrefEntryCompressed); |
1107 | 42.9k | } |
1108 | 43.0k | } |
1109 | 20.7M | obj2.free(); |
1110 | 20.7M | obj1.free(); |
1111 | 20.7M | } |
1112 | 61.1k | delete parser; |
1113 | 61.1k | } |
1114 | | |
1115 | | GBool XRef::constructXRefEntry(int num, int gen, GFileOffset pos, |
1116 | 326k | XRefEntryType type) { |
1117 | 326k | if (num >= size) { |
1118 | 11.6k | int newSize = (num + 1 + 255) & ~255; |
1119 | 11.6k | if (newSize < 0) { |
1120 | 0 | return gFalse; |
1121 | 0 | } |
1122 | 11.6k | entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); |
1123 | 186M | for (int i = size; i < newSize; ++i) { |
1124 | 186M | entries[i].offset = (GFileOffset)-1; |
1125 | 186M | entries[i].type = xrefEntryFree; |
1126 | 186M | } |
1127 | 11.6k | size = newSize; |
1128 | 11.6k | } |
1129 | | |
1130 | 326k | if (entries[num].type == xrefEntryFree || |
1131 | 316k | gen >= entries[num].gen) { |
1132 | 316k | entries[num].offset = pos; |
1133 | 316k | entries[num].gen = gen; |
1134 | 316k | entries[num].type = type; |
1135 | 316k | if (num > last) { |
1136 | 132k | last = num; |
1137 | 132k | } |
1138 | 316k | } |
1139 | | |
1140 | 326k | return gTrue; |
1141 | 326k | } |
1142 | | |
1143 | | void XRef::setEncryption(int permFlagsA, GBool ownerPasswordOkA, |
1144 | | Guchar *fileKeyA, int keyLengthA, int encVersionA, |
1145 | 506 | CryptAlgorithm encAlgorithmA) { |
1146 | 506 | int i; |
1147 | | |
1148 | 506 | encrypted = gTrue; |
1149 | 506 | permFlags = permFlagsA; |
1150 | 506 | ownerPasswordOk = ownerPasswordOkA; |
1151 | 506 | if (keyLengthA <= 32) { |
1152 | 506 | keyLength = keyLengthA; |
1153 | 506 | } else { |
1154 | 0 | keyLength = 32; |
1155 | 0 | } |
1156 | 3.68k | for (i = 0; i < keyLength; ++i) { |
1157 | 3.17k | fileKey[i] = fileKeyA[i]; |
1158 | 3.17k | } |
1159 | 506 | encVersion = encVersionA; |
1160 | 506 | encAlgorithm = encAlgorithmA; |
1161 | 506 | } |
1162 | | |
1163 | | GBool XRef::getEncryption(int *permFlagsA, GBool *ownerPasswordOkA, |
1164 | | int *keyLengthA, int *encVersionA, |
1165 | 0 | CryptAlgorithm *encAlgorithmA) { |
1166 | 0 | if (!encrypted) { |
1167 | 0 | return gFalse; |
1168 | 0 | } |
1169 | 0 | *permFlagsA = permFlags; |
1170 | 0 | *ownerPasswordOkA = ownerPasswordOk; |
1171 | 0 | *keyLengthA = keyLength; |
1172 | 0 | *encVersionA = encVersion; |
1173 | 0 | *encAlgorithmA = encAlgorithm; |
1174 | 0 | return gTrue; |
1175 | 0 | } |
1176 | | |
1177 | 10.2k | GBool XRef::okToPrint(GBool ignoreOwnerPW) { |
1178 | 10.2k | return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permPrint); |
1179 | 10.2k | } |
1180 | | |
1181 | 10.2k | GBool XRef::okToChange(GBool ignoreOwnerPW) { |
1182 | 10.2k | return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permChange); |
1183 | 10.2k | } |
1184 | | |
1185 | 10.2k | GBool XRef::okToCopy(GBool ignoreOwnerPW) { |
1186 | 10.2k | return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permCopy); |
1187 | 10.2k | } |
1188 | | |
1189 | 10.2k | GBool XRef::okToAddNotes(GBool ignoreOwnerPW) { |
1190 | 10.2k | return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permNotes); |
1191 | 10.2k | } |
1192 | | |
1193 | 1.47M | Object *XRef::fetch(int num, int gen, Object *obj, int recursion) { |
1194 | 1.47M | XRefEntry *e; |
1195 | 1.47M | Parser *parser; |
1196 | 1.47M | Object obj1, obj2, obj3; |
1197 | 1.47M | XRefCacheEntry tmp; |
1198 | 1.47M | int i, j; |
1199 | | |
1200 | | // check for bogus ref - this can happen in corrupted PDF files |
1201 | 1.47M | if (num < 0 || num >= size) { |
1202 | 872 | goto err; |
1203 | 872 | } |
1204 | | |
1205 | | // check the cache |
1206 | 1.47M | #if MULTITHREADED |
1207 | 1.47M | gLockMutex(&cacheMutex); |
1208 | 1.47M | #endif |
1209 | 1.47M | if (cache[0].num == num && cache[0].gen == gen) { |
1210 | 423k | cache[0].obj.copy(obj); |
1211 | 423k | #if MULTITHREADED |
1212 | 423k | gUnlockMutex(&cacheMutex); |
1213 | 423k | #endif |
1214 | 423k | return obj; |
1215 | 423k | } |
1216 | 10.7M | for (i = 1; i < xrefCacheSize; ++i) { |
1217 | 10.1M | if (cache[i].num == num && cache[i].gen == gen) { |
1218 | 455k | tmp = cache[i]; |
1219 | 1.69M | for (j = i; j > 0; --j) { |
1220 | 1.24M | cache[j] = cache[j - 1]; |
1221 | 1.24M | } |
1222 | 455k | cache[0] = tmp; |
1223 | 455k | cache[0].obj.copy(obj); |
1224 | 455k | #if MULTITHREADED |
1225 | 455k | gUnlockMutex(&cacheMutex); |
1226 | 455k | #endif |
1227 | 455k | return obj; |
1228 | 455k | } |
1229 | 10.1M | } |
1230 | 592k | #if MULTITHREADED |
1231 | 592k | gUnlockMutex(&cacheMutex); |
1232 | 592k | #endif |
1233 | | |
1234 | 592k | e = &entries[num]; |
1235 | 592k | switch (e->type) { |
1236 | | |
1237 | 182k | case xrefEntryUncompressed: |
1238 | 182k | if (e->gen != gen) { |
1239 | 2.29k | goto err; |
1240 | 2.29k | } |
1241 | 180k | obj1.initNull(); |
1242 | 180k | parser = new Parser(this, |
1243 | 180k | new Lexer(this, |
1244 | 180k | str->makeSubStream(start + e->offset, gFalse, 0, &obj1)), |
1245 | 180k | gTrue); |
1246 | 180k | parser->getObj(&obj1, gTrue); |
1247 | 180k | parser->getObj(&obj2, gTrue); |
1248 | 180k | parser->getObj(&obj3, gTrue); |
1249 | 180k | if (!obj1.isInt() || obj1.getInt() != num || |
1250 | 180k | !obj2.isInt() || obj2.getInt() != gen || |
1251 | 180k | !obj3.isCmd("obj")) { |
1252 | 1.29k | obj1.free(); |
1253 | 1.29k | obj2.free(); |
1254 | 1.29k | obj3.free(); |
1255 | 1.29k | delete parser; |
1256 | 1.29k | goto err; |
1257 | 1.29k | } |
1258 | 178k | parser->getObj(obj, gFalse, encrypted ? fileKey : (Guchar *)NULL, |
1259 | 178k | encAlgorithm, keyLength, num, gen, recursion); |
1260 | 178k | obj1.free(); |
1261 | 178k | obj2.free(); |
1262 | 178k | obj3.free(); |
1263 | 178k | delete parser; |
1264 | 178k | break; |
1265 | | |
1266 | 14.4k | case xrefEntryCompressed: |
1267 | | #if 0 // Adobe apparently ignores the generation number on compressed objects |
1268 | | if (gen != 0) { |
1269 | | goto err; |
1270 | | } |
1271 | | #endif |
1272 | 14.4k | if (e->offset >= (GFileOffset)size || |
1273 | 14.4k | entries[e->offset].type != xrefEntryUncompressed) { |
1274 | 287 | error(errSyntaxError, -1, "Invalid object stream"); |
1275 | 287 | goto err; |
1276 | 287 | } |
1277 | 14.1k | if (!getObjectStreamObject((int)e->offset, e->gen, num, obj, recursion)) { |
1278 | 828 | goto err; |
1279 | 828 | } |
1280 | 13.3k | break; |
1281 | | |
1282 | 395k | default: |
1283 | 395k | goto err; |
1284 | 592k | } |
1285 | | |
1286 | | // put the new object in the cache, throwing away the oldest object |
1287 | | // currently in the cache |
1288 | 192k | #if MULTITHREADED |
1289 | 192k | gLockMutex(&cacheMutex); |
1290 | 192k | #endif |
1291 | 192k | if (cache[xrefCacheSize - 1].num >= 0) { |
1292 | 39.7k | cache[xrefCacheSize - 1].obj.free(); |
1293 | 39.7k | } |
1294 | 3.07M | for (i = xrefCacheSize - 1; i > 0; --i) { |
1295 | 2.88M | cache[i] = cache[i - 1]; |
1296 | 2.88M | } |
1297 | 192k | cache[0].num = num; |
1298 | 192k | cache[0].gen = gen; |
1299 | 192k | obj->copy(&cache[0].obj); |
1300 | 192k | #if MULTITHREADED |
1301 | 192k | gUnlockMutex(&cacheMutex); |
1302 | 192k | #endif |
1303 | | |
1304 | 192k | return obj; |
1305 | | |
1306 | 400k | err: |
1307 | 400k | return obj->initNull(); |
1308 | 592k | } |
1309 | | |
1310 | | GBool XRef::getObjectStreamObject(int objStrNum, int objIdx, |
1311 | 14.1k | int objNum, Object *obj, int recursion) { |
1312 | 14.1k | if (recursion >= objectRecursionLimit) { |
1313 | 0 | return gFalse; |
1314 | 0 | } |
1315 | | |
1316 | | // check for a cached ObjectStream |
1317 | 14.1k | #if MULTITHREADED |
1318 | 14.1k | gLockMutex(&objStrsMutex); |
1319 | 14.1k | #endif |
1320 | 14.1k | ObjectStream *objStr = getObjectStreamFromCache(objStrNum); |
1321 | 14.1k | GBool found = gFalse; |
1322 | 14.1k | if (objStr) { |
1323 | 10.1k | objStr->getObject(objIdx, objNum, obj); |
1324 | 10.1k | cleanObjectStreamCache(); |
1325 | 10.1k | found = gTrue; |
1326 | 10.1k | } |
1327 | 14.1k | #if MULTITHREADED |
1328 | 14.1k | gUnlockMutex(&objStrsMutex); |
1329 | 14.1k | #endif |
1330 | 14.1k | if (found) { |
1331 | 10.1k | return gTrue; |
1332 | 10.1k | } |
1333 | | |
1334 | | // load a new ObjectStream |
1335 | 4.07k | objStr = new ObjectStream(this, objStrNum, recursion + 1); |
1336 | 4.07k | if (!objStr->isOk()) { |
1337 | 828 | delete objStr; |
1338 | 828 | return gFalse; |
1339 | 828 | } |
1340 | 3.24k | objStr->getObject(objIdx, objNum, obj); |
1341 | 3.24k | #if MULTITHREADED |
1342 | 3.24k | gLockMutex(&objStrsMutex); |
1343 | 3.24k | #endif |
1344 | 3.24k | addObjectStreamToCache(objStr); |
1345 | 3.24k | cleanObjectStreamCache(); |
1346 | 3.24k | #if MULTITHREADED |
1347 | 3.24k | gUnlockMutex(&objStrsMutex); |
1348 | 3.24k | #endif |
1349 | 3.24k | return gTrue; |
1350 | 4.07k | } |
1351 | | |
1352 | | // NB: objStrsMutex must be locked when calling this function. |
1353 | 14.1k | ObjectStream *XRef::getObjectStreamFromCache(int objStrNum) { |
1354 | | // check the MRU entry in the cache |
1355 | 14.1k | if (objStrs[0] && objStrs[0]->getObjStrNum() == objStrNum) { |
1356 | 8.60k | ObjectStream *objStr = objStrs[0]; |
1357 | 8.60k | objStrLastUse[0] = objStrTime++; |
1358 | 8.60k | return objStr; |
1359 | 8.60k | } |
1360 | | |
1361 | | // check the rest of the cache |
1362 | 10.7k | for (int i = 1; i < objStrCacheLength; ++i) { |
1363 | 6.63k | if (objStrs[i] && objStrs[i]->getObjStrNum() == objStrNum) { |
1364 | 1.51k | ObjectStream *objStr = objStrs[i]; |
1365 | 4.33k | for (int j = i; j > 0; --j) { |
1366 | 2.81k | objStrs[j] = objStrs[j - 1]; |
1367 | 2.81k | objStrLastUse[j] = objStrLastUse[j - 1]; |
1368 | 2.81k | } |
1369 | 1.51k | objStrs[0] = objStr; |
1370 | 1.51k | objStrLastUse[0] = objStrTime++; |
1371 | 1.51k | return objStr; |
1372 | 1.51k | } |
1373 | 6.63k | } |
1374 | | |
1375 | 4.07k | return NULL; |
1376 | 5.59k | } |
1377 | | |
1378 | | // NB: objStrsMutex must be locked when calling this function. |
1379 | 3.24k | void XRef::addObjectStreamToCache(ObjectStream *objStr) { |
1380 | | // add to the cache |
1381 | 3.24k | if (objStrCacheLength == objStrCacheSize) { |
1382 | 0 | delete objStrs[objStrCacheSize - 1]; |
1383 | 0 | --objStrCacheLength; |
1384 | 0 | } |
1385 | 7.59k | for (int j = objStrCacheLength; j > 0; --j) { |
1386 | 4.35k | objStrs[j] = objStrs[j - 1]; |
1387 | 4.35k | objStrLastUse[j] = objStrLastUse[j - 1]; |
1388 | 4.35k | } |
1389 | 3.24k | ++objStrCacheLength; |
1390 | 3.24k | objStrs[0] = objStr; |
1391 | 3.24k | objStrLastUse[0] = objStrTime++; |
1392 | 3.24k | } |
1393 | | |
1394 | | // If the oldest (least recently used) entry in the object stream |
1395 | | // cache is more than objStrCacheTimeout accesses old (hasn't been |
1396 | | // used in the last objStrCacheTimeout accesses), eject it from the |
1397 | | // cache. |
1398 | | // NB: objStrsMutex must be locked when calling this function. |
1399 | 13.3k | void XRef::cleanObjectStreamCache() { |
1400 | | // NB: objStrTime and objStrLastUse[] are unsigned ints, so the |
1401 | | // mod-2^32 arithmetic makes the subtraction work out, even if the |
1402 | | // time wraps around. |
1403 | 13.3k | if (objStrCacheLength > 1 && |
1404 | 9.19k | objStrTime - objStrLastUse[objStrCacheLength - 1] |
1405 | 9.19k | > objStrCacheTimeout) { |
1406 | 0 | delete objStrs[objStrCacheLength - 1]; |
1407 | 0 | objStrs[objStrCacheLength - 1] = NULL; |
1408 | 0 | --objStrCacheLength; |
1409 | 0 | } |
1410 | 13.3k | } |
1411 | | |
1412 | 10.2k | Object *XRef::getDocInfo(Object *obj) { |
1413 | 10.2k | return trailerDict.dictLookup("Info", obj); |
1414 | 10.2k | } |
1415 | | |
1416 | | // Added for the pdftex project. |
1417 | 0 | Object *XRef::getDocInfoNF(Object *obj) { |
1418 | 0 | return trailerDict.dictLookupNF("Info", obj); |
1419 | 0 | } |
1420 | | |
1421 | 108k | GBool XRef::getStreamEnd(GFileOffset streamStart, GFileOffset *streamEnd) { |
1422 | 108k | int a, b, m; |
1423 | | |
1424 | 108k | if (streamEndsLen == 0 || |
1425 | 103k | streamStart > streamEnds[streamEndsLen - 1]) { |
1426 | 16.7k | return gFalse; |
1427 | 16.7k | } |
1428 | | |
1429 | 92.2k | a = -1; |
1430 | 92.2k | b = streamEndsLen - 1; |
1431 | | // invariant: streamEnds[a] < streamStart <= streamEnds[b] |
1432 | 416k | while (b - a > 1) { |
1433 | 323k | m = (a + b) / 2; |
1434 | 323k | if (streamStart <= streamEnds[m]) { |
1435 | 146k | b = m; |
1436 | 177k | } else { |
1437 | 177k | a = m; |
1438 | 177k | } |
1439 | 323k | } |
1440 | 92.2k | *streamEnd = streamEnds[b]; |
1441 | 92.2k | return gTrue; |
1442 | 108k | } |
1443 | | |
1444 | 553 | GFileOffset XRef::strToFileOffset(char *s) { |
1445 | 553 | GFileOffset x, d; |
1446 | 553 | char *p; |
1447 | | |
1448 | 553 | x = 0; |
1449 | 1.73k | for (p = s; *p && isdigit(*p & 0xff); ++p) { |
1450 | 1.17k | d = *p - '0'; |
1451 | 1.17k | if (x > (GFILEOFFSET_MAX - d) / 10) { |
1452 | 1 | break; |
1453 | 1 | } |
1454 | 1.17k | x = 10 * x + d; |
1455 | 1.17k | } |
1456 | 553 | return x; |
1457 | 553 | } |