/src/xpdf-4.05/xpdf/XRef.cc
Line | Count | Source |
1 | | //======================================================================== |
2 | | // |
3 | | // XRef.cc |
4 | | // |
5 | | // Copyright 1996-2003 Glyph & Cog, LLC |
6 | | // |
7 | | //======================================================================== |
8 | | |
9 | | #include <aconf.h> |
10 | | |
11 | | #include <stdlib.h> |
12 | | #include <stddef.h> |
13 | | #include <string.h> |
14 | | #include <ctype.h> |
15 | | #include <limits.h> |
16 | | #include "gmem.h" |
17 | | #include "gmempp.h" |
18 | | #include "gfile.h" |
19 | | #include "Object.h" |
20 | | #include "Stream.h" |
21 | | #include "Lexer.h" |
22 | | #include "Parser.h" |
23 | | #include "Dict.h" |
24 | | #include "Error.h" |
25 | | #include "ErrorCodes.h" |
26 | | #include "XRef.h" |
27 | | |
28 | | //------------------------------------------------------------------------ |
29 | | |
30 | 616 | #define xrefSearchSize 1024 // read this many bytes at end of file |
31 | | // to look for 'startxref' |
32 | | |
33 | | //------------------------------------------------------------------------ |
34 | | // Permission bits |
35 | | //------------------------------------------------------------------------ |
36 | | |
37 | 0 | #define permPrint (1<<2) |
38 | 0 | #define permChange (1<<3) |
39 | 0 | #define permCopy (1<<4) |
40 | 0 | #define permNotes (1<<5) |
41 | 616 | #define defPermFlags 0xfffc |
42 | | |
43 | | //------------------------------------------------------------------------ |
44 | | // XRefPosSet |
45 | | //------------------------------------------------------------------------ |
46 | | |
47 | | class XRefPosSet { |
48 | | public: |
49 | | |
50 | | XRefPosSet(); |
51 | | ~XRefPosSet(); |
52 | | void add(GFileOffset pos); |
53 | | GBool check(GFileOffset pos); |
54 | 13 | int getLength() { return len; } |
55 | 33 | GFileOffset get(int idx) { return tab[idx]; } |
56 | | |
57 | | private: |
58 | | |
59 | | int find(GFileOffset pos); |
60 | | |
61 | | GFileOffset *tab; |
62 | | int size; |
63 | | int len; |
64 | | }; |
65 | | |
66 | 13 | XRefPosSet::XRefPosSet() { |
67 | 13 | size = 16; |
68 | 13 | len = 0; |
69 | 13 | tab = (GFileOffset *)gmallocn(size, sizeof(GFileOffset)); |
70 | 13 | } |
71 | | |
72 | 13 | XRefPosSet::~XRefPosSet() { |
73 | 13 | gfree(tab); |
74 | 13 | } |
75 | | |
76 | 33 | void XRefPosSet::add(GFileOffset pos) { |
77 | 33 | int i; |
78 | | |
79 | 33 | i = find(pos); |
80 | 33 | if (i < len && tab[i] == pos) { |
81 | 0 | return; |
82 | 0 | } |
83 | 33 | if (len == size) { |
84 | 0 | if (size > INT_MAX / 2) { |
85 | 0 | gMemError("Integer overflow in XRefPosSet::add()"); |
86 | 0 | } |
87 | 0 | size *= 2; |
88 | 0 | tab = (GFileOffset *)greallocn(tab, size, sizeof(GFileOffset)); |
89 | 0 | } |
90 | 33 | if (i < len) { |
91 | 16 | memmove(&tab[i + 1], &tab[i], (len - i) * sizeof(GFileOffset)); |
92 | 16 | } |
93 | 33 | tab[i] = pos; |
94 | 33 | ++len; |
95 | 33 | } |
96 | | |
97 | 34 | GBool XRefPosSet::check(GFileOffset pos) { |
98 | 34 | int i; |
99 | | |
100 | 34 | i = find(pos); |
101 | 34 | return i < len && tab[i] == pos; |
102 | 34 | } |
103 | | |
104 | 67 | int XRefPosSet::find(GFileOffset pos) { |
105 | 67 | int a, b, m; |
106 | | |
107 | 67 | a = - 1; |
108 | 67 | b = len; |
109 | | // invariant: tab[a] < pos < tab[b] |
110 | 124 | while (b - a > 1) { |
111 | 58 | m = (a + b) / 2; |
112 | 58 | if (tab[m] < pos) { |
113 | 13 | a = m; |
114 | 45 | } else if (tab[m] > pos) { |
115 | 44 | b = m; |
116 | 44 | } else { |
117 | 1 | return m; |
118 | 1 | } |
119 | 58 | } |
120 | 66 | return b; |
121 | 67 | } |
122 | | |
123 | | //------------------------------------------------------------------------ |
124 | | // ObjectStream |
125 | | //------------------------------------------------------------------------ |
126 | | |
127 | | class ObjectStream { |
128 | | public: |
129 | | |
130 | | // Create an object stream, using object number <objStrNum>, |
131 | | // generation 0. |
132 | | ObjectStream(XRef *xref, int objStrNumA, int recursion); |
133 | | |
134 | 3.89k | GBool isOk() { return ok; } |
135 | | |
136 | | ~ObjectStream(); |
137 | | |
138 | | // Return the object number of this object stream. |
139 | 15.8k | int getObjStrNum() { return objStrNum; } |
140 | | |
141 | | // Get the <objIdx>th object from this stream, which should be |
142 | | // object number <objNum>, generation 0. |
143 | | Object *getObject(int objIdx, int objNum, Object *obj); |
144 | | |
145 | | private: |
146 | | |
147 | | int objStrNum; // object number of the object stream |
148 | | int nObjects; // number of objects in the stream |
149 | | Object *objs; // the objects (length = nObjects) |
150 | | int *objNums; // the object numbers (length = nObjects) |
151 | | GBool ok; |
152 | | }; |
153 | | |
154 | 3.89k | ObjectStream::ObjectStream(XRef *xref, int objStrNumA, int recursion) { |
155 | 3.89k | Stream *str; |
156 | 3.89k | Lexer *lexer; |
157 | 3.89k | Parser *parser; |
158 | 3.89k | int *offsets; |
159 | 3.89k | Object objStr, obj1, obj2; |
160 | 3.89k | int first, i; |
161 | | |
162 | 3.89k | objStrNum = objStrNumA; |
163 | 3.89k | nObjects = 0; |
164 | 3.89k | objs = NULL; |
165 | 3.89k | objNums = NULL; |
166 | 3.89k | ok = gFalse; |
167 | | |
168 | 3.89k | if (!xref->fetch(objStrNum, 0, &objStr, recursion)->isStream()) { |
169 | 12 | goto err1; |
170 | 12 | } |
171 | | |
172 | 3.88k | if (!objStr.streamGetDict()->lookup("N", &obj1)->isInt()) { |
173 | 0 | obj1.free(); |
174 | 0 | goto err1; |
175 | 0 | } |
176 | 3.88k | nObjects = obj1.getInt(); |
177 | 3.88k | obj1.free(); |
178 | 3.88k | if (nObjects <= 0) { |
179 | 0 | goto err1; |
180 | 0 | } |
181 | | |
182 | 3.88k | if (!objStr.streamGetDict()->lookup("First", &obj1)->isInt()) { |
183 | 297 | obj1.free(); |
184 | 297 | goto err1; |
185 | 297 | } |
186 | 3.59k | first = obj1.getInt(); |
187 | 3.59k | obj1.free(); |
188 | 3.59k | if (first < 0) { |
189 | 0 | goto err1; |
190 | 0 | } |
191 | | |
192 | | // this is an arbitrary limit to avoid integer overflow problems |
193 | | // in the 'new Object[nObjects]' call (Acrobat apparently limits |
194 | | // object streams to 100-200 objects) |
195 | 3.59k | if (nObjects > 1000000) { |
196 | 0 | error(errSyntaxError, -1, "Too many objects in an object stream"); |
197 | 0 | goto err1; |
198 | 0 | } |
199 | 3.59k | objs = new Object[nObjects]; |
200 | 3.59k | objNums = (int *)gmallocn(nObjects, sizeof(int)); |
201 | 3.59k | offsets = (int *)gmallocn(nObjects, sizeof(int)); |
202 | | |
203 | | // parse the header: object numbers and offsets |
204 | 3.59k | objStr.streamReset(); |
205 | 3.59k | obj1.initNull(); |
206 | 3.59k | str = new EmbedStream(objStr.getStream(), &obj1, gTrue, first); |
207 | 3.59k | lexer = new Lexer(xref, str); |
208 | 3.59k | parser = new Parser(xref, lexer, gFalse); |
209 | 135k | for (i = 0; i < nObjects; ++i) { |
210 | 135k | parser->getObj(&obj1, gTrue); |
211 | 135k | parser->getObj(&obj2, gTrue); |
212 | 135k | if (!obj1.isInt() || !obj2.isInt()) { |
213 | 695 | obj1.free(); |
214 | 695 | obj2.free(); |
215 | 695 | delete parser; |
216 | 695 | gfree(offsets); |
217 | 695 | goto err2; |
218 | 695 | } |
219 | 134k | objNums[i] = obj1.getInt(); |
220 | 134k | offsets[i] = obj2.getInt(); |
221 | 134k | obj1.free(); |
222 | 134k | obj2.free(); |
223 | 134k | if (objNums[i] < 0 || offsets[i] < 0 || |
224 | 134k | (i > 0 && offsets[i] < offsets[i-1])) { |
225 | 2.60k | delete parser; |
226 | 2.60k | gfree(offsets); |
227 | 2.60k | goto err2; |
228 | 2.60k | } |
229 | 134k | } |
230 | 286 | lexer->skipToEOF(); |
231 | 286 | delete parser; |
232 | | |
233 | | // skip to the first object - this generally shouldn't be needed, |
234 | | // because offsets[0] is normally 0, but just in case... |
235 | 286 | if (offsets[0] > 0) { |
236 | 5 | objStr.getStream()->discardChars(offsets[0]); |
237 | 5 | } |
238 | | |
239 | | // parse the objects |
240 | 6.31k | for (i = 0; i < nObjects; ++i) { |
241 | 6.03k | obj1.initNull(); |
242 | 6.03k | if (i == nObjects - 1) { |
243 | 286 | str = new EmbedStream(objStr.getStream(), &obj1, gFalse, 0); |
244 | 5.74k | } else { |
245 | 5.74k | str = new EmbedStream(objStr.getStream(), &obj1, gTrue, |
246 | 5.74k | offsets[i+1] - offsets[i]); |
247 | 5.74k | } |
248 | 6.03k | lexer = new Lexer(xref, str); |
249 | 6.03k | parser = new Parser(xref, lexer, gFalse); |
250 | 6.03k | parser->getObj(&objs[i]); |
251 | 6.03k | lexer->skipToEOF(); |
252 | 6.03k | delete parser; |
253 | 6.03k | } |
254 | | |
255 | 286 | gfree(offsets); |
256 | 286 | ok = gTrue; |
257 | | |
258 | 3.59k | err2: |
259 | 3.59k | objStr.streamClose(); |
260 | 3.89k | err1: |
261 | 3.89k | objStr.free(); |
262 | 3.89k | } |
263 | | |
264 | 3.89k | ObjectStream::~ObjectStream() { |
265 | 3.89k | int i; |
266 | | |
267 | 3.89k | if (objs) { |
268 | 288k | for (i = 0; i < nObjects; ++i) { |
269 | 284k | objs[i].free(); |
270 | 284k | } |
271 | 3.59k | delete[] objs; |
272 | 3.59k | } |
273 | 3.89k | gfree(objNums); |
274 | 3.89k | } |
275 | | |
276 | 7.38k | Object *ObjectStream::getObject(int objIdx, int objNum, Object *obj) { |
277 | 7.38k | if (objIdx < 0 || objIdx >= nObjects || objNum != objNums[objIdx]) { |
278 | 0 | obj->initNull(); |
279 | 7.38k | } else { |
280 | 7.38k | objs[objIdx].copy(obj); |
281 | 7.38k | } |
282 | 7.38k | return obj; |
283 | 7.38k | } |
284 | | |
285 | | //------------------------------------------------------------------------ |
286 | | // XRef |
287 | | //------------------------------------------------------------------------ |
288 | | |
289 | 616 | XRef::XRef(BaseStream *strA, GBool repair) { |
290 | 616 | GFileOffset pos; |
291 | 616 | Object obj; |
292 | 616 | XRefPosSet *posSet; |
293 | 616 | int i; |
294 | | |
295 | 616 | ok = gTrue; |
296 | 616 | errCode = errNone; |
297 | 616 | repaired = gFalse; |
298 | 616 | size = 0; |
299 | 616 | last = -1; |
300 | 616 | entries = NULL; |
301 | 616 | lastStartxrefPos = 0; |
302 | 616 | xrefTablePos = NULL; |
303 | 616 | xrefTablePosLen = 0; |
304 | 616 | streamEnds = NULL; |
305 | 616 | streamEndsLen = 0; |
306 | 79.4k | for (i = 0; i < objStrCacheSize; ++i) { |
307 | 78.8k | objStrs[i] = NULL; |
308 | 78.8k | objStrLastUse[i] = 0; |
309 | 78.8k | } |
310 | 616 | objStrCacheLength = 0; |
311 | 616 | objStrTime = 0; |
312 | | |
313 | 616 | encrypted = gFalse; |
314 | 616 | permFlags = defPermFlags; |
315 | 616 | ownerPasswordOk = gFalse; |
316 | | |
317 | 10.4k | for (i = 0; i < xrefCacheSize; ++i) { |
318 | 9.85k | cache[i].num = -1; |
319 | 9.85k | } |
320 | | |
321 | 616 | #if MULTITHREADED |
322 | 616 | gInitMutex(&objStrsMutex); |
323 | 616 | gInitMutex(&cacheMutex); |
324 | 616 | #endif |
325 | | |
326 | 616 | str = strA; |
327 | 616 | start = str->getStart(); |
328 | | |
329 | | // if the 'repair' flag is set, try to reconstruct the xref table |
330 | 616 | if (repair) { |
331 | 308 | if (!(ok = constructXRef())) { |
332 | 43 | errCode = errDamaged; |
333 | 43 | return; |
334 | 43 | } |
335 | 265 | repaired = gTrue; |
336 | | |
337 | | // if the 'repair' flag is not set, read the xref table |
338 | 308 | } else { |
339 | | |
340 | | // read the trailer |
341 | 308 | pos = getStartXref(); |
342 | 308 | if (pos == 0) { |
343 | 295 | errCode = errDamaged; |
344 | 295 | ok = gFalse; |
345 | 295 | return; |
346 | 295 | } |
347 | | |
348 | | // read the xref table |
349 | 13 | posSet = new XRefPosSet(); |
350 | 32 | while (readXRef(&pos, posSet, gFalse)) ; |
351 | 13 | xrefTablePosLen = posSet->getLength(); |
352 | 13 | xrefTablePos = (GFileOffset *)gmallocn(xrefTablePosLen, |
353 | 13 | sizeof(GFileOffset)); |
354 | 46 | for (i = 0; i < xrefTablePosLen; ++i) { |
355 | 33 | xrefTablePos[i] = posSet->get(i); |
356 | 33 | } |
357 | 13 | delete posSet; |
358 | 13 | if (!ok) { |
359 | 11 | errCode = errDamaged; |
360 | 11 | return; |
361 | 11 | } |
362 | 13 | } |
363 | | |
364 | | // get the root dictionary (catalog) object |
365 | 267 | trailerDict.dictLookupNF("Root", &obj); |
366 | 267 | if (obj.isRef()) { |
367 | 264 | rootNum = obj.getRefNum(); |
368 | 264 | rootGen = obj.getRefGen(); |
369 | 264 | obj.free(); |
370 | 264 | } else { |
371 | 3 | obj.free(); |
372 | 3 | if (!(ok = constructXRef())) { |
373 | 1 | errCode = errDamaged; |
374 | 1 | return; |
375 | 1 | } |
376 | 3 | } |
377 | | |
378 | | // now set the trailer dictionary's xref pointer so we can fetch |
379 | | // indirect objects from it |
380 | 266 | trailerDict.getDict()->setXRef(this); |
381 | 266 | } |
382 | | |
383 | 615 | XRef::~XRef() { |
384 | 615 | int i; |
385 | | |
386 | 10.4k | for (i = 0; i < xrefCacheSize; ++i) { |
387 | 9.84k | if (cache[i].num >= 0) { |
388 | 3.46k | cache[i].obj.free(); |
389 | 3.46k | } |
390 | 9.84k | } |
391 | 615 | gfree(entries); |
392 | 615 | trailerDict.free(); |
393 | 615 | if (xrefTablePos) { |
394 | 13 | gfree(xrefTablePos); |
395 | 13 | } |
396 | 615 | if (streamEnds) { |
397 | 237 | gfree(streamEnds); |
398 | 237 | } |
399 | 79.3k | for (i = 0; i < objStrCacheSize; ++i) { |
400 | 78.7k | if (objStrs[i]) { |
401 | 286 | delete objStrs[i]; |
402 | 286 | } |
403 | 78.7k | } |
404 | 615 | #if MULTITHREADED |
405 | 615 | gDestroyMutex(&objStrsMutex); |
406 | 615 | gDestroyMutex(&cacheMutex); |
407 | 615 | #endif |
408 | 615 | } |
409 | | |
410 | | // Read the 'startxref' position. |
411 | 308 | GFileOffset XRef::getStartXref() { |
412 | 308 | char buf[xrefSearchSize+1]; |
413 | 308 | char *p; |
414 | 308 | int n, i; |
415 | | |
416 | | // read last xrefSearchSize bytes |
417 | 308 | str->setPos(xrefSearchSize, -1); |
418 | 308 | n = str->getBlock(buf, xrefSearchSize); |
419 | 308 | buf[n] = '\0'; |
420 | | |
421 | | // find startxref |
422 | 304k | for (i = n - 9; i >= 0; --i) { |
423 | 304k | if (!strncmp(&buf[i], "startxref", 9)) { |
424 | 14 | break; |
425 | 14 | } |
426 | 304k | } |
427 | 308 | if (i < 0) { |
428 | 294 | return 0; |
429 | 294 | } |
430 | 36 | for (p = &buf[i+9]; isspace(*p & 0xff); ++p) ; |
431 | 14 | lastXRefPos = strToFileOffset(p); |
432 | 14 | lastStartxrefPos = str->getPos() - n + i; |
433 | | |
434 | 14 | return lastXRefPos; |
435 | 308 | } |
436 | | |
437 | | // Read one xref table section. Also reads the associated trailer |
438 | | // dictionary, and returns the prev pointer (if any). The [hybrid] |
439 | | // flag is true when following the XRefStm link in a hybrid-reference |
440 | | // file. |
441 | 34 | GBool XRef::readXRef(GFileOffset *pos, XRefPosSet *posSet, GBool hybrid) { |
442 | 34 | Parser *parser; |
443 | 34 | Object obj; |
444 | 34 | GBool more; |
445 | 34 | char buf[100]; |
446 | 34 | int n, i; |
447 | | |
448 | | // check for a loop in the xref tables |
449 | 34 | if (posSet->check(*pos)) { |
450 | 1 | error(errSyntaxWarning, -1, "Infinite loop in xref table"); |
451 | 1 | return gFalse; |
452 | 1 | } |
453 | 33 | posSet->add(*pos); |
454 | | |
455 | | // the xref data should either be "xref ..." (for an xref table) or |
456 | | // "nn gg obj << ... >> stream ..." (for an xref stream); possibly |
457 | | // preceded by whitespace |
458 | 33 | str->setPos(start + *pos); |
459 | 33 | n = str->getBlock(buf, 100); |
460 | 55 | for (i = 0; i < n && Lexer::isSpace(buf[i]); ++i) ; |
461 | | |
462 | | // parse an old-style xref table |
463 | 33 | if (!hybrid && |
464 | 31 | i + 4 < n && |
465 | 28 | buf[i] == 'x' && buf[i+1] == 'r' && buf[i+2] == 'e' && buf[i+3] == 'f' && |
466 | 15 | Lexer::isSpace(buf[i+4])) { |
467 | 15 | more = readXRefTable(pos, i + 5, posSet); |
468 | | |
469 | | // parse an xref stream |
470 | 18 | } else { |
471 | 18 | obj.initNull(); |
472 | 18 | parser = new Parser(NULL, |
473 | 18 | new Lexer(NULL, |
474 | 18 | str->makeSubStream(start + *pos, gFalse, 0, &obj)), |
475 | 18 | gTrue); |
476 | 18 | if (!parser->getObj(&obj, gTrue)->isInt()) { |
477 | 8 | goto err; |
478 | 8 | } |
479 | 10 | obj.free(); |
480 | 10 | if (!parser->getObj(&obj, gTrue)->isInt()) { |
481 | 0 | goto err; |
482 | 0 | } |
483 | 10 | obj.free(); |
484 | 10 | if (!parser->getObj(&obj, gTrue)->isCmd("obj")) { |
485 | 1 | goto err; |
486 | 1 | } |
487 | 9 | obj.free(); |
488 | 9 | if (!parser->getObj(&obj)->isStream()) { |
489 | 0 | goto err; |
490 | 0 | } |
491 | 9 | more = readXRefStream(obj.getStream(), pos, hybrid); |
492 | 9 | obj.free(); |
493 | 9 | delete parser; |
494 | 9 | } |
495 | | |
496 | 24 | return more; |
497 | | |
498 | 9 | err: |
499 | 9 | obj.free(); |
500 | 9 | delete parser; |
501 | 9 | ok = gFalse; |
502 | 9 | return gFalse; |
503 | 33 | } |
504 | | |
505 | 15 | GBool XRef::readXRefTable(GFileOffset *pos, int offset, XRefPosSet *posSet) { |
506 | 15 | XRefEntry entry; |
507 | 15 | Parser *parser; |
508 | 15 | Object obj, obj2; |
509 | 15 | char buf[6]; |
510 | 15 | GFileOffset off, pos2; |
511 | 15 | GBool more; |
512 | 15 | int first, n, digit, newSize, gen, i, c; |
513 | | |
514 | 15 | str->setPos(start + *pos + offset); |
515 | | |
516 | 41 | while (1) { |
517 | 89 | do { |
518 | 89 | c = str->getChar(); |
519 | 89 | } while (Lexer::isSpace(c)); |
520 | 41 | if (c == 't') { |
521 | 13 | if (str->getBlock(buf, 6) != 6 || memcmp(buf, "railer", 6)) { |
522 | 0 | goto err1; |
523 | 0 | } |
524 | 13 | break; |
525 | 13 | } |
526 | 28 | if (c < '0' || c > '9') { |
527 | 0 | goto err1; |
528 | 0 | } |
529 | 28 | first = 0; |
530 | 54 | do { |
531 | 54 | digit = c - '0'; |
532 | 54 | if (first > (INT_MAX - digit) / 10) { |
533 | 0 | goto err1; |
534 | 0 | } |
535 | 54 | first = (first * 10) + digit; |
536 | 54 | c = str->getChar(); |
537 | 54 | } while (c >= '0' && c <= '9'); |
538 | 28 | if (!Lexer::isSpace(c)) { |
539 | 0 | goto err1; |
540 | 0 | } |
541 | 28 | do { |
542 | 28 | c = str->getChar(); |
543 | 28 | } while (Lexer::isSpace(c)); |
544 | 28 | n = 0; |
545 | 28 | do { |
546 | 28 | digit = c - '0'; |
547 | 28 | if (n > (INT_MAX - digit) / 10) { |
548 | 0 | goto err1; |
549 | 0 | } |
550 | 28 | n = (n * 10) + digit; |
551 | 28 | c = str->getChar(); |
552 | 28 | } while (c >= '0' && c <= '9'); |
553 | 28 | if (!Lexer::isSpace(c)) { |
554 | 0 | goto err1; |
555 | 0 | } |
556 | 28 | if (first > INT_MAX - n) { |
557 | 0 | goto err1; |
558 | 0 | } |
559 | 28 | if (first + n > size) { |
560 | 5 | for (newSize = size ? 2 * size : 1024; |
561 | 5 | first + n > newSize && newSize > 0; |
562 | 5 | newSize <<= 1) ; |
563 | 5 | if (newSize < 0) { |
564 | 0 | goto err1; |
565 | 0 | } |
566 | 5 | entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); |
567 | 5.12k | for (i = size; i < newSize; ++i) { |
568 | 5.12k | entries[i].offset = (GFileOffset)-1; |
569 | 5.12k | entries[i].type = xrefEntryFree; |
570 | 5.12k | } |
571 | 5 | size = newSize; |
572 | 5 | } |
573 | 63 | for (i = first; i < first + n; ++i) { |
574 | 74 | do { |
575 | 74 | c = str->getChar(); |
576 | 74 | } while (Lexer::isSpace(c)); |
577 | 37 | off = 0; |
578 | 350 | do { |
579 | 350 | off = (off * 10) + (c - '0'); |
580 | 350 | c = str->getChar(); |
581 | 350 | } while (c >= '0' && c <= '9'); |
582 | 37 | if (!Lexer::isSpace(c)) { |
583 | 2 | goto err1; |
584 | 2 | } |
585 | 35 | entry.offset = off; |
586 | 35 | do { |
587 | 35 | c = str->getChar(); |
588 | 35 | } while (Lexer::isSpace(c)); |
589 | 35 | gen = 0; |
590 | 175 | do { |
591 | 175 | gen = (gen * 10) + (c - '0'); |
592 | 175 | c = str->getChar(); |
593 | 175 | } while (c >= '0' && c <= '9'); |
594 | 35 | if (!Lexer::isSpace(c)) { |
595 | 0 | goto err1; |
596 | 0 | } |
597 | 35 | entry.gen = gen; |
598 | 35 | do { |
599 | 35 | c = str->getChar(); |
600 | 35 | } while (Lexer::isSpace(c)); |
601 | 35 | if (c == 'n') { |
602 | 25 | entry.type = xrefEntryUncompressed; |
603 | 25 | } else if (c == 'f') { |
604 | 10 | entry.type = xrefEntryFree; |
605 | 10 | } else { |
606 | 0 | goto err1; |
607 | 0 | } |
608 | 35 | c = str->getChar(); |
609 | 35 | if (!Lexer::isSpace(c)) { |
610 | 0 | goto err1; |
611 | 0 | } |
612 | 35 | if (entries[i].offset == (GFileOffset)-1) { |
613 | 27 | entries[i] = entry; |
614 | | // PDF files of patents from the IBM Intellectual Property |
615 | | // Network have a bug: the xref table claims to start at 1 |
616 | | // instead of 0. |
617 | 27 | if (i == 1 && first == 1 && |
618 | 0 | entries[1].offset == 0 && entries[1].gen == 65535 && |
619 | 0 | entries[1].type == xrefEntryFree) { |
620 | 0 | i = first = 0; |
621 | 0 | entries[0] = entries[1]; |
622 | 0 | entries[1].offset = (GFileOffset)-1; |
623 | 0 | } |
624 | 27 | if (i > last) { |
625 | 12 | last = i; |
626 | 12 | } |
627 | 27 | } |
628 | 35 | } |
629 | 28 | } |
630 | | |
631 | | // read the trailer dictionary |
632 | 13 | obj.initNull(); |
633 | 13 | parser = new Parser(NULL, |
634 | 13 | new Lexer(NULL, |
635 | 13 | str->makeSubStream(str->getPos(), gFalse, 0, &obj)), |
636 | 13 | gTrue); |
637 | 13 | parser->getObj(&obj); |
638 | 13 | delete parser; |
639 | 13 | if (!obj.isDict()) { |
640 | 0 | obj.free(); |
641 | 0 | goto err1; |
642 | 0 | } |
643 | | |
644 | | // get the 'Prev' pointer |
645 | | //~ this can be a 64-bit int (?) |
646 | 13 | obj.getDict()->lookupNF("Prev", &obj2); |
647 | 13 | if (obj2.isInt()) { |
648 | 12 | *pos = (GFileOffset)(Guint)obj2.getInt(); |
649 | 12 | more = gTrue; |
650 | 12 | } else if (obj2.isRef()) { |
651 | | // certain buggy PDF generators generate "/Prev NNN 0 R" instead |
652 | | // of "/Prev NNN" |
653 | 0 | *pos = (GFileOffset)(Guint)obj2.getRefNum(); |
654 | 0 | more = gTrue; |
655 | 1 | } else { |
656 | 1 | more = gFalse; |
657 | 1 | } |
658 | 13 | obj2.free(); |
659 | | |
660 | | // save the first trailer dictionary |
661 | 13 | if (trailerDict.isNone()) { |
662 | 5 | obj.copy(&trailerDict); |
663 | 5 | } |
664 | | |
665 | | // check for an 'XRefStm' key |
666 | | //~ this can be a 64-bit int (?) |
667 | 13 | if (obj.getDict()->lookup("XRefStm", &obj2)->isInt()) { |
668 | 2 | pos2 = (GFileOffset)(Guint)obj2.getInt(); |
669 | 2 | readXRef(&pos2, posSet, gTrue); |
670 | 2 | if (!ok) { |
671 | 0 | obj2.free(); |
672 | 0 | obj.free(); |
673 | 0 | goto err1; |
674 | 0 | } |
675 | 2 | } |
676 | 13 | obj2.free(); |
677 | | |
678 | 13 | obj.free(); |
679 | 13 | return more; |
680 | | |
681 | 2 | err1: |
682 | 2 | ok = gFalse; |
683 | 2 | return gFalse; |
684 | 13 | } |
685 | | |
686 | 9 | GBool XRef::readXRefStream(Stream *xrefStr, GFileOffset *pos, GBool hybrid) { |
687 | 9 | Dict *dict; |
688 | 9 | int w[3]; |
689 | 9 | GBool more; |
690 | 9 | Object obj, obj2, idx; |
691 | 9 | int newSize, first, n, i; |
692 | | |
693 | 9 | dict = xrefStr->getDict(); |
694 | | |
695 | 9 | if (!dict->lookupNF("Size", &obj)->isInt()) { |
696 | 0 | goto err1; |
697 | 0 | } |
698 | 9 | newSize = obj.getInt(); |
699 | 9 | obj.free(); |
700 | 9 | if (newSize < 0) { |
701 | 0 | goto err1; |
702 | 0 | } |
703 | 9 | if (newSize > size) { |
704 | 4 | entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); |
705 | 4.19k | for (i = size; i < newSize; ++i) { |
706 | 4.19k | entries[i].offset = (GFileOffset)-1; |
707 | 4.19k | entries[i].type = xrefEntryFree; |
708 | 4.19k | } |
709 | 4 | size = newSize; |
710 | 4 | } |
711 | | |
712 | 9 | if (!dict->lookupNF("W", &obj)->isArray() || |
713 | 9 | obj.arrayGetLength() < 3) { |
714 | 0 | goto err1; |
715 | 0 | } |
716 | 36 | for (i = 0; i < 3; ++i) { |
717 | 27 | if (!obj.arrayGet(i, &obj2)->isInt()) { |
718 | 0 | obj2.free(); |
719 | 0 | goto err1; |
720 | 0 | } |
721 | 27 | w[i] = obj2.getInt(); |
722 | 27 | obj2.free(); |
723 | 27 | } |
724 | 9 | obj.free(); |
725 | 9 | if (w[0] < 0 || w[0] > 8 || |
726 | 9 | w[1] < 0 || w[1] > 8 || |
727 | 9 | w[2] < 0 || w[2] > 8) { |
728 | 0 | goto err0; |
729 | 0 | } |
730 | | |
731 | 9 | xrefStr->reset(); |
732 | 9 | dict->lookupNF("Index", &idx); |
733 | 9 | if (idx.isArray()) { |
734 | 54 | for (i = 0; i+1 < idx.arrayGetLength(); i += 2) { |
735 | 45 | if (!idx.arrayGet(i, &obj)->isInt()) { |
736 | 0 | idx.free(); |
737 | 0 | goto err1; |
738 | 0 | } |
739 | 45 | first = obj.getInt(); |
740 | 45 | obj.free(); |
741 | 45 | if (!idx.arrayGet(i+1, &obj)->isInt()) { |
742 | 0 | idx.free(); |
743 | 0 | goto err1; |
744 | 0 | } |
745 | 45 | n = obj.getInt(); |
746 | 45 | obj.free(); |
747 | 45 | if (first < 0 || n < 0 || |
748 | 45 | !readXRefStreamSection(xrefStr, w, first, n)) { |
749 | 0 | idx.free(); |
750 | 0 | goto err0; |
751 | 0 | } |
752 | 45 | } |
753 | 9 | } else { |
754 | 0 | if (!readXRefStreamSection(xrefStr, w, 0, newSize)) { |
755 | 0 | idx.free(); |
756 | 0 | goto err0; |
757 | 0 | } |
758 | 0 | } |
759 | 9 | idx.free(); |
760 | | |
761 | | //~ this can be a 64-bit int (?) |
762 | 9 | dict->lookupNF("Prev", &obj); |
763 | 9 | if (obj.isInt()) { |
764 | 9 | *pos = (GFileOffset)(Guint)obj.getInt(); |
765 | 9 | more = gTrue; |
766 | 9 | } else { |
767 | 0 | more = gFalse; |
768 | 0 | } |
769 | 9 | obj.free(); |
770 | 9 | if (trailerDict.isNone()) { |
771 | 4 | trailerDict.initDict(dict); |
772 | 4 | } |
773 | | |
774 | 9 | return more; |
775 | | |
776 | 0 | err1: |
777 | 0 | obj.free(); |
778 | 0 | err0: |
779 | 0 | ok = gFalse; |
780 | 0 | return gFalse; |
781 | 0 | } |
782 | | |
783 | 45 | GBool XRef::readXRefStreamSection(Stream *xrefStr, int *w, int first, int n) { |
784 | 45 | long long type, gen, offset; |
785 | 45 | int c, newSize, i, j; |
786 | | |
787 | 45 | if (first + n < 0) { |
788 | 0 | return gFalse; |
789 | 0 | } |
790 | 45 | if (first + n > size) { |
791 | 0 | for (newSize = size ? 2 * size : 1024; |
792 | 0 | first + n > newSize && newSize > 0; |
793 | 0 | newSize <<= 1) ; |
794 | 0 | if (newSize < 0) { |
795 | 0 | return gFalse; |
796 | 0 | } |
797 | 0 | entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); |
798 | 0 | for (i = size; i < newSize; ++i) { |
799 | 0 | entries[i].offset = (GFileOffset)-1; |
800 | 0 | entries[i].type = xrefEntryFree; |
801 | 0 | } |
802 | 0 | size = newSize; |
803 | 0 | } |
804 | 191 | for (i = first; i < first + n; ++i) { |
805 | 146 | if (w[0] == 0) { |
806 | 0 | type = 1; |
807 | 146 | } else { |
808 | 292 | for (type = 0, j = 0; j < w[0]; ++j) { |
809 | 146 | if ((c = xrefStr->getChar()) == EOF) { |
810 | 0 | return gFalse; |
811 | 0 | } |
812 | 146 | type = (type << 8) + c; |
813 | 146 | } |
814 | 146 | } |
815 | 526 | for (offset = 0, j = 0; j < w[1]; ++j) { |
816 | 380 | if ((c = xrefStr->getChar()) == EOF) { |
817 | 0 | return gFalse; |
818 | 0 | } |
819 | 380 | offset = (offset << 8) + c; |
820 | 380 | } |
821 | 146 | if (offset < 0 || offset > GFILEOFFSET_MAX) { |
822 | 0 | return gFalse; |
823 | 0 | } |
824 | 346 | for (gen = 0, j = 0; j < w[2]; ++j) { |
825 | 200 | if ((c = xrefStr->getChar()) == EOF) { |
826 | 0 | return gFalse; |
827 | 0 | } |
828 | 200 | gen = (gen << 8) + c; |
829 | 200 | } |
830 | | // some PDF generators include a free entry with gen=0xffffffff |
831 | 146 | if ((gen < 0 || gen > INT_MAX) && type != 0) { |
832 | 0 | return gFalse; |
833 | 0 | } |
834 | 146 | if (entries[i].offset == (GFileOffset)-1) { |
835 | 111 | switch (type) { |
836 | 8 | case 0: |
837 | 8 | entries[i].offset = (GFileOffset)offset; |
838 | 8 | entries[i].gen = (int)gen; |
839 | 8 | entries[i].type = xrefEntryFree; |
840 | 8 | break; |
841 | 75 | case 1: |
842 | 75 | entries[i].offset = (GFileOffset)offset; |
843 | 75 | entries[i].gen = (int)gen; |
844 | 75 | entries[i].type = xrefEntryUncompressed; |
845 | 75 | break; |
846 | 28 | case 2: |
847 | 28 | entries[i].offset = (GFileOffset)offset; |
848 | 28 | entries[i].gen = (int)gen; |
849 | 28 | entries[i].type = xrefEntryCompressed; |
850 | 28 | break; |
851 | 0 | default: |
852 | 0 | return gFalse; |
853 | 111 | } |
854 | 111 | if (i > last) { |
855 | 103 | last = i; |
856 | 103 | } |
857 | 111 | } |
858 | 146 | } |
859 | | |
860 | 45 | return gTrue; |
861 | 45 | } |
862 | | |
863 | | // Attempt to construct an xref table for a damaged file. |
864 | 310 | GBool XRef::constructXRef() { |
865 | 310 | int *streamObjNums = NULL; |
866 | 310 | int streamObjNumsLen = 0; |
867 | 310 | int streamObjNumsSize = 0; |
868 | 310 | int lastObjNum = -1; |
869 | 310 | rootNum = -1; |
870 | 310 | int streamEndsSize = 0; |
871 | 310 | streamEndsLen = 0; |
872 | 310 | char buf[4096 + 1]; |
873 | 310 | str->reset(); |
874 | 310 | GFileOffset bufPos = start; |
875 | 310 | char *p = buf; |
876 | 310 | char *end = buf; |
877 | 310 | GBool startOfLine = gTrue; |
878 | 310 | GBool space = gTrue; |
879 | 310 | GBool eof = gFalse; |
880 | 24.5M | while (1) { |
881 | 24.5M | if (end - p < 256 && !eof) { |
882 | 7.15k | memcpy(buf, p, end - p); |
883 | 7.15k | bufPos += p - buf; |
884 | 7.15k | p = buf + (end - p); |
885 | 7.15k | int n = (int)(buf + 4096 - p); |
886 | 7.15k | int m = str->getBlock(p, n); |
887 | 7.15k | end = p + m; |
888 | 7.15k | *end = '\0'; |
889 | 7.15k | p = buf; |
890 | 7.15k | eof = m < n; |
891 | 7.15k | } |
892 | 24.5M | if (p == end && eof) { |
893 | 310 | break; |
894 | 310 | } |
895 | 24.5M | if (startOfLine && !strncmp(p, "trailer", 7)) { |
896 | 18.1k | constructTrailerDict((GFileOffset)(bufPos + (p + 7 - buf))); |
897 | 18.1k | p += 7; |
898 | 18.1k | startOfLine = gFalse; |
899 | 18.1k | space = gFalse; |
900 | 24.5M | } else if (startOfLine && !strncmp(p, "endstream", 9)) { |
901 | 8.82k | if (streamEndsLen == streamEndsSize) { |
902 | 303 | streamEndsSize += 64; |
903 | 303 | streamEnds = (GFileOffset *)greallocn(streamEnds, streamEndsSize, |
904 | 303 | sizeof(GFileOffset)); |
905 | 303 | } |
906 | 8.82k | streamEnds[streamEndsLen++] = (GFileOffset)(bufPos + (p - buf)); |
907 | 8.82k | p += 9; |
908 | 8.82k | startOfLine = gFalse; |
909 | 8.82k | space = gFalse; |
910 | 24.5M | } else if (space && *p >= '0' && *p <= '9') { |
911 | 499k | p = constructObjectEntry(p, (GFileOffset)(bufPos + (p - buf)), |
912 | 499k | &lastObjNum); |
913 | 499k | startOfLine = gFalse; |
914 | 499k | space = gFalse; |
915 | 24.0M | } else if (p[0] == '>' && p[1] == '>') { |
916 | 107k | p += 2; |
917 | 107k | startOfLine = gFalse; |
918 | 107k | space = gFalse; |
919 | | // skip any PDF whitespace except for '\0' |
920 | 164k | while (*p == '\t' || *p == '\n' || *p == '\x0c' || |
921 | 122k | *p == '\r' || *p == ' ') { |
922 | 57.4k | if (*p == '\n' || *p == '\r') { |
923 | 21.8k | startOfLine = gTrue; |
924 | 21.8k | } |
925 | 57.4k | space = gTrue; |
926 | 57.4k | ++p; |
927 | 57.4k | } |
928 | 107k | if (!strncmp(p, "stream", 6)) { |
929 | 44.6k | if (lastObjNum >= 0) { |
930 | 44.5k | if (streamObjNumsLen == streamObjNumsSize) { |
931 | 853 | streamObjNumsSize += 64; |
932 | 853 | streamObjNums = (int *)greallocn(streamObjNums, streamObjNumsSize, |
933 | 853 | sizeof(int)); |
934 | 853 | } |
935 | 44.5k | streamObjNums[streamObjNumsLen++] = lastObjNum; |
936 | 44.5k | } |
937 | 44.6k | p += 6; |
938 | 44.6k | startOfLine = gFalse; |
939 | 44.6k | space = gFalse; |
940 | 44.6k | } |
941 | 23.9M | } else { |
942 | 23.9M | if (*p == '\n' || *p == '\r') { |
943 | 513k | startOfLine = gTrue; |
944 | 513k | space = gTrue; |
945 | 23.3M | } else if (Lexer::isSpace(*p & 0xff)) { |
946 | 10.5M | space = gTrue; |
947 | 12.8M | } else { |
948 | 12.8M | startOfLine = gFalse; |
949 | 12.8M | space = gFalse; |
950 | 12.8M | } |
951 | 23.9M | ++p; |
952 | 23.9M | } |
953 | 24.5M | } |
954 | | |
955 | | // read each stream object, check for xref or object stream |
956 | 44.7k | for (int i = 0; i < streamObjNumsLen; ++i) { |
957 | 44.4k | Object obj; |
958 | 44.4k | fetch(streamObjNums[i], entries[streamObjNums[i]].gen, &obj); |
959 | 44.4k | if (obj.isStream()) { |
960 | 27.6k | Dict *dict = obj.streamGetDict(); |
961 | 27.6k | Object type; |
962 | 27.6k | dict->lookup("Type", &type); |
963 | 27.6k | if (type.isName("XRef")) { |
964 | 249 | saveTrailerDict(dict, gTrue); |
965 | 27.3k | } else if (type.isName("ObjStm")) { |
966 | 21.1k | constructObjectStreamEntries(&obj, streamObjNums[i]); |
967 | 21.1k | } |
968 | 27.6k | type.free(); |
969 | 27.6k | } |
970 | 44.4k | obj.free(); |
971 | 44.4k | } |
972 | | |
973 | 310 | gfree(streamObjNums); |
974 | | |
975 | | // if the file is encrypted, then any objects fetched here will be |
976 | | // incorrect (because decryption is not yet enabled), so clear the |
977 | | // cache to avoid that problem |
978 | 5.25k | for (int i = 0; i < xrefCacheSize; ++i) { |
979 | 4.94k | if (cache[i].num >= 0) { |
980 | 3.05k | cache[i].obj.free(); |
981 | 3.05k | cache[i].num = -1; |
982 | 3.05k | } |
983 | 4.94k | } |
984 | | |
985 | 310 | if (rootNum < 0) { |
986 | 44 | error(errSyntaxError, -1, "Couldn't find trailer dictionary"); |
987 | 44 | return gFalse; |
988 | 44 | } |
989 | 266 | return gTrue; |
990 | 310 | } |
991 | | |
992 | | // Attempt to construct a trailer dict at [pos] in the stream. |
993 | 18.1k | void XRef::constructTrailerDict(GFileOffset pos) { |
994 | 18.1k | Object newTrailerDict, obj; |
995 | 18.1k | obj.initNull(); |
996 | 18.1k | Parser *parser = |
997 | 18.1k | new Parser(NULL, |
998 | 18.1k | new Lexer(NULL, |
999 | 18.1k | str->makeSubStream(pos, gFalse, 0, &obj)), |
1000 | 18.1k | gFalse); |
1001 | 18.1k | parser->getObj(&newTrailerDict); |
1002 | 18.1k | if (newTrailerDict.isDict()) { |
1003 | 13.5k | saveTrailerDict(newTrailerDict.getDict(), gFalse); |
1004 | 13.5k | } |
1005 | 18.1k | newTrailerDict.free(); |
1006 | 18.1k | delete parser; |
1007 | 18.1k | } |
1008 | | |
1009 | | // If [dict] "looks like" a trailer dict (i.e., has a Root entry), |
1010 | | // save it as the trailer dict. |
1011 | 13.7k | void XRef::saveTrailerDict(Dict *dict, GBool isXRefStream) { |
1012 | 13.7k | Object obj; |
1013 | 13.7k | dict->lookupNF("Root", &obj); |
1014 | 13.7k | if (obj.isRef()) { |
1015 | 990 | int newRootNum = obj.getRefNum(); |
1016 | | // the xref stream scanning code runs after all objects are found, |
1017 | | // so we can check for a valid root object number at that point |
1018 | 990 | if (!isXRefStream || newRootNum <= last) { |
1019 | 951 | rootNum = newRootNum; |
1020 | 951 | rootGen = obj.getRefGen(); |
1021 | 951 | if (!trailerDict.isNone()) { |
1022 | 686 | trailerDict.free(); |
1023 | 686 | } |
1024 | 951 | trailerDict.initDict(dict); |
1025 | 951 | } |
1026 | 990 | } |
1027 | 13.7k | obj.free(); |
1028 | 13.7k | } |
1029 | | |
1030 | | // Look for an object header ("nnn ggg obj") at [p]. The first |
1031 | | // character at *[p] is a digit. [pos] is the position of *[p]. |
1032 | 499k | char *XRef::constructObjectEntry(char *p, GFileOffset pos, int *objNum) { |
1033 | | // we look for non-end-of-line space characters here, to deal with |
1034 | | // situations like: |
1035 | | // nnn <-- garbage digits on a line |
1036 | | // nnn nnn obj <-- actual object |
1037 | | // and we also ignore '\0' (because it's used to terminate the |
1038 | | // buffer in this damage-scanning code) |
1039 | 499k | int num = 0; |
1040 | 1.63M | do { |
1041 | 1.63M | num = (num * 10) + (*p - '0'); |
1042 | 1.63M | ++p; |
1043 | 1.63M | } while (*p >= '0' && *p <= '9' && num < 100000000); |
1044 | 499k | if (*p != '\t' && *p != '\x0c' && *p != ' ') { |
1045 | 324k | return p; |
1046 | 324k | } |
1047 | 193k | do { |
1048 | 193k | ++p; |
1049 | 193k | } while (*p == '\t' || *p == '\x0c' || *p == ' '); |
1050 | 174k | if (!(*p >= '0' && *p <= '9')) { |
1051 | 37.3k | return p; |
1052 | 37.3k | } |
1053 | 137k | int gen = 0; |
1054 | 265k | do { |
1055 | 265k | gen = (gen * 10) + (*p - '0'); |
1056 | 265k | ++p; |
1057 | 265k | } while (*p >= '0' && *p <= '9' && gen < 100000000); |
1058 | 137k | if (*p != '\t' && *p != '\x0c' && *p != ' ') { |
1059 | 4.93k | return p; |
1060 | 4.93k | } |
1061 | 137k | do { |
1062 | 137k | ++p; |
1063 | 137k | } while (*p == '\t' || *p == '\x0c' || *p == ' '); |
1064 | 132k | if (strncmp(p, "obj", 3)) { |
1065 | 109k | return p; |
1066 | 109k | } |
1067 | | |
1068 | 22.4k | if (constructXRefEntry(num, gen, pos - start, xrefEntryUncompressed)) { |
1069 | 22.4k | *objNum = num; |
1070 | 22.4k | } |
1071 | | |
1072 | 22.4k | return p; |
1073 | 132k | } |
1074 | | |
1075 | | // Read the header from an object stream, and add xref entries for all |
1076 | | // of its objects. |
1077 | 21.1k | void XRef::constructObjectStreamEntries(Object *objStr, int objStrObjNum) { |
1078 | 21.1k | Object obj1, obj2; |
1079 | | |
1080 | | // get the object count |
1081 | 21.1k | if (!objStr->streamGetDict()->lookup("N", &obj1)->isInt()) { |
1082 | 107 | obj1.free(); |
1083 | 107 | return; |
1084 | 107 | } |
1085 | 21.0k | int nObjects = obj1.getInt(); |
1086 | 21.0k | obj1.free(); |
1087 | 21.0k | if (nObjects <= 0 || nObjects > 1000000) { |
1088 | 265 | return; |
1089 | 265 | } |
1090 | | |
1091 | | // parse the header: object numbers and offsets |
1092 | 20.7k | Parser *parser = new Parser(NULL, |
1093 | 20.7k | new Lexer(NULL, objStr->getStream()->copy()), |
1094 | 20.7k | gFalse); |
1095 | 10.1M | for (int i = 0; i < nObjects; ++i) { |
1096 | 10.1M | parser->getObj(&obj1, gTrue); |
1097 | 10.1M | parser->getObj(&obj2, gTrue); |
1098 | 10.1M | if (obj1.isInt() && obj2.isInt()) { |
1099 | 24.6k | int num = obj1.getInt(); |
1100 | 24.6k | if (num >= 0 && num < 1000000) { |
1101 | 23.1k | constructXRefEntry(num, i, objStrObjNum, xrefEntryCompressed); |
1102 | 23.1k | } |
1103 | 24.6k | } |
1104 | 10.1M | obj2.free(); |
1105 | 10.1M | obj1.free(); |
1106 | 10.1M | } |
1107 | 20.7k | delete parser; |
1108 | 20.7k | } |
1109 | | |
1110 | | GBool XRef::constructXRefEntry(int num, int gen, GFileOffset pos, |
1111 | 45.5k | XRefEntryType type) { |
1112 | 45.5k | if (num >= size) { |
1113 | 684 | int newSize = (num + 1 + 255) & ~255; |
1114 | 684 | if (newSize < 0) { |
1115 | 0 | return gFalse; |
1116 | 0 | } |
1117 | 684 | entries = (XRefEntry *)greallocn(entries, newSize, sizeof(XRefEntry)); |
1118 | 21.3M | for (int i = size; i < newSize; ++i) { |
1119 | 21.3M | entries[i].offset = (GFileOffset)-1; |
1120 | 21.3M | entries[i].type = xrefEntryFree; |
1121 | 21.3M | } |
1122 | 684 | size = newSize; |
1123 | 684 | } |
1124 | | |
1125 | 45.5k | if (entries[num].type == xrefEntryFree || |
1126 | 43.4k | gen >= entries[num].gen) { |
1127 | 43.4k | entries[num].offset = pos; |
1128 | 43.4k | entries[num].gen = gen; |
1129 | 43.4k | entries[num].type = type; |
1130 | 43.4k | if (num > last) { |
1131 | 2.91k | last = num; |
1132 | 2.91k | } |
1133 | 43.4k | } |
1134 | | |
1135 | 45.5k | return gTrue; |
1136 | 45.5k | } |
1137 | | |
1138 | | void XRef::setEncryption(int permFlagsA, GBool ownerPasswordOkA, |
1139 | | Guchar *fileKeyA, int keyLengthA, int encVersionA, |
1140 | 17 | CryptAlgorithm encAlgorithmA) { |
1141 | 17 | int i; |
1142 | | |
1143 | 17 | encrypted = gTrue; |
1144 | 17 | permFlags = permFlagsA; |
1145 | 17 | ownerPasswordOk = ownerPasswordOkA; |
1146 | 17 | if (keyLengthA <= 32) { |
1147 | 17 | keyLength = keyLengthA; |
1148 | 17 | } else { |
1149 | 0 | keyLength = 32; |
1150 | 0 | } |
1151 | 385 | for (i = 0; i < keyLength; ++i) { |
1152 | 368 | fileKey[i] = fileKeyA[i]; |
1153 | 368 | } |
1154 | 17 | encVersion = encVersionA; |
1155 | 17 | encAlgorithm = encAlgorithmA; |
1156 | 17 | } |
1157 | | |
1158 | | GBool XRef::getEncryption(int *permFlagsA, GBool *ownerPasswordOkA, |
1159 | | int *keyLengthA, int *encVersionA, |
1160 | 0 | CryptAlgorithm *encAlgorithmA) { |
1161 | 0 | if (!encrypted) { |
1162 | 0 | return gFalse; |
1163 | 0 | } |
1164 | 0 | *permFlagsA = permFlags; |
1165 | 0 | *ownerPasswordOkA = ownerPasswordOk; |
1166 | 0 | *keyLengthA = keyLength; |
1167 | 0 | *encVersionA = encVersion; |
1168 | 0 | *encAlgorithmA = encAlgorithm; |
1169 | 0 | return gTrue; |
1170 | 0 | } |
1171 | | |
1172 | 0 | GBool XRef::okToPrint(GBool ignoreOwnerPW) { |
1173 | 0 | return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permPrint); |
1174 | 0 | } |
1175 | | |
1176 | 0 | GBool XRef::okToChange(GBool ignoreOwnerPW) { |
1177 | 0 | return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permChange); |
1178 | 0 | } |
1179 | | |
1180 | 0 | GBool XRef::okToCopy(GBool ignoreOwnerPW) { |
1181 | 0 | return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permCopy); |
1182 | 0 | } |
1183 | | |
1184 | 0 | GBool XRef::okToAddNotes(GBool ignoreOwnerPW) { |
1185 | 0 | return (!ignoreOwnerPW && ownerPasswordOk) || (permFlags & permNotes); |
1186 | 0 | } |
1187 | | |
1188 | 13.3M | Object *XRef::fetch(int num, int gen, Object *obj, int recursion) { |
1189 | 13.3M | XRefEntry *e; |
1190 | 13.3M | Parser *parser; |
1191 | 13.3M | Object obj1, obj2, obj3; |
1192 | 13.3M | XRefCacheEntry tmp; |
1193 | 13.3M | int i, j; |
1194 | | |
1195 | | // check for bogus ref - this can happen in corrupted PDF files |
1196 | 13.3M | if (num < 0 || num >= size) { |
1197 | 130 | goto err; |
1198 | 130 | } |
1199 | | |
1200 | | // check the cache |
1201 | 13.3M | #if MULTITHREADED |
1202 | 13.3M | gLockMutex(&cacheMutex); |
1203 | 13.3M | #endif |
1204 | 13.3M | if (cache[0].num == num && cache[0].gen == gen) { |
1205 | 79.9k | cache[0].obj.copy(obj); |
1206 | 79.9k | #if MULTITHREADED |
1207 | 79.9k | gUnlockMutex(&cacheMutex); |
1208 | 79.9k | #endif |
1209 | 79.9k | return obj; |
1210 | 79.9k | } |
1211 | 211M | for (i = 1; i < xrefCacheSize; ++i) { |
1212 | 198M | if (cache[i].num == num && cache[i].gen == gen) { |
1213 | 14.3k | tmp = cache[i]; |
1214 | 62.3k | for (j = i; j > 0; --j) { |
1215 | 48.0k | cache[j] = cache[j - 1]; |
1216 | 48.0k | } |
1217 | 14.3k | cache[0] = tmp; |
1218 | 14.3k | cache[0].obj.copy(obj); |
1219 | 14.3k | #if MULTITHREADED |
1220 | 14.3k | gUnlockMutex(&cacheMutex); |
1221 | 14.3k | #endif |
1222 | 14.3k | return obj; |
1223 | 14.3k | } |
1224 | 198M | } |
1225 | 13.2M | #if MULTITHREADED |
1226 | 13.2M | gUnlockMutex(&cacheMutex); |
1227 | 13.2M | #endif |
1228 | | |
1229 | 13.2M | e = &entries[num]; |
1230 | 13.2M | switch (e->type) { |
1231 | | |
1232 | 23.9k | case xrefEntryUncompressed: |
1233 | 23.9k | if (e->gen != gen) { |
1234 | 473 | goto err; |
1235 | 473 | } |
1236 | 23.4k | obj1.initNull(); |
1237 | 23.4k | parser = new Parser(this, |
1238 | 23.4k | new Lexer(this, |
1239 | 23.4k | str->makeSubStream(start + e->offset, gFalse, 0, &obj1)), |
1240 | 23.4k | gTrue); |
1241 | 23.4k | parser->getObj(&obj1, gTrue); |
1242 | 23.4k | parser->getObj(&obj2, gTrue); |
1243 | 23.4k | parser->getObj(&obj3, gTrue); |
1244 | 23.4k | if (!obj1.isInt() || obj1.getInt() != num || |
1245 | 23.4k | !obj2.isInt() || obj2.getInt() != gen || |
1246 | 23.4k | !obj3.isCmd("obj")) { |
1247 | 461 | obj1.free(); |
1248 | 461 | obj2.free(); |
1249 | 461 | obj3.free(); |
1250 | 461 | delete parser; |
1251 | 461 | goto err; |
1252 | 461 | } |
1253 | 23.0k | parser->getObj(obj, gFalse, encrypted ? fileKey : (Guchar *)NULL, |
1254 | 23.0k | encAlgorithm, keyLength, num, gen, recursion); |
1255 | 23.0k | obj1.free(); |
1256 | 23.0k | obj2.free(); |
1257 | 23.0k | obj3.free(); |
1258 | 23.0k | delete parser; |
1259 | 23.0k | break; |
1260 | | |
1261 | 11.0k | case xrefEntryCompressed: |
1262 | | #if 0 // Adobe apparently ignores the generation number on compressed objects |
1263 | | if (gen != 0) { |
1264 | | goto err; |
1265 | | } |
1266 | | #endif |
1267 | 11.0k | if (e->offset >= (GFileOffset)size || |
1268 | 11.0k | entries[e->offset].type != xrefEntryUncompressed) { |
1269 | 17 | error(errSyntaxError, -1, "Invalid object stream"); |
1270 | 17 | goto err; |
1271 | 17 | } |
1272 | 10.9k | if (!getObjectStreamObject((int)e->offset, e->gen, num, obj, recursion)) { |
1273 | 3.61k | goto err; |
1274 | 3.61k | } |
1275 | 7.38k | break; |
1276 | | |
1277 | 13.1M | default: |
1278 | 13.1M | goto err; |
1279 | 13.2M | } |
1280 | | |
1281 | | // put the new object in the cache, throwing away the oldest object |
1282 | | // currently in the cache |
1283 | 30.4k | #if MULTITHREADED |
1284 | 30.4k | gLockMutex(&cacheMutex); |
1285 | 30.4k | #endif |
1286 | 30.4k | if (cache[xrefCacheSize - 1].num >= 0) { |
1287 | 23.8k | cache[xrefCacheSize - 1].obj.free(); |
1288 | 23.8k | } |
1289 | 486k | for (i = xrefCacheSize - 1; i > 0; --i) { |
1290 | 456k | cache[i] = cache[i - 1]; |
1291 | 456k | } |
1292 | 30.4k | cache[0].num = num; |
1293 | 30.4k | cache[0].gen = gen; |
1294 | 30.4k | obj->copy(&cache[0].obj); |
1295 | 30.4k | #if MULTITHREADED |
1296 | 30.4k | gUnlockMutex(&cacheMutex); |
1297 | 30.4k | #endif |
1298 | | |
1299 | 30.4k | return obj; |
1300 | | |
1301 | 13.1M | err: |
1302 | 13.1M | return obj->initNull(); |
1303 | 13.2M | } |
1304 | | |
1305 | | GBool XRef::getObjectStreamObject(int objStrNum, int objIdx, |
1306 | 10.9k | int objNum, Object *obj, int recursion) { |
1307 | | // check for a cached ObjectStream |
1308 | 10.9k | #if MULTITHREADED |
1309 | 10.9k | gLockMutex(&objStrsMutex); |
1310 | 10.9k | #endif |
1311 | 10.9k | ObjectStream *objStr = getObjectStreamFromCache(objStrNum); |
1312 | 10.9k | GBool found = gFalse; |
1313 | 10.9k | if (objStr) { |
1314 | 7.09k | objStr->getObject(objIdx, objNum, obj); |
1315 | 7.09k | cleanObjectStreamCache(); |
1316 | 7.09k | found = gTrue; |
1317 | 7.09k | } |
1318 | 10.9k | #if MULTITHREADED |
1319 | 10.9k | gUnlockMutex(&objStrsMutex); |
1320 | 10.9k | #endif |
1321 | 10.9k | if (found) { |
1322 | 7.09k | return gTrue; |
1323 | 7.09k | } |
1324 | | |
1325 | | // load a new ObjectStream |
1326 | 3.89k | objStr = new ObjectStream(this, objStrNum, recursion); |
1327 | 3.89k | if (!objStr->isOk()) { |
1328 | 3.61k | delete objStr; |
1329 | 3.61k | return gFalse; |
1330 | 3.61k | } |
1331 | 286 | objStr->getObject(objIdx, objNum, obj); |
1332 | 286 | #if MULTITHREADED |
1333 | 286 | gLockMutex(&objStrsMutex); |
1334 | 286 | #endif |
1335 | 286 | addObjectStreamToCache(objStr); |
1336 | 286 | cleanObjectStreamCache(); |
1337 | 286 | #if MULTITHREADED |
1338 | 286 | gUnlockMutex(&objStrsMutex); |
1339 | 286 | #endif |
1340 | 286 | return gTrue; |
1341 | 3.89k | } |
1342 | | |
1343 | | // NB: objStrsMutex must be locked when calling this function. |
1344 | 10.9k | ObjectStream *XRef::getObjectStreamFromCache(int objStrNum) { |
1345 | | // check the MRU entry in the cache |
1346 | 10.9k | if (objStrs[0] && objStrs[0]->getObjStrNum() == objStrNum) { |
1347 | 6.45k | ObjectStream *objStr = objStrs[0]; |
1348 | 6.45k | objStrLastUse[0] = objStrTime++; |
1349 | 6.45k | return objStr; |
1350 | 6.45k | } |
1351 | | |
1352 | | // check the rest of the cache |
1353 | 10.5k | for (int i = 1; i < objStrCacheLength; ++i) { |
1354 | 6.68k | if (objStrs[i] && objStrs[i]->getObjStrNum() == objStrNum) { |
1355 | 638 | ObjectStream *objStr = objStrs[i]; |
1356 | 1.35k | for (int j = i; j > 0; --j) { |
1357 | 712 | objStrs[j] = objStrs[j - 1]; |
1358 | 712 | objStrLastUse[j] = objStrLastUse[j - 1]; |
1359 | 712 | } |
1360 | 638 | objStrs[0] = objStr; |
1361 | 638 | objStrLastUse[0] = objStrTime++; |
1362 | 638 | return objStr; |
1363 | 638 | } |
1364 | 6.68k | } |
1365 | | |
1366 | 3.89k | return NULL; |
1367 | 4.53k | } |
1368 | | |
1369 | | // NB: objStrsMutex must be locked when calling this function. |
1370 | 286 | void XRef::addObjectStreamToCache(ObjectStream *objStr) { |
1371 | | // add to the cache |
1372 | 286 | if (objStrCacheLength == objStrCacheSize) { |
1373 | 0 | delete objStrs[objStrCacheSize - 1]; |
1374 | 0 | --objStrCacheLength; |
1375 | 0 | } |
1376 | 584 | for (int j = objStrCacheLength; j > 0; --j) { |
1377 | 298 | objStrs[j] = objStrs[j - 1]; |
1378 | 298 | objStrLastUse[j] = objStrLastUse[j - 1]; |
1379 | 298 | } |
1380 | 286 | ++objStrCacheLength; |
1381 | 286 | objStrs[0] = objStr; |
1382 | 286 | objStrLastUse[0] = objStrTime++; |
1383 | 286 | } |
1384 | | |
1385 | | // If the oldest (least recently used) entry in the object stream |
1386 | | // cache is more than objStrCacheTimeout accesses old (hasn't been |
1387 | | // used in the last objStrCacheTimeout accesses), eject it from the |
1388 | | // cache. |
1389 | | // NB: objStrsMutex must be locked when calling this function. |
1390 | 7.38k | void XRef::cleanObjectStreamCache() { |
1391 | | // NB: objStrTime and objStrLastUse[] are unsigned ints, so the |
1392 | | // mod-2^32 arithmetic makes the subtraction work out, even if the |
1393 | | // time wraps around. |
1394 | 7.38k | if (objStrCacheLength > 1 && |
1395 | 6.27k | objStrTime - objStrLastUse[objStrCacheLength - 1] |
1396 | 6.27k | > objStrCacheTimeout) { |
1397 | 0 | delete objStrs[objStrCacheLength - 1]; |
1398 | 0 | objStrs[objStrCacheLength - 1] = NULL; |
1399 | 0 | --objStrCacheLength; |
1400 | 0 | } |
1401 | 7.38k | } |
1402 | | |
1403 | 0 | Object *XRef::getDocInfo(Object *obj) { |
1404 | 0 | return trailerDict.dictLookup("Info", obj); |
1405 | 0 | } |
1406 | | |
1407 | | // Added for the pdftex project. |
1408 | 0 | Object *XRef::getDocInfoNF(Object *obj) { |
1409 | 0 | return trailerDict.dictLookupNF("Info", obj); |
1410 | 0 | } |
1411 | | |
1412 | 21.3k | GBool XRef::getStreamEnd(GFileOffset streamStart, GFileOffset *streamEnd) { |
1413 | 21.3k | int a, b, m; |
1414 | | |
1415 | 21.3k | if (streamEndsLen == 0 || |
1416 | 21.2k | streamStart > streamEnds[streamEndsLen - 1]) { |
1417 | 1.14k | return gFalse; |
1418 | 1.14k | } |
1419 | | |
1420 | 20.2k | a = -1; |
1421 | 20.2k | b = streamEndsLen - 1; |
1422 | | // invariant: streamEnds[a] < streamStart <= streamEnds[b] |
1423 | 121k | while (b - a > 1) { |
1424 | 101k | m = (a + b) / 2; |
1425 | 101k | if (streamStart <= streamEnds[m]) { |
1426 | 42.3k | b = m; |
1427 | 58.9k | } else { |
1428 | 58.9k | a = m; |
1429 | 58.9k | } |
1430 | 101k | } |
1431 | 20.2k | *streamEnd = streamEnds[b]; |
1432 | 20.2k | return gTrue; |
1433 | 21.3k | } |
1434 | | |
1435 | 14 | GFileOffset XRef::strToFileOffset(char *s) { |
1436 | 14 | GFileOffset x, d; |
1437 | 14 | char *p; |
1438 | | |
1439 | 14 | x = 0; |
1440 | 64 | for (p = s; *p && isdigit(*p & 0xff); ++p) { |
1441 | 50 | d = *p - '0'; |
1442 | 50 | if (x > (GFILEOFFSET_MAX - d) / 10) { |
1443 | 0 | break; |
1444 | 0 | } |
1445 | 50 | x = 10 * x + d; |
1446 | 50 | } |
1447 | 14 | return x; |
1448 | 14 | } |