/src/xpdf-4.06/xpdf/Catalog.cc
Line | Count | Source |
1 | | //======================================================================== |
2 | | // |
3 | | // Catalog.cc |
4 | | // |
5 | | // Copyright 1996-2013 Glyph & Cog, LLC |
6 | | // |
7 | | //======================================================================== |
8 | | |
9 | | #include <aconf.h> |
10 | | |
11 | | #include <string.h> |
12 | | #include <stddef.h> |
13 | | #include <limits.h> |
14 | | #include "gmem.h" |
15 | | #include "gmempp.h" |
16 | | #include "gfile.h" |
17 | | #include "GList.h" |
18 | | #include "Object.h" |
19 | | #include "CharTypes.h" |
20 | | #include "PDFDoc.h" |
21 | | #include "XRef.h" |
22 | | #include "Array.h" |
23 | | #include "Dict.h" |
24 | | #include "Page.h" |
25 | | #include "Error.h" |
26 | | #include "Link.h" |
27 | | #include "AcroForm.h" |
28 | | #include "TextString.h" |
29 | | #include "Catalog.h" |
30 | | |
31 | | //------------------------------------------------------------------------ |
32 | | // PageTreeNode |
33 | | //------------------------------------------------------------------------ |
34 | | |
35 | | class PageTreeNode { |
36 | | public: |
37 | | |
38 | | PageTreeNode(Ref refA, int countA, PageTreeNode *parentA); |
39 | | ~PageTreeNode(); |
40 | | |
41 | | Ref ref; |
42 | | int count; |
43 | | PageTreeNode *parent; |
44 | | GList *kids; // [PageTreeNode] |
45 | | PageAttrs *attrs; |
46 | | }; |
47 | | |
48 | 29.7k | PageTreeNode::PageTreeNode(Ref refA, int countA, PageTreeNode *parentA) { |
49 | 29.7k | ref = refA; |
50 | 29.7k | count = countA; |
51 | 29.7k | parent = parentA; |
52 | 29.7k | kids = NULL; |
53 | 29.7k | attrs = NULL; |
54 | 29.7k | } |
55 | | |
56 | 29.7k | PageTreeNode::~PageTreeNode() { |
57 | 29.7k | delete attrs; |
58 | 29.7k | if (kids) { |
59 | 12.3k | deleteGList(kids, PageTreeNode); |
60 | 12.3k | } |
61 | 29.7k | } |
62 | | |
63 | | //------------------------------------------------------------------------ |
64 | | // EmbeddedFile |
65 | | //------------------------------------------------------------------------ |
66 | | |
67 | | class EmbeddedFile { |
68 | | public: |
69 | | |
70 | | EmbeddedFile(TextString *nameA, Object *streamRefA); |
71 | | ~EmbeddedFile(); |
72 | | |
73 | | TextString *name; |
74 | | Object streamRef; |
75 | | }; |
76 | | |
77 | 3 | EmbeddedFile::EmbeddedFile(TextString *nameA, Object *streamRefA) { |
78 | 3 | name = nameA; |
79 | 3 | streamRefA->copy(&streamRef); |
80 | 3 | } |
81 | | |
82 | 3 | EmbeddedFile::~EmbeddedFile() { |
83 | 3 | delete name; |
84 | 3 | streamRef.free(); |
85 | 3 | } |
86 | | |
87 | | //------------------------------------------------------------------------ |
88 | | // PageLabelNode |
89 | | //------------------------------------------------------------------------ |
90 | | |
91 | | class PageLabelNode { |
92 | | public: |
93 | | |
94 | | PageLabelNode(int firstPageA, Dict *dict); |
95 | | ~PageLabelNode(); |
96 | | |
97 | | int firstPage; // first page number covered by this node |
98 | | int lastPage; // last page number covered by this node |
99 | | TextString *prefix; // label prefix (may be empty) |
100 | | int start; // value of the numeric portion of this |
101 | | // label for the first page in the range |
102 | | char style; // page label style |
103 | | }; |
104 | | |
105 | 272 | PageLabelNode::PageLabelNode(int firstPageA, Dict *dict) { |
106 | 272 | Object prefixObj, styleObj, startObj; |
107 | | |
108 | | // convert page index to page number |
109 | 272 | firstPage = firstPageA + 1; |
110 | | |
111 | | // lastPage will be filled in later |
112 | 272 | lastPage = -1; |
113 | | |
114 | 272 | if (dict->lookup("P", &prefixObj)->isString()) { |
115 | 36 | prefix = new TextString(prefixObj.getString()); |
116 | 236 | } else { |
117 | 236 | prefix = new TextString(); |
118 | 236 | } |
119 | 272 | prefixObj.free(); |
120 | | |
121 | 272 | style = '\0'; |
122 | 272 | if (dict->lookup("S", &styleObj)->isName()) { |
123 | 224 | if (strlen(styleObj.getName()) == 1) { |
124 | 211 | style = styleObj.getName()[0]; |
125 | 211 | } |
126 | 224 | } |
127 | 272 | styleObj.free(); |
128 | | |
129 | 272 | start = 1; |
130 | 272 | if (dict->lookup("St", &startObj)->isInt()) { |
131 | 139 | start = startObj.getInt(); |
132 | 139 | } |
133 | 272 | startObj.free(); |
134 | 272 | } |
135 | | |
136 | 272 | PageLabelNode::~PageLabelNode() { |
137 | 272 | delete prefix; |
138 | 272 | } |
139 | | |
140 | | //------------------------------------------------------------------------ |
141 | | // Catalog |
142 | | //------------------------------------------------------------------------ |
143 | | |
144 | 13.0k | Catalog::Catalog(PDFDoc *docA) { |
145 | 13.0k | Object catDict; |
146 | 13.0k | Object obj, obj2; |
147 | | |
148 | 13.0k | ok = gTrue; |
149 | 13.0k | doc = docA; |
150 | 13.0k | xref = doc->getXRef(); |
151 | 13.0k | pageTree = NULL; |
152 | 13.0k | pages = NULL; |
153 | 13.0k | pageRefs = NULL; |
154 | 13.0k | numPages = 0; |
155 | 13.0k | baseURI = NULL; |
156 | 13.0k | form = NULL; |
157 | 13.0k | embeddedFiles = NULL; |
158 | 13.0k | pageLabels = NULL; |
159 | 13.0k | #if MULTITHREADED |
160 | 13.0k | gInitMutex(&pageMutex); |
161 | 13.0k | #endif |
162 | | |
163 | 13.0k | xref->getCatalog(&catDict); |
164 | 13.0k | if (!catDict.isDict()) { |
165 | 134 | error(errSyntaxError, -1, "Catalog object is wrong type ({0:s})", |
166 | 134 | catDict.getTypeName()); |
167 | 134 | goto err1; |
168 | 134 | } |
169 | | |
170 | | // read page tree |
171 | 12.9k | if (!readPageTree(&catDict)) { |
172 | 208 | goto err1; |
173 | 208 | } |
174 | | |
175 | | // read named destination dictionary |
176 | 12.6k | catDict.dictLookup("Dests", &dests); |
177 | | |
178 | | // read root of named destination tree |
179 | 12.6k | if (catDict.dictLookup("Names", &obj)->isDict()) |
180 | 616 | obj.dictLookup("Dests", &nameTree); |
181 | 12.0k | else |
182 | 12.0k | nameTree.initNull(); |
183 | 12.6k | obj.free(); |
184 | | |
185 | | // read base URI |
186 | 12.6k | if (catDict.dictLookup("URI", &obj)->isDict()) { |
187 | 3 | if (obj.dictLookup("Base", &obj2)->isString()) { |
188 | 1 | baseURI = obj2.getString()->copy(); |
189 | 1 | } |
190 | 3 | obj2.free(); |
191 | 3 | } |
192 | 12.6k | obj.free(); |
193 | 12.6k | if (!baseURI || baseURI->getLength() == 0) { |
194 | 12.6k | if (baseURI) { |
195 | 0 | delete baseURI; |
196 | 0 | } |
197 | 12.6k | if (doc->getFileName()) { |
198 | 0 | baseURI = makePathAbsolute(grabPath(doc->getFileName()->getCString())); |
199 | 0 | if (baseURI->getChar(0) == '/') { |
200 | 0 | baseURI->insert(0, "file://localhost"); |
201 | 0 | } else { |
202 | 0 | baseURI->insert(0, "file://localhost/"); |
203 | 0 | } |
204 | 12.6k | } else { |
205 | 12.6k | baseURI = new GString("file://localhost/"); |
206 | 12.6k | } |
207 | 12.6k | } |
208 | | |
209 | | // get the metadata stream |
210 | 12.6k | catDict.dictLookup("Metadata", &metadata); |
211 | | |
212 | | // get the structure tree root |
213 | 12.6k | catDict.dictLookup("StructTreeRoot", &structTreeRoot); |
214 | | |
215 | | // get the outline dictionary |
216 | 12.6k | catDict.dictLookup("Outlines", &outline); |
217 | | |
218 | | // get the AcroForm dictionary |
219 | 12.6k | catDict.dictLookup("AcroForm", &acroForm); |
220 | | |
221 | | // get the NeedsRendering flag |
222 | | // NB: AcroForm::load() uses this value |
223 | 12.6k | needsRendering = catDict.dictLookup("NeedsRendering", &obj)->isBool() && |
224 | 0 | obj.getBool(); |
225 | 12.6k | obj.free(); |
226 | | |
227 | | // create the Form |
228 | | // (if acroForm is a null object, this will still create an AcroForm |
229 | | // if there are unattached Widget-type annots) |
230 | 12.6k | form = AcroForm::load(doc, this, &acroForm); |
231 | | |
232 | | // get the OCProperties dictionary |
233 | 12.6k | catDict.dictLookup("OCProperties", &ocProperties); |
234 | | |
235 | | // get the list of embedded files |
236 | 12.6k | readEmbeddedFileList(catDict.getDict()); |
237 | | |
238 | | // get the ViewerPreferences object |
239 | 12.6k | catDict.dictLookupNF("ViewerPreferences", &viewerPrefs); |
240 | | |
241 | 12.6k | if (catDict.dictLookup("PageLabels", &obj)->isDict()) { |
242 | 340 | readPageLabelTree(&obj); |
243 | 340 | } |
244 | 12.6k | obj.free(); |
245 | | |
246 | 12.6k | catDict.free(); |
247 | 12.6k | return; |
248 | | |
249 | 342 | err1: |
250 | 342 | catDict.free(); |
251 | 342 | dests.initNull(); |
252 | 342 | nameTree.initNull(); |
253 | 342 | ok = gFalse; |
254 | 342 | } |
255 | | |
256 | 13.0k | Catalog::~Catalog() { |
257 | 13.0k | int i; |
258 | | |
259 | 13.0k | if (pageTree) { |
260 | 12.6k | delete pageTree; |
261 | 12.6k | } |
262 | 13.0k | if (pages) { |
263 | 397k | for (i = 0; i < numPages; ++i) { |
264 | 384k | if (pages[i]) { |
265 | 384k | delete pages[i]; |
266 | 384k | } |
267 | 384k | } |
268 | 12.6k | gfree(pages); |
269 | 12.6k | gfree(pageRefs); |
270 | 12.6k | } |
271 | 13.0k | #if MULTITHREADED |
272 | 13.0k | gDestroyMutex(&pageMutex); |
273 | 13.0k | #endif |
274 | 13.0k | dests.free(); |
275 | 13.0k | nameTree.free(); |
276 | 13.0k | if (baseURI) { |
277 | 12.6k | delete baseURI; |
278 | 12.6k | } |
279 | 13.0k | metadata.free(); |
280 | 13.0k | structTreeRoot.free(); |
281 | 13.0k | outline.free(); |
282 | 13.0k | acroForm.free(); |
283 | 13.0k | if (form) { |
284 | 1.24k | delete form; |
285 | 1.24k | } |
286 | 13.0k | ocProperties.free(); |
287 | 13.0k | if (embeddedFiles) { |
288 | 3 | deleteGList(embeddedFiles, EmbeddedFile); |
289 | 3 | } |
290 | 13.0k | if (pageLabels) { |
291 | 146 | deleteGList(pageLabels, PageLabelNode); |
292 | 146 | } |
293 | 13.0k | viewerPrefs.free(); |
294 | 13.0k | } |
295 | | |
296 | 2.30M | Page *Catalog::getPage(int i) { |
297 | 2.30M | Page *page; |
298 | | |
299 | 2.30M | #if MULTITHREADED |
300 | 2.30M | gLockMutex(&pageMutex); |
301 | 2.30M | #endif |
302 | 2.30M | if (!pages[i-1]) { |
303 | 384k | loadPage(i); |
304 | 384k | } |
305 | 2.30M | page = pages[i-1]; |
306 | 2.30M | #if MULTITHREADED |
307 | 2.30M | gUnlockMutex(&pageMutex); |
308 | 2.30M | #endif |
309 | 2.30M | return page; |
310 | 2.30M | } |
311 | | |
312 | 0 | Ref *Catalog::getPageRef(int i) { |
313 | 0 | Ref *pageRef; |
314 | |
|
315 | 0 | #if MULTITHREADED |
316 | 0 | gLockMutex(&pageMutex); |
317 | 0 | #endif |
318 | 0 | if (!pages[i-1]) { |
319 | 0 | loadPage(i); |
320 | 0 | } |
321 | 0 | pageRef = &pageRefs[i-1]; |
322 | 0 | #if MULTITHREADED |
323 | 0 | gUnlockMutex(&pageMutex); |
324 | 0 | #endif |
325 | 0 | return pageRef; |
326 | 0 | } |
327 | | |
328 | 0 | void Catalog::doneWithPage(int i) { |
329 | 0 | #if MULTITHREADED |
330 | 0 | gLockMutex(&pageMutex); |
331 | 0 | #endif |
332 | 0 | if (pages[i-1]) { |
333 | 0 | delete pages[i-1]; |
334 | 0 | pages[i-1] = NULL; |
335 | 0 | } |
336 | 0 | #if MULTITHREADED |
337 | 0 | gUnlockMutex(&pageMutex); |
338 | 0 | #endif |
339 | 0 | } |
340 | | |
341 | 12.6k | GString *Catalog::readMetadata() { |
342 | 12.6k | GString *s; |
343 | 12.6k | Dict *dict; |
344 | 12.6k | Object obj; |
345 | 12.6k | char buf[4096]; |
346 | 12.6k | int n; |
347 | | |
348 | 12.6k | if (!metadata.isStream()) { |
349 | 11.7k | return NULL; |
350 | 11.7k | } |
351 | 982 | dict = metadata.streamGetDict(); |
352 | 982 | if (!dict->lookup("Subtype", &obj)->isName("XML")) { |
353 | 216 | error(errSyntaxWarning, -1, "Unknown Metadata type: '{0:s}'", |
354 | 216 | obj.isName() ? obj.getName() : "???"); |
355 | 216 | } |
356 | 982 | obj.free(); |
357 | 982 | s = new GString(); |
358 | 982 | metadata.streamReset(); |
359 | 2.97k | while ((n = metadata.streamGetBlock(buf, sizeof(buf))) > 0) { |
360 | 1.99k | s->append(buf, n); |
361 | 1.99k | } |
362 | 982 | metadata.streamClose(); |
363 | 982 | return s; |
364 | 12.6k | } |
365 | | |
366 | 0 | int Catalog::findPage(int num, int gen) { |
367 | 0 | int i; |
368 | |
|
369 | 0 | #if MULTITHREADED |
370 | 0 | gLockMutex(&pageMutex); |
371 | 0 | #endif |
372 | 0 | for (i = 0; i < numPages; ++i) { |
373 | 0 | if (!pages[i]) { |
374 | 0 | loadPage(i+1); |
375 | 0 | } |
376 | 0 | if (pageRefs[i].num == num && pageRefs[i].gen == gen) { |
377 | 0 | #if MULTITHREADED |
378 | 0 | gUnlockMutex(&pageMutex); |
379 | 0 | #endif |
380 | 0 | return i + 1; |
381 | 0 | } |
382 | 0 | } |
383 | 0 | #if MULTITHREADED |
384 | 0 | gUnlockMutex(&pageMutex); |
385 | 0 | #endif |
386 | 0 | return 0; |
387 | 0 | } |
388 | | |
389 | 0 | LinkDest *Catalog::findDest(GString *name) { |
390 | 0 | LinkDest *dest; |
391 | 0 | Object obj1, obj2; |
392 | 0 | GBool found; |
393 | | |
394 | | // try named destination dictionary then name tree |
395 | 0 | found = gFalse; |
396 | 0 | if (dests.isDict()) { |
397 | 0 | if (!dests.dictLookup(name->getCString(), &obj1)->isNull()) { |
398 | 0 | found = gTrue; |
399 | 0 | } else { |
400 | 0 | obj1.free(); |
401 | 0 | } |
402 | 0 | } |
403 | 0 | if (!found && nameTree.isDict()) { |
404 | 0 | char *touchedObjs = (char *)gmalloc(xref->getNumObjects()); |
405 | 0 | memset(touchedObjs, 0, xref->getNumObjects()); |
406 | 0 | if (!findDestInTree(&nameTree, &nameTree, name, &obj1, touchedObjs) |
407 | 0 | ->isNull()) { |
408 | 0 | found = gTrue; |
409 | 0 | } else { |
410 | 0 | obj1.free(); |
411 | 0 | } |
412 | 0 | gfree(touchedObjs); |
413 | 0 | } |
414 | 0 | if (!found) { |
415 | 0 | return NULL; |
416 | 0 | } |
417 | | |
418 | | // construct LinkDest |
419 | 0 | dest = NULL; |
420 | 0 | if (obj1.isArray()) { |
421 | 0 | dest = new LinkDest(obj1.getArray()); |
422 | 0 | } else if (obj1.isDict()) { |
423 | 0 | if (obj1.dictLookup("D", &obj2)->isArray()) { |
424 | 0 | dest = new LinkDest(obj2.getArray()); |
425 | 0 | } else { |
426 | 0 | error(errSyntaxWarning, -1, "Bad named destination value"); |
427 | 0 | } |
428 | 0 | obj2.free(); |
429 | 0 | } else { |
430 | 0 | error(errSyntaxWarning, -1, "Bad named destination value"); |
431 | 0 | } |
432 | 0 | obj1.free(); |
433 | 0 | if (dest && !dest->isOk()) { |
434 | 0 | delete dest; |
435 | 0 | dest = NULL; |
436 | 0 | } |
437 | |
|
438 | 0 | return dest; |
439 | 0 | } |
440 | | |
441 | | Object *Catalog::findDestInTree(Object *treeRef, Object *tree, GString *name, |
442 | 0 | Object *obj, char *touchedObjs) { |
443 | 0 | Object names, name1; |
444 | 0 | Object kids, kidRef, kid, limits, low, high; |
445 | 0 | GBool done, found; |
446 | 0 | int cmp, i; |
447 | | |
448 | | // check for invalid reference |
449 | 0 | if (treeRef->isRef() && |
450 | 0 | (treeRef->getRefNum() < 0 || |
451 | 0 | treeRef->getRefNum() >= xref->getNumObjects())) { |
452 | 0 | obj->initNull(); |
453 | 0 | return obj; |
454 | 0 | } |
455 | | |
456 | | // check for a destination tree loop |
457 | 0 | if (treeRef->isRef()) { |
458 | 0 | if (touchedObjs[treeRef->getRefNum()]) { |
459 | 0 | error(errSyntaxError, -1, "Loop in destination name tree"); |
460 | 0 | obj->initNull(); |
461 | 0 | return obj; |
462 | 0 | } |
463 | 0 | touchedObjs[treeRef->getRefNum()] = 1; |
464 | 0 | } |
465 | | |
466 | | // leaf node |
467 | 0 | if (tree->dictLookup("Names", &names)->isArray()) { |
468 | 0 | done = found = gFalse; |
469 | 0 | for (i = 0; !done && i < names.arrayGetLength(); i += 2) { |
470 | 0 | if (names.arrayGet(i, &name1)->isString()) { |
471 | 0 | cmp = name->cmp(name1.getString()); |
472 | 0 | if (cmp == 0) { |
473 | 0 | names.arrayGet(i+1, obj); |
474 | 0 | found = gTrue; |
475 | 0 | done = gTrue; |
476 | 0 | } else if (cmp < 0) { |
477 | 0 | done = gTrue; |
478 | 0 | } |
479 | 0 | } |
480 | 0 | name1.free(); |
481 | 0 | } |
482 | 0 | names.free(); |
483 | 0 | if (!found) { |
484 | 0 | obj->initNull(); |
485 | 0 | } |
486 | 0 | return obj; |
487 | 0 | } |
488 | 0 | names.free(); |
489 | | |
490 | | // root or intermediate node |
491 | 0 | done = gFalse; |
492 | 0 | if (tree->dictLookup("Kids", &kids)->isArray()) { |
493 | 0 | for (i = 0; !done && i < kids.arrayGetLength(); ++i) { |
494 | 0 | kids.arrayGetNF(i, &kidRef); |
495 | 0 | kids.arrayGet(i, &kid); |
496 | 0 | if (kid.isDict()) { |
497 | 0 | if (kid.dictLookup("Limits", &limits)->isArray()) { |
498 | 0 | if (limits.arrayGet(0, &low)->isString() && |
499 | 0 | name->cmp(low.getString()) >= 0) { |
500 | 0 | if (limits.arrayGet(1, &high)->isString() && |
501 | 0 | name->cmp(high.getString()) <= 0) { |
502 | 0 | findDestInTree(&kidRef, &kid, name, obj, touchedObjs); |
503 | 0 | done = gTrue; |
504 | 0 | } |
505 | 0 | high.free(); |
506 | 0 | } |
507 | 0 | low.free(); |
508 | 0 | } |
509 | 0 | limits.free(); |
510 | 0 | } |
511 | 0 | kid.free(); |
512 | 0 | kidRef.free(); |
513 | 0 | } |
514 | 0 | } |
515 | 0 | kids.free(); |
516 | | |
517 | | // name was outside of ranges of all kids |
518 | 0 | if (!done) { |
519 | 0 | obj->initNull(); |
520 | 0 | } |
521 | |
|
522 | 0 | return obj; |
523 | 0 | } |
524 | | |
525 | 12.9k | GBool Catalog::readPageTree(Object *catDict) { |
526 | 12.9k | Object topPagesRef, topPagesObj, countObj; |
527 | 12.9k | int i; |
528 | | |
529 | 12.9k | if (!catDict->dictLookupNF("Pages", &topPagesRef)->isRef()) { |
530 | 159 | error(errSyntaxError, -1, "Top-level pages reference is wrong type ({0:s})", |
531 | 159 | topPagesRef.getTypeName()); |
532 | 159 | topPagesRef.free(); |
533 | 159 | return gFalse; |
534 | 159 | } |
535 | 12.7k | if (!topPagesRef.fetch(xref, &topPagesObj)->isDict()) { |
536 | 47 | error(errSyntaxError, -1, "Top-level pages object is wrong type ({0:s})", |
537 | 47 | topPagesObj.getTypeName()); |
538 | 47 | topPagesObj.free(); |
539 | 47 | topPagesRef.free(); |
540 | 47 | return gFalse; |
541 | 47 | } |
542 | 12.6k | if (topPagesObj.dictLookup("Count", &countObj)->isInt()) { |
543 | 11.2k | numPages = countObj.getInt(); |
544 | 11.2k | if (numPages == 0 || numPages > 50000) { |
545 | | // 1. Acrobat apparently scans the page tree if it sees a zero |
546 | | // count. |
547 | | // 2. Absurdly large page counts result in very slow loading, |
548 | | // because other code tries to fetch pages 1 through n. |
549 | | // In both cases: ignore the given page count and scan the tree |
550 | | // instead. |
551 | 112 | char *touchedObjs = (char *)gmalloc(xref->getNumObjects()); |
552 | 112 | memset(touchedObjs, 0, xref->getNumObjects()); |
553 | 112 | numPages = countPageTree(&topPagesRef, touchedObjs); |
554 | 112 | gfree(touchedObjs); |
555 | 112 | } |
556 | 11.2k | } else { |
557 | | // assume we got a Page node instead of a Pages node |
558 | 1.45k | numPages = 1; |
559 | 1.45k | } |
560 | 12.6k | countObj.free(); |
561 | 12.6k | if (numPages < 0) { |
562 | 2 | error(errSyntaxError, -1, "Invalid page count"); |
563 | 2 | topPagesObj.free(); |
564 | 2 | topPagesRef.free(); |
565 | 2 | numPages = 0; |
566 | 2 | return gFalse; |
567 | 2 | } |
568 | 12.6k | pageTree = new PageTreeNode(topPagesRef.getRef(), numPages, NULL); |
569 | 12.6k | topPagesObj.free(); |
570 | 12.6k | topPagesRef.free(); |
571 | 12.6k | pages = (Page **)greallocn(pages, numPages, sizeof(Page *)); |
572 | 12.6k | pageRefs = (Ref *)greallocn(pageRefs, numPages, sizeof(Ref)); |
573 | 397k | for (i = 0; i < numPages; ++i) { |
574 | 384k | pages[i] = NULL; |
575 | 384k | pageRefs[i].num = -1; |
576 | 384k | pageRefs[i].gen = -1; |
577 | 384k | } |
578 | 12.6k | return gTrue; |
579 | 12.6k | } |
580 | | |
581 | 21.5k | int Catalog::countPageTree(Object *pagesNodeRef, char *touchedObjs) { |
582 | | // check for invalid reference |
583 | 21.5k | if (pagesNodeRef->isRef() && |
584 | 2.83k | (pagesNodeRef->getRefNum() < 0 || |
585 | 2.83k | pagesNodeRef->getRefNum() >= xref->getNumObjects())) { |
586 | 270 | return 0; |
587 | 270 | } |
588 | | |
589 | | // check for a page tree loop; fetch the node object |
590 | 21.3k | Object pagesNode; |
591 | 21.3k | if (pagesNodeRef->isRef()) { |
592 | 2.56k | if (touchedObjs[pagesNodeRef->getRefNum()]) { |
593 | 2.26k | error(errSyntaxError, -1, "Loop in Pages tree"); |
594 | 2.26k | return 0; |
595 | 2.26k | } |
596 | 292 | touchedObjs[pagesNodeRef->getRefNum()] = 1; |
597 | 292 | xref->fetch(pagesNodeRef->getRefNum(), pagesNodeRef->getRefGen(), |
598 | 292 | &pagesNode); |
599 | 18.7k | } else { |
600 | 18.7k | pagesNodeRef->copy(&pagesNode); |
601 | 18.7k | } |
602 | | |
603 | | // count the subtree |
604 | 19.0k | int n = 0; |
605 | 19.0k | if (pagesNode.isDict()) { |
606 | 1.55k | Object kidsRef, kids; |
607 | 1.55k | pagesNode.dictLookupNF("Kids", &kidsRef); |
608 | 1.55k | if (kidsRef.isRef() && |
609 | 3 | kidsRef.getRefNum() >= 0 && |
610 | 3 | kidsRef.getRefNum() < xref->getNumObjects()) { |
611 | 3 | if (touchedObjs[kidsRef.getRefNum()]) { |
612 | 2 | error(errSyntaxError, -1, "Loop in Pages tree"); |
613 | 2 | kidsRef.free(); |
614 | 2 | pagesNode.free(); |
615 | 2 | return 0; |
616 | 2 | } |
617 | 1 | touchedObjs[kidsRef.getRefNum()] = 1; |
618 | 1 | xref->fetch(kidsRef.getRefNum(), kidsRef.getRefGen(), &kids); |
619 | 1.55k | } else { |
620 | 1.55k | kidsRef.copy(&kids); |
621 | 1.55k | } |
622 | 1.55k | kidsRef.free(); |
623 | 1.55k | if (kids.isArray()) { |
624 | 21.6k | for (int i = 0; i < kids.arrayGetLength(); ++i) { |
625 | 21.4k | Object kid; |
626 | 21.4k | kids.arrayGetNF(i, &kid); |
627 | 21.4k | int n2 = countPageTree(&kid, touchedObjs); |
628 | 21.4k | if (n2 < INT_MAX - n) { |
629 | 21.4k | n += n2; |
630 | 21.4k | } else { |
631 | 0 | error(errSyntaxError, -1, "Page tree contains too many pages"); |
632 | 0 | n = INT_MAX; |
633 | 0 | } |
634 | 21.4k | kid.free(); |
635 | 21.4k | } |
636 | 1.39k | } else { |
637 | 1.39k | n = 1; |
638 | 1.39k | } |
639 | 1.55k | kids.free(); |
640 | 1.55k | } |
641 | | |
642 | 19.0k | pagesNode.free(); |
643 | | |
644 | 19.0k | return n; |
645 | 19.0k | } |
646 | | |
647 | 384k | void Catalog::loadPage(int pg) { |
648 | 384k | loadPage2(pg, pg - 1, pageTree); |
649 | 384k | } |
650 | | |
651 | 410k | void Catalog::loadPage2(int pg, int relPg, PageTreeNode *node) { |
652 | 410k | Object pageRefObj, pageObj, kidsObj, kidRefObj, kidObj, countObj; |
653 | 410k | PageTreeNode *kidNode, *p; |
654 | 410k | PageAttrs *attrs; |
655 | 410k | int count, i; |
656 | | |
657 | 410k | if (relPg >= node->count) { |
658 | 0 | error(errSyntaxError, -1, "Internal error in page tree"); |
659 | 0 | pages[pg-1] = new Page(doc, pg); |
660 | 0 | return; |
661 | 0 | } |
662 | | |
663 | | // if this node has not been filled in yet, it's either a leaf node |
664 | | // or an unread internal node |
665 | 410k | if (!node->kids) { |
666 | | |
667 | | // check for a loop in the page tree |
668 | 185k | for (p = node->parent; p; p = p->parent) { |
669 | 25.1k | if (node->ref.num == p->ref.num && node->ref.gen == p->ref.gen) { |
670 | 1.59k | error(errSyntaxError, -1, "Loop in Pages tree"); |
671 | 1.59k | pages[pg-1] = new Page(doc, pg); |
672 | 1.59k | return; |
673 | 1.59k | } |
674 | 25.1k | } |
675 | | |
676 | | // fetch the Page/Pages object |
677 | 160k | pageRefObj.initRef(node->ref.num, node->ref.gen); |
678 | 160k | if (!pageRefObj.fetch(xref, &pageObj)->isDict()) { |
679 | 0 | error(errSyntaxError, -1, "Page tree object is wrong type ({0:s})", |
680 | 0 | pageObj.getTypeName()); |
681 | 0 | pageObj.free(); |
682 | 0 | pageRefObj.free(); |
683 | 0 | pages[pg-1] = new Page(doc, pg); |
684 | 0 | return; |
685 | 0 | } |
686 | | |
687 | | // merge the PageAttrs |
688 | 160k | attrs = new PageAttrs(node->parent ? node->parent->attrs |
689 | 160k | : (PageAttrs *)NULL, |
690 | 160k | pageObj.getDict(), xref); |
691 | | |
692 | | // if "Kids" exists, it's an internal node |
693 | 160k | if (pageObj.dictLookup("Kids", &kidsObj)->isArray()) { |
694 | | |
695 | | // save the PageAttrs |
696 | 12.3k | node->attrs = attrs; |
697 | | |
698 | | // read the kids |
699 | 12.3k | node->kids = new GList(); |
700 | 67.2k | for (i = 0; i < kidsObj.arrayGetLength(); ++i) { |
701 | 54.9k | if (kidsObj.arrayGetNF(i, &kidRefObj)->isRef()) { |
702 | 20.8k | if (kidRefObj.fetch(xref, &kidObj)->isDict()) { |
703 | 17.0k | if (kidObj.dictLookup("Count", &countObj)->isInt()) { |
704 | 1.60k | count = countObj.getInt(); |
705 | 15.4k | } else { |
706 | 15.4k | count = 1; |
707 | 15.4k | } |
708 | 17.0k | countObj.free(); |
709 | 17.0k | node->kids->append(new PageTreeNode(kidRefObj.getRef(), count, |
710 | 17.0k | node)); |
711 | 17.0k | } else { |
712 | 3.82k | error(errSyntaxError, -1, "Page tree object is wrong type ({0:s})", |
713 | 3.82k | kidObj.getTypeName()); |
714 | 3.82k | } |
715 | 20.8k | kidObj.free(); |
716 | 34.1k | } else { |
717 | 34.1k | error(errSyntaxError, -1, |
718 | 34.1k | "Page tree reference is wrong type ({0:s})", |
719 | 34.1k | kidRefObj.getTypeName()); |
720 | 34.1k | } |
721 | 54.9k | kidRefObj.free(); |
722 | 54.9k | } |
723 | | |
724 | 148k | } else { |
725 | | |
726 | | // create the Page object |
727 | 148k | pageRefs[pg-1] = node->ref; |
728 | 148k | pages[pg-1] = new Page(doc, pg, pageObj.getDict(), attrs); |
729 | 148k | if (!pages[pg-1]->isOk()) { |
730 | 2.89k | delete pages[pg-1]; |
731 | 2.89k | pages[pg-1] = new Page(doc, pg); |
732 | 2.89k | } |
733 | | |
734 | 148k | } |
735 | | |
736 | 160k | kidsObj.free(); |
737 | 160k | pageObj.free(); |
738 | 160k | pageRefObj.free(); |
739 | 160k | } |
740 | | |
741 | | // recursively descend the tree |
742 | 408k | if (node->kids) { |
743 | 843k | for (i = 0; i < node->kids->getLength(); ++i) { |
744 | 609k | kidNode = (PageTreeNode *)node->kids->get(i); |
745 | 609k | if (relPg < kidNode->count) { |
746 | 25.9k | loadPage2(pg, relPg, kidNode); |
747 | 25.9k | break; |
748 | 25.9k | } |
749 | 583k | relPg -= kidNode->count; |
750 | 583k | } |
751 | | |
752 | | // this will only happen if the page tree is invalid |
753 | | // (i.e., parent count > sum of children counts) |
754 | 260k | if (i == node->kids->getLength()) { |
755 | 234k | error(errSyntaxError, -1, "Invalid page count in page tree"); |
756 | 234k | pages[pg-1] = new Page(doc, pg); |
757 | 234k | } |
758 | 260k | } |
759 | 408k | } |
760 | | |
761 | 0 | Object *Catalog::getDestOutputProfile(Object *destOutProf) { |
762 | 0 | Object catDict, intents, intent, subtype; |
763 | 0 | int i; |
764 | |
|
765 | 0 | if (!xref->getCatalog(&catDict)->isDict()) { |
766 | 0 | goto err1; |
767 | 0 | } |
768 | 0 | if (!catDict.dictLookup("OutputIntents", &intents)->isArray()) { |
769 | 0 | goto err2; |
770 | 0 | } |
771 | 0 | for (i = 0; i < intents.arrayGetLength(); ++i) { |
772 | 0 | intents.arrayGet(i, &intent); |
773 | 0 | if (!intent.isDict()) { |
774 | 0 | intent.free(); |
775 | 0 | continue; |
776 | 0 | } |
777 | 0 | if (!intent.dictLookup("S", &subtype)->isName("GTS_PDFX")) { |
778 | 0 | subtype.free(); |
779 | 0 | intent.free(); |
780 | 0 | continue; |
781 | 0 | } |
782 | 0 | subtype.free(); |
783 | 0 | if (!intent.dictLookup("DestOutputProfile", destOutProf)->isStream()) { |
784 | 0 | destOutProf->free(); |
785 | 0 | intent.free(); |
786 | 0 | goto err2; |
787 | 0 | } |
788 | 0 | intent.free(); |
789 | 0 | intents.free(); |
790 | 0 | catDict.free(); |
791 | 0 | return destOutProf; |
792 | 0 | } |
793 | | |
794 | 0 | err2: |
795 | 0 | intents.free(); |
796 | 0 | err1: |
797 | 0 | catDict.free(); |
798 | 0 | return NULL; |
799 | 0 | } |
800 | | |
801 | 12.6k | void Catalog::readEmbeddedFileList(Dict *catDict) { |
802 | 12.6k | Object obj1, obj2; |
803 | 12.6k | char *touchedObjs; |
804 | | |
805 | 12.6k | touchedObjs = (char *)gmalloc(xref->getNumObjects()); |
806 | 12.6k | memset(touchedObjs, 0, xref->getNumObjects()); |
807 | | |
808 | | // read the embedded file name tree |
809 | 12.6k | if (catDict->lookup("Names", &obj1)->isDict()) { |
810 | 616 | obj1.dictLookupNF("EmbeddedFiles", &obj2); |
811 | 616 | readEmbeddedFileTree(&obj2, touchedObjs); |
812 | 616 | obj2.free(); |
813 | 616 | } |
814 | 12.6k | obj1.free(); |
815 | | |
816 | | // look for file attachment annotations |
817 | 12.6k | readFileAttachmentAnnots(catDict->lookupNF("Pages", &obj1), touchedObjs); |
818 | 12.6k | obj1.free(); |
819 | | |
820 | 12.6k | gfree(touchedObjs); |
821 | 12.6k | } |
822 | | |
823 | 2.07k | void Catalog::readEmbeddedFileTree(Object *nodeRef, char *touchedObjs) { |
824 | 2.07k | Object node, kidsObj, kidObj; |
825 | 2.07k | Object namesObj, nameObj, fileSpecObj; |
826 | 2.07k | int i; |
827 | | |
828 | | // check for an object loop |
829 | 2.07k | if (nodeRef->isRef()) { |
830 | 85 | if (nodeRef->getRefNum() < 0 || |
831 | 85 | nodeRef->getRefNum() >= xref->getNumObjects() || |
832 | 71 | touchedObjs[nodeRef->getRefNum()]) { |
833 | 71 | return; |
834 | 71 | } |
835 | 14 | touchedObjs[nodeRef->getRefNum()] = 1; |
836 | 14 | xref->fetch(nodeRef->getRefNum(), nodeRef->getRefGen(), &node); |
837 | 1.98k | } else { |
838 | 1.98k | nodeRef->copy(&node); |
839 | 1.98k | } |
840 | | |
841 | 1.99k | if (!node.isDict()) { |
842 | 1.90k | node.free(); |
843 | 1.90k | return; |
844 | 1.90k | } |
845 | | |
846 | 99 | if (checkDictLookup(&node, "Kids", &kidsObj, touchedObjs)->isArray()) { |
847 | 1.50k | for (i = 0; i < kidsObj.arrayGetLength(); ++i) { |
848 | 1.45k | kidsObj.arrayGetNF(i, &kidObj); |
849 | 1.45k | readEmbeddedFileTree(&kidObj, touchedObjs); |
850 | 1.45k | kidObj.free(); |
851 | 1.45k | } |
852 | 52 | } else { |
853 | 52 | if (checkDictLookup(&node, "Names", &namesObj, touchedObjs)->isArray()) { |
854 | 125 | for (i = 0; i+1 < namesObj.arrayGetLength(); ++i) { |
855 | 115 | namesObj.arrayGet(i, &nameObj); |
856 | 115 | namesObj.arrayGet(i+1, &fileSpecObj); |
857 | 115 | readEmbeddedFile(&fileSpecObj, &nameObj); |
858 | 115 | nameObj.free(); |
859 | 115 | fileSpecObj.free(); |
860 | 115 | } |
861 | 10 | } |
862 | 52 | namesObj.free(); |
863 | 52 | } |
864 | 99 | kidsObj.free(); |
865 | | |
866 | 99 | node.free(); |
867 | 99 | } |
868 | | |
869 | | void Catalog::readFileAttachmentAnnots(Object *pageNodeRef, |
870 | 61.8k | char *touchedObjs) { |
871 | 61.8k | Object pageNode, kids, kid, annots, annot, subtype, fileSpec, contents; |
872 | 61.8k | int i; |
873 | | |
874 | | // check for an invalid object reference (e.g., in a damaged PDF file) |
875 | 61.8k | if (pageNodeRef->isRef() && |
876 | 32.6k | (pageNodeRef->getRefNum() < 0 || |
877 | 32.6k | pageNodeRef->getRefNum() >= xref->getNumObjects())) { |
878 | 1.66k | return; |
879 | 1.66k | } |
880 | | |
881 | | // check for a page tree loop |
882 | 60.1k | if (pageNodeRef->isRef()) { |
883 | 31.0k | if (touchedObjs[pageNodeRef->getRefNum()]) { |
884 | 3.43k | return; |
885 | 3.43k | } |
886 | 27.5k | touchedObjs[pageNodeRef->getRefNum()] = 1; |
887 | 27.5k | xref->fetch(pageNodeRef->getRefNum(), pageNodeRef->getRefGen(), &pageNode); |
888 | 29.1k | } else { |
889 | 29.1k | pageNodeRef->copy(&pageNode); |
890 | 29.1k | } |
891 | | |
892 | 56.7k | if (pageNode.isDict()) { |
893 | 27.4k | if (checkDictLookup(&pageNode, "Kids", &kids, touchedObjs)->isArray()) { |
894 | 61.0k | for (i = 0; i < kids.arrayGetLength(); ++i) { |
895 | 49.1k | readFileAttachmentAnnots(kids.arrayGetNF(i, &kid), touchedObjs); |
896 | 49.1k | kid.free(); |
897 | 49.1k | } |
898 | 15.4k | } else { |
899 | 15.4k | if (checkDictLookup(&pageNode, "Annots", |
900 | 15.4k | &annots, touchedObjs)->isArray()) { |
901 | 42.5k | for (i = 0; i < annots.arrayGetLength(); ++i) { |
902 | 40.2k | if (checkArrayGet(&annots, i, &annot, touchedObjs)->isDict()) { |
903 | 8.56k | if (checkDictLookup(&annot, "Subtype", &subtype, touchedObjs) |
904 | 8.56k | ->isName("FileAttachment")) { |
905 | 0 | if (checkDictLookup(&annot, "FS", &fileSpec, touchedObjs)) { |
906 | 0 | readEmbeddedFile(&fileSpec, |
907 | 0 | checkDictLookup(&annot, "Contents", |
908 | 0 | &contents, touchedObjs)); |
909 | 0 | contents.free(); |
910 | 0 | } |
911 | 0 | fileSpec.free(); |
912 | 0 | } |
913 | 8.56k | subtype.free(); |
914 | 8.56k | } |
915 | 40.2k | annot.free(); |
916 | 40.2k | } |
917 | 2.32k | } |
918 | 15.4k | annots.free(); |
919 | 15.4k | } |
920 | 27.4k | kids.free(); |
921 | 27.4k | } |
922 | | |
923 | 56.7k | pageNode.free(); |
924 | 56.7k | } |
925 | | |
926 | 115 | void Catalog::readEmbeddedFile(Object *fileSpec, Object *name1) { |
927 | 115 | Object name2, efObj, streamObj; |
928 | 115 | GString *s; |
929 | 115 | TextString *name; |
930 | | |
931 | 115 | if (fileSpec->isDict()) { |
932 | 15 | if (fileSpec->dictLookup("UF", &name2)->isString()) { |
933 | 1 | name = new TextString(name2.getString()); |
934 | 14 | } else { |
935 | 14 | name2.free(); |
936 | 14 | if (fileSpec->dictLookup("F", &name2)->isString()) { |
937 | 3 | name = new TextString(name2.getString()); |
938 | 11 | } else if (name1 && name1->isString()) { |
939 | 3 | name = new TextString(name1->getString()); |
940 | 8 | } else { |
941 | 8 | s = new GString("?"); |
942 | 8 | name = new TextString(s); |
943 | 8 | delete s; |
944 | 8 | } |
945 | 14 | } |
946 | 15 | name2.free(); |
947 | 15 | if (fileSpec->dictLookup("EF", &efObj)->isDict()) { |
948 | 4 | if (efObj.dictLookupNF("F", &streamObj)->isRef()) { |
949 | 3 | if (!embeddedFiles) { |
950 | 3 | embeddedFiles = new GList(); |
951 | 3 | } |
952 | 3 | embeddedFiles->append(new EmbeddedFile(name, &streamObj)); |
953 | 3 | } else { |
954 | 1 | delete name; |
955 | 1 | } |
956 | 4 | streamObj.free(); |
957 | 11 | } else { |
958 | 11 | delete name; |
959 | 11 | } |
960 | 15 | efObj.free(); |
961 | 15 | } |
962 | 115 | } |
963 | | |
964 | 0 | int Catalog::getNumEmbeddedFiles() { |
965 | 0 | return embeddedFiles ? embeddedFiles->getLength() : 0; |
966 | 0 | } |
967 | | |
968 | 0 | Unicode *Catalog::getEmbeddedFileName(int idx) { |
969 | 0 | return ((EmbeddedFile *)embeddedFiles->get(idx))->name->getUnicode(); |
970 | 0 | } |
971 | | |
972 | 0 | int Catalog::getEmbeddedFileNameLength(int idx) { |
973 | 0 | return ((EmbeddedFile *)embeddedFiles->get(idx))->name->getLength(); |
974 | 0 | } |
975 | | |
976 | 0 | Object *Catalog::getEmbeddedFileStreamRef(int idx) { |
977 | 0 | return &((EmbeddedFile *)embeddedFiles->get(idx))->streamRef; |
978 | 0 | } |
979 | | |
980 | 0 | Object *Catalog::getEmbeddedFileStreamObj(int idx, Object *strObj) { |
981 | 0 | ((EmbeddedFile *)embeddedFiles->get(idx))->streamRef.fetch(xref, strObj); |
982 | 0 | if (!strObj->isStream()) { |
983 | 0 | strObj->free(); |
984 | 0 | return NULL; |
985 | 0 | } |
986 | 0 | return strObj; |
987 | 0 | } |
988 | | |
989 | 340 | void Catalog::readPageLabelTree(Object *root) { |
990 | 340 | PageLabelNode *label0, *label1; |
991 | 340 | char *touchedObjs; |
992 | 340 | int i; |
993 | | |
994 | 340 | touchedObjs = (char *)gmalloc(xref->getNumObjects()); |
995 | 340 | memset(touchedObjs, 0, xref->getNumObjects()); |
996 | 340 | pageLabels = new GList(); |
997 | 340 | readPageLabelTree2(root, touchedObjs); |
998 | 340 | gfree(touchedObjs); |
999 | | |
1000 | 340 | if (pageLabels->getLength() == 0) { |
1001 | 194 | deleteGList(pageLabels, PageLabelNode); |
1002 | 194 | pageLabels = NULL; |
1003 | 194 | return; |
1004 | 194 | } |
1005 | | |
1006 | | // set lastPage in each node |
1007 | 146 | label0 = (PageLabelNode *)pageLabels->get(0); |
1008 | 272 | for (i = 1; i < pageLabels->getLength(); ++i) { |
1009 | 126 | label1 = (PageLabelNode *)pageLabels->get(i); |
1010 | 126 | label0->lastPage = label1->firstPage - 1; |
1011 | 126 | label0 = label1; |
1012 | 126 | } |
1013 | 146 | label0->lastPage = numPages; |
1014 | 146 | } |
1015 | | |
1016 | 650 | void Catalog::readPageLabelTree2(Object *nodeRef, char *touchedObjs) { |
1017 | 650 | Object node, nums, num, labelObj, kidsRef, kids, kid; |
1018 | 650 | int i; |
1019 | | |
1020 | | // check for an object loop |
1021 | 650 | if (nodeRef->isRef()) { |
1022 | 103 | if (nodeRef->getRefNum() < 0 || |
1023 | 103 | nodeRef->getRefNum() >= xref->getNumObjects() || |
1024 | 90 | touchedObjs[nodeRef->getRefNum()]) { |
1025 | 43 | return; |
1026 | 43 | } |
1027 | 60 | touchedObjs[nodeRef->getRefNum()] = 1; |
1028 | 60 | xref->fetch(nodeRef->getRefNum(), nodeRef->getRefGen(), &node); |
1029 | 547 | } else { |
1030 | 547 | nodeRef->copy(&node); |
1031 | 547 | } |
1032 | | |
1033 | 607 | if (!node.isDict()) { |
1034 | 237 | node.free(); |
1035 | 237 | return; |
1036 | 237 | } |
1037 | | |
1038 | 370 | if (node.dictLookup("Nums", &nums)->isArray()) { |
1039 | 1.24k | for (i = 0; i < nums.arrayGetLength() - 1; i += 2) { |
1040 | 1.05k | if (nums.arrayGet(i, &num)->isInt()) { |
1041 | 436 | if (nums.arrayGet(i+1, &labelObj)->isDict()) { |
1042 | 272 | pageLabels->append(new PageLabelNode(num.getInt(), |
1043 | 272 | labelObj.getDict())); |
1044 | 272 | } |
1045 | 436 | labelObj.free(); |
1046 | 436 | } |
1047 | 1.05k | num.free(); |
1048 | 1.05k | } |
1049 | 191 | } |
1050 | 370 | nums.free(); |
1051 | | |
1052 | | // check for an object loop in the Kids entry |
1053 | 370 | if (node.dictLookupNF("Kids", &kidsRef)->isRef()) { |
1054 | 2 | if (kidsRef.getRefNum() < 0 || |
1055 | 2 | kidsRef.getRefNum() >= doc->getXRef()->getNumObjects() || |
1056 | 2 | touchedObjs[kidsRef.getRefNum()]) { |
1057 | 2 | kidsRef.free(); |
1058 | 2 | node.free(); |
1059 | 2 | return; |
1060 | 2 | } |
1061 | 0 | touchedObjs[kidsRef.getRefNum()] = 1; |
1062 | 0 | kidsRef.fetch(doc->getXRef(), &kids); |
1063 | 368 | } else { |
1064 | 368 | kidsRef.copy(&kids); |
1065 | 368 | } |
1066 | 368 | kidsRef.free(); |
1067 | | |
1068 | 368 | if (kids.isArray()) { |
1069 | 337 | for (i = 0; i < kids.arrayGetLength(); ++i) { |
1070 | 310 | kids.arrayGetNF(i, &kid); |
1071 | 310 | readPageLabelTree2(&kid, touchedObjs); |
1072 | 310 | kid.free(); |
1073 | 310 | } |
1074 | 27 | } |
1075 | 368 | kids.free(); |
1076 | | |
1077 | 368 | node.free(); |
1078 | 368 | } |
1079 | | |
1080 | 0 | TextString *Catalog::getPageLabel(int pageNum) { |
1081 | 0 | PageLabelNode *label; |
1082 | 0 | TextString *ts; |
1083 | 0 | int pageRangeNum; |
1084 | 0 | GString *suffix; |
1085 | |
|
1086 | 0 | if (!pageLabels || !(label = findPageLabel(pageNum))) { |
1087 | 0 | return NULL; |
1088 | 0 | } |
1089 | | |
1090 | 0 | ts = new TextString(label->prefix); |
1091 | |
|
1092 | 0 | pageRangeNum = label->start + (pageNum - label->firstPage); |
1093 | |
|
1094 | 0 | suffix = NULL; |
1095 | 0 | if (label->style == 'D') { |
1096 | 0 | suffix = GString::format("{0:d}", pageRangeNum); |
1097 | 0 | } else if (label->style == 'R') { |
1098 | 0 | suffix = makeRomanNumeral(pageRangeNum, gTrue); |
1099 | 0 | } else if (label->style == 'r') { |
1100 | 0 | suffix = makeRomanNumeral(pageRangeNum, gFalse); |
1101 | 0 | } else if (label->style == 'A') { |
1102 | 0 | suffix = makeLetterLabel(pageRangeNum, gTrue); |
1103 | 0 | } else if (label->style == 'a') { |
1104 | 0 | suffix = makeLetterLabel(pageRangeNum, gFalse); |
1105 | 0 | } |
1106 | 0 | if (suffix) { |
1107 | 0 | ts->append(suffix); |
1108 | 0 | delete suffix; |
1109 | 0 | } |
1110 | |
|
1111 | 0 | return ts; |
1112 | 0 | } |
1113 | | |
1114 | 0 | PageLabelNode *Catalog::findPageLabel(int pageNum) { |
1115 | 0 | PageLabelNode *label; |
1116 | 0 | int i; |
1117 | | |
1118 | | //~ this could use a binary search |
1119 | 0 | for (i = 0; i < pageLabels->getLength(); ++i) { |
1120 | 0 | label = (PageLabelNode *)pageLabels->get(i); |
1121 | 0 | if (pageNum >= label->firstPage && pageNum <= label->lastPage) { |
1122 | 0 | return label; |
1123 | 0 | } |
1124 | 0 | } |
1125 | 0 | return NULL; |
1126 | 0 | } |
1127 | | |
1128 | 0 | GString *Catalog::makeRomanNumeral(int num, GBool uppercase) { |
1129 | 0 | GString *s; |
1130 | |
|
1131 | 0 | s = new GString(); |
1132 | 0 | while (num >= 1000) { |
1133 | 0 | s->append(uppercase ? 'M' : 'm'); |
1134 | 0 | num -= 1000; |
1135 | 0 | } |
1136 | 0 | if (num >= 900) { |
1137 | 0 | s->append(uppercase ? "CM" : "cm"); |
1138 | 0 | num -= 900; |
1139 | 0 | } else if (num >= 500) { |
1140 | 0 | s->append(uppercase ? 'D' : 'd'); |
1141 | 0 | num -= 500; |
1142 | 0 | } else if (num >= 400) { |
1143 | 0 | s->append(uppercase ? "CD" : "cd"); |
1144 | 0 | num -= 400; |
1145 | 0 | } |
1146 | 0 | while (num >= 100) { |
1147 | 0 | s->append(uppercase ? 'C' : 'c'); |
1148 | 0 | num -= 100; |
1149 | 0 | } |
1150 | 0 | if (num >= 90) { |
1151 | 0 | s->append(uppercase ? "XC" : "xc"); |
1152 | 0 | num -= 90; |
1153 | 0 | } else if (num >= 50) { |
1154 | 0 | s->append(uppercase ? 'L' : 'l'); |
1155 | 0 | num -= 50; |
1156 | 0 | } else if (num >= 40) { |
1157 | 0 | s->append(uppercase ? "XL" : "xl"); |
1158 | 0 | num -= 40; |
1159 | 0 | } |
1160 | 0 | while (num >= 10) { |
1161 | 0 | s->append(uppercase ? 'X' : 'x'); |
1162 | 0 | num -= 10; |
1163 | 0 | } |
1164 | 0 | if (num >= 9) { |
1165 | 0 | s->append(uppercase ? "IX" : "ix"); |
1166 | 0 | num -= 9; |
1167 | 0 | } else if (num >= 5) { |
1168 | 0 | s->append(uppercase ? 'V' : 'v'); |
1169 | 0 | num -= 5; |
1170 | 0 | } else if (num >= 4) { |
1171 | 0 | s->append(uppercase ? "IV" : "iv"); |
1172 | 0 | num -= 4; |
1173 | 0 | } |
1174 | 0 | while (num >= 1) { |
1175 | 0 | s->append(uppercase ? 'I' : 'i'); |
1176 | 0 | num -= 1; |
1177 | 0 | } |
1178 | 0 | return s; |
1179 | 0 | } |
1180 | | |
1181 | 0 | GString *Catalog::makeLetterLabel(int num, GBool uppercase) { |
1182 | 0 | GString *s; |
1183 | 0 | int m, n, i; |
1184 | |
|
1185 | 0 | m = (num - 1) / 26 + 1; |
1186 | 0 | n = (num - 1) % 26; |
1187 | 0 | s = new GString(); |
1188 | 0 | for (i = 0; i < m; ++i) { |
1189 | 0 | s->append((char)((uppercase ? 'A' : 'a') + n)); |
1190 | 0 | } |
1191 | 0 | return s; |
1192 | 0 | } |
1193 | | |
1194 | 0 | int Catalog::getPageNumFromPageLabel(TextString *pageLabel) { |
1195 | 0 | PageLabelNode *label; |
1196 | 0 | int pageNum, prefixLength, i, n; |
1197 | |
|
1198 | 0 | if (!pageLabels) { |
1199 | 0 | return -1; |
1200 | 0 | } |
1201 | 0 | for (i = 0; i < pageLabels->getLength(); ++i) { |
1202 | 0 | label = (PageLabelNode *)pageLabels->get(i); |
1203 | 0 | prefixLength = label->prefix->getLength(); |
1204 | 0 | if (pageLabel->getLength() < prefixLength || |
1205 | 0 | memcmp(pageLabel->getUnicode(), label->prefix->getUnicode(), |
1206 | 0 | prefixLength * sizeof(Unicode))) { |
1207 | 0 | continue; |
1208 | 0 | } |
1209 | 0 | if (label->style == '\0' && pageLabel->getLength() == prefixLength) { |
1210 | 0 | return label->firstPage; |
1211 | 0 | } |
1212 | 0 | if (!convertPageLabelToInt(pageLabel, prefixLength, label->style, &n)) { |
1213 | 0 | continue; |
1214 | 0 | } |
1215 | 0 | if (n < label->start) { |
1216 | 0 | continue; |
1217 | 0 | } |
1218 | 0 | pageNum = label->firstPage + n - label->start; |
1219 | 0 | if (pageNum <= label->lastPage) { |
1220 | 0 | return pageNum; |
1221 | 0 | } |
1222 | 0 | } |
1223 | 0 | return -1; |
1224 | 0 | } |
1225 | | |
1226 | | // Attempts to convert pageLabel[prefixLength .. end] to an integer, |
1227 | | // following the specified page label style. If successful, sets *n |
1228 | | // and returns true; else returns false. |
1229 | | GBool Catalog::convertPageLabelToInt(TextString *pageLabel, int prefixLength, |
1230 | 0 | char style, int *n) { |
1231 | 0 | Unicode *u; |
1232 | 0 | Unicode delta; |
1233 | 0 | int len, i; |
1234 | |
|
1235 | 0 | len = pageLabel->getLength(); |
1236 | 0 | if (len <= prefixLength) { |
1237 | 0 | return gFalse; |
1238 | 0 | } |
1239 | 0 | u = pageLabel->getUnicode(); |
1240 | 0 | if (style == 'D') { |
1241 | 0 | *n = 0; |
1242 | 0 | for (i = prefixLength; i < len; ++i) { |
1243 | 0 | if (u[i] < (Unicode)'0' || u[i] > (Unicode)'9') { |
1244 | 0 | return gFalse; |
1245 | 0 | } |
1246 | 0 | *n = *n * 10 + (u[i] - (Unicode)'0'); |
1247 | 0 | } |
1248 | 0 | return gTrue; |
1249 | 0 | } else if (style == 'R' || style == 'r') { |
1250 | 0 | delta = style - 'R'; |
1251 | 0 | *n = 0; |
1252 | 0 | i = prefixLength; |
1253 | 0 | while (i < len && u[i] == (Unicode)'M' + delta) { |
1254 | 0 | *n += 1000; |
1255 | 0 | ++i; |
1256 | 0 | } |
1257 | 0 | if (i+1 < len && u[i] == (Unicode)'C' + delta && |
1258 | 0 | u[i+1] == (Unicode)'M' + delta) { |
1259 | 0 | *n += 900; |
1260 | 0 | i += 2; |
1261 | 0 | } else if (i < len && u[i] == (Unicode)'D' + delta) { |
1262 | 0 | *n += 500; |
1263 | 0 | ++i; |
1264 | 0 | } else if (i+1 < len && u[i] == (Unicode)'C' + delta && |
1265 | 0 | u[i+1] == (Unicode)'D' + delta) { |
1266 | 0 | *n += 400; |
1267 | 0 | i += 2; |
1268 | 0 | } |
1269 | 0 | while (i < len && u[i] == (Unicode)'C' + delta) { |
1270 | 0 | *n += 100; |
1271 | 0 | ++i; |
1272 | 0 | } |
1273 | 0 | if (i+1 < len && u[i] == (Unicode)'X' + delta && |
1274 | 0 | u[i+1] == (Unicode)'C' + delta) { |
1275 | 0 | *n += 90; |
1276 | 0 | i += 2; |
1277 | 0 | } else if (i < len && u[i] == (Unicode)'L' + delta) { |
1278 | 0 | *n += 50; |
1279 | 0 | ++i; |
1280 | 0 | } else if (i+1 < len && u[i] == (Unicode)'X' + delta && |
1281 | 0 | u[i+1] == (Unicode)'L' + delta) { |
1282 | 0 | *n += 40; |
1283 | 0 | i += 2; |
1284 | 0 | } |
1285 | 0 | while (i < len && u[i] == (Unicode)'X' + delta) { |
1286 | 0 | *n += 10; |
1287 | 0 | ++i; |
1288 | 0 | } |
1289 | 0 | if (i+1 < len && u[i] == (Unicode)'I' + delta && |
1290 | 0 | u[i+1] == (Unicode)'X' + delta) { |
1291 | 0 | *n += 9; |
1292 | 0 | i += 2; |
1293 | 0 | } else if (i < len && u[i] == (Unicode)'V' + delta) { |
1294 | 0 | *n += 5; |
1295 | 0 | ++i; |
1296 | 0 | } else if (i+1 < len && u[i] == (Unicode)'I' + delta && |
1297 | 0 | u[i+1] == (Unicode)'V' + delta) { |
1298 | 0 | *n += 4; |
1299 | 0 | i += 2; |
1300 | 0 | } |
1301 | 0 | while (i < len && u[i] == (Unicode)'I' + delta) { |
1302 | 0 | *n += 1; |
1303 | 0 | ++i; |
1304 | 0 | } |
1305 | 0 | return i == len; |
1306 | 0 | } else if (style == 'A' || style == 'a') { |
1307 | 0 | if (u[prefixLength] < (Unicode)style || |
1308 | 0 | u[prefixLength] > (Unicode)style + 25) { |
1309 | 0 | return gFalse; |
1310 | 0 | } |
1311 | 0 | for (i = prefixLength + 1; i < len; ++i) { |
1312 | 0 | if (u[i] != u[prefixLength]) { |
1313 | 0 | return gFalse; |
1314 | 0 | } |
1315 | 0 | } |
1316 | 0 | *n = (len - prefixLength - 1) * 26 + (u[prefixLength] - (Unicode)style) + 1; |
1317 | 0 | return gTrue; |
1318 | 0 | } |
1319 | 0 | return gFalse; |
1320 | 0 | } |
1321 | | |
1322 | 0 | GBool Catalog::usesJavaScript() { |
1323 | 0 | Object catDict; |
1324 | 0 | if (!xref->getCatalog(&catDict)->isDict()) { |
1325 | 0 | catDict.free(); |
1326 | 0 | return gFalse; |
1327 | 0 | } |
1328 | | |
1329 | 0 | GBool usesJS = gFalse; |
1330 | | |
1331 | | // check for Catalog.Names.JavaScript |
1332 | 0 | Object namesObj; |
1333 | 0 | if (catDict.dictLookup("Names", &namesObj)->isDict()) { |
1334 | 0 | Object jsNamesObj; |
1335 | 0 | namesObj.dictLookup("JavaScript", &jsNamesObj); |
1336 | 0 | if (jsNamesObj.isDict()) { |
1337 | 0 | usesJS = gTrue; |
1338 | 0 | } |
1339 | 0 | jsNamesObj.free(); |
1340 | 0 | } |
1341 | 0 | namesObj.free(); |
1342 | | |
1343 | | // look for JavaScript actionas in Page.AA |
1344 | 0 | if (!usesJS) { |
1345 | 0 | char *touchedObjs = (char *)gmalloc(xref->getNumObjects()); |
1346 | 0 | memset(touchedObjs, 0, xref->getNumObjects()); |
1347 | 0 | Object pagesObj; |
1348 | 0 | usesJS = scanPageTreeForJavaScript(catDict.dictLookupNF("Pages", &pagesObj), |
1349 | 0 | touchedObjs); |
1350 | 0 | pagesObj.free(); |
1351 | 0 | gfree(touchedObjs); |
1352 | 0 | } |
1353 | |
|
1354 | 0 | catDict.free(); |
1355 | |
|
1356 | 0 | return usesJS; |
1357 | 0 | } |
1358 | | |
1359 | | GBool Catalog::scanPageTreeForJavaScript(Object *pageNodeRef, |
1360 | 0 | char *touchedObjs) { |
1361 | | // check for an invalid object reference (e.g., in a damaged PDF file) |
1362 | 0 | if (pageNodeRef->isRef() && |
1363 | 0 | (pageNodeRef->getRefNum() < 0 || |
1364 | 0 | pageNodeRef->getRefNum() >= xref->getNumObjects())) { |
1365 | 0 | return gFalse; |
1366 | 0 | } |
1367 | | |
1368 | | // check for a page tree loop |
1369 | 0 | Object pageNode; |
1370 | 0 | if (pageNodeRef->isRef()) { |
1371 | 0 | if (touchedObjs[pageNodeRef->getRefNum()]) { |
1372 | 0 | return gFalse; |
1373 | 0 | } |
1374 | 0 | touchedObjs[pageNodeRef->getRefNum()] = 1; |
1375 | 0 | xref->fetch(pageNodeRef->getRefNum(), pageNodeRef->getRefGen(), &pageNode); |
1376 | 0 | } else { |
1377 | 0 | pageNodeRef->copy(&pageNode); |
1378 | 0 | } |
1379 | | |
1380 | | // scan the page tree node |
1381 | 0 | GBool usesJS = gFalse; |
1382 | 0 | if (pageNode.isDict()) { |
1383 | 0 | Object kids; |
1384 | 0 | if (checkDictLookup(&pageNode, "Kids", &kids, touchedObjs)->isArray()) { |
1385 | 0 | for (int i = 0; i < kids.arrayGetLength() && !usesJS; ++i) { |
1386 | 0 | Object kid; |
1387 | 0 | if (scanPageTreeForJavaScript(kids.arrayGetNF(i, &kid), touchedObjs)) { |
1388 | 0 | usesJS = gTrue; |
1389 | 0 | } |
1390 | 0 | kid.free(); |
1391 | 0 | } |
1392 | 0 | } else { |
1393 | | |
1394 | | // scan Page.AA |
1395 | 0 | Object pageAA; |
1396 | 0 | if (checkDictLookup(&pageNode, "AA", &pageAA, touchedObjs)->isDict()) { |
1397 | 0 | if (scanAAForJavaScript(&pageAA)) { |
1398 | 0 | usesJS = gTrue; |
1399 | 0 | } |
1400 | 0 | } |
1401 | 0 | pageAA.free(); |
1402 | | |
1403 | | // scanPage.Annots |
1404 | 0 | if (!usesJS) { |
1405 | 0 | Object annots; |
1406 | 0 | if (checkDictLookup(&pageNode, "Annots", |
1407 | 0 | &annots, touchedObjs)->isArray()) { |
1408 | 0 | for (int i = 0; i < annots.arrayGetLength() && !usesJS; ++i) { |
1409 | 0 | Object annot; |
1410 | 0 | if (checkArrayGet(&annots, i, &annot, touchedObjs)->isDict()) { |
1411 | 0 | Object annotAA; |
1412 | 0 | if (checkDictLookup(&annot, "AA", &annotAA, |
1413 | 0 | touchedObjs)->isDict()) { |
1414 | 0 | if (scanAAForJavaScript(&annotAA)) { |
1415 | 0 | usesJS = gTrue; |
1416 | 0 | } |
1417 | 0 | } |
1418 | 0 | annotAA.free(); |
1419 | 0 | } |
1420 | 0 | annot.free(); |
1421 | 0 | } |
1422 | 0 | } |
1423 | 0 | annots.free(); |
1424 | 0 | } |
1425 | 0 | } |
1426 | 0 | kids.free(); |
1427 | 0 | } |
1428 | |
|
1429 | 0 | pageNode.free(); |
1430 | |
|
1431 | 0 | return usesJS; |
1432 | 0 | } |
1433 | | |
1434 | 0 | GBool Catalog::scanAAForJavaScript(Object *aaObj) { |
1435 | 0 | GBool usesJS = gFalse; |
1436 | 0 | for (int i = 0; i < aaObj->dictGetLength() && !usesJS; ++i) { |
1437 | 0 | Object action; |
1438 | 0 | if (aaObj->dictGetVal(i, &action)->isDict()) { |
1439 | 0 | Object js; |
1440 | 0 | if (!action.dictLookupNF("JS", &js)->isNull()) { |
1441 | 0 | usesJS = gTrue; |
1442 | 0 | } |
1443 | 0 | js.free(); |
1444 | 0 | } |
1445 | 0 | action.free(); |
1446 | 0 | } |
1447 | 0 | return usesJS; |
1448 | 0 | } |
1449 | | |
1450 | | Object *Catalog::checkDictLookup(Object *dictObj, const char *key, |
1451 | 51.6k | Object *element, char *touchedObjs) { |
1452 | 51.6k | Object refObj; |
1453 | 51.6k | dictObj->dictLookupNF(key, &refObj); |
1454 | 51.6k | if (refObj.isRef()) { |
1455 | 1.02k | int num = refObj.getRefNum(); |
1456 | 1.02k | if (num >= 0 && num < xref->getNumObjects() && !touchedObjs[num]) { |
1457 | 967 | touchedObjs[num] = 1; |
1458 | 967 | xref->fetch(num, refObj.getRefGen(), element); |
1459 | 967 | } else { |
1460 | 62 | element->initNull(); |
1461 | 62 | } |
1462 | 1.02k | refObj.free(); |
1463 | 50.5k | } else { |
1464 | 50.5k | *element = refObj; |
1465 | 50.5k | } |
1466 | 51.6k | return element; |
1467 | 51.6k | } |
1468 | | |
1469 | | Object *Catalog::checkArrayGet(Object *arrayObj, int i, |
1470 | 40.2k | Object *element, char *touchedObjs) { |
1471 | 40.2k | Object refObj; |
1472 | 40.2k | arrayObj->arrayGetNF(i, &refObj); |
1473 | 40.2k | if (refObj.isRef()) { |
1474 | 14.1k | int num = refObj.getRefNum(); |
1475 | 14.1k | if (num >= 0 && num < xref->getNumObjects() && !touchedObjs[num]) { |
1476 | 11.7k | touchedObjs[num] = 1; |
1477 | 11.7k | xref->fetch(num, refObj.getRefGen(), element); |
1478 | 11.7k | } else { |
1479 | 2.46k | element->initNull(); |
1480 | 2.46k | } |
1481 | 14.1k | refObj.free(); |
1482 | 26.0k | } else { |
1483 | 26.0k | *element = refObj; |
1484 | 26.0k | } |
1485 | 40.2k | return element; |
1486 | 40.2k | } |