/src/qpdf/libqpdf/NNTree.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <qpdf/NNTree.hh> |
2 | | |
3 | | #include <qpdf/QTC.hh> |
4 | | #include <qpdf/QUtil.hh> |
5 | | |
6 | | #include <exception> |
7 | | |
8 | | static std::string |
9 | | get_description(QPDFObjectHandle& node) |
10 | 11.7k | { |
11 | 11.7k | std::string result("Name/Number tree node"); |
12 | 11.7k | if (node.isIndirect()) { |
13 | 8.75k | result += " (object " + std::to_string(node.getObjectID()) + ")"; |
14 | 8.75k | } |
15 | 11.7k | return result; |
16 | 11.7k | } |
17 | | |
18 | | static void |
19 | | warn(QPDF& qpdf, QPDFObjectHandle& node, std::string const& msg) |
20 | 9.89k | { |
21 | 9.89k | qpdf.warn(qpdf_e_damaged_pdf, get_description(node), 0, msg); |
22 | 9.89k | } |
23 | | |
24 | | static void |
25 | | error(QPDF& qpdf, QPDFObjectHandle& node, std::string const& msg) |
26 | 1.83k | { |
27 | 1.83k | throw QPDFExc(qpdf_e_damaged_pdf, qpdf.getFilename(), get_description(node), 0, msg); |
28 | 1.83k | } |
29 | | |
30 | | NNTreeIterator::NNTreeIterator(NNTreeImpl& impl) : |
31 | | impl(impl), |
32 | | item_number(-1) |
33 | 75.3k | { |
34 | 75.3k | } |
35 | | |
36 | | void |
37 | | NNTreeIterator::updateIValue(bool allow_invalid) |
38 | 103k | { |
39 | | // ivalue should never be used inside the class since we return a pointer/reference to it. Every |
40 | | // bit of code that ever changes what object the iterator points to should take care to call |
41 | | // updateIValue. Failure to do this means that any old references to *iter will point to |
42 | | // incorrect objects, though the next dereference of the iterator will fix it. This isn't |
43 | | // necessarily catastrophic, but it would be confusing. The test suite attempts to exercise |
44 | | // various cases to ensure we don't introduce that bug in the future, but sadly it's tricky to |
45 | | // verify by reasoning about the code that this constraint is always satisfied. Whenever we |
46 | | // update what the iterator points to, we should call setItemNumber, which calls this. If we |
47 | | // change what the iterator points to in some other way, such as replacing a value or removing |
48 | | // an item and making the iterator point at a different item in potentially the same position, |
49 | | // we must call updateIValue as well. These cases are handled, and for good measure, we also |
50 | | // call updateIValue in operator* and operator->. |
51 | | |
52 | 103k | bool okay = false; |
53 | 103k | if ((item_number >= 0) && this->node.isDictionary()) { |
54 | 100k | auto items = this->node.getKey(impl.details.itemsKey()); |
55 | 100k | if (this->item_number + 1 < items.getArrayNItems()) { |
56 | 100k | okay = true; |
57 | 100k | this->ivalue.first = items.getArrayItem(this->item_number); |
58 | 100k | this->ivalue.second = items.getArrayItem(1 + this->item_number); |
59 | 100k | } else { |
60 | 421 | error(impl.qpdf, node, "update ivalue: items array is too short"); |
61 | 421 | } |
62 | 100k | } |
63 | 103k | if (!okay) { |
64 | 2.64k | if (!allow_invalid) { |
65 | 0 | throw std::logic_error("attempt made to dereference an invalid" |
66 | 0 | " name/number tree iterator"); |
67 | 0 | } |
68 | 2.64k | this->ivalue.first = QPDFObjectHandle(); |
69 | 2.64k | this->ivalue.second = QPDFObjectHandle(); |
70 | 2.64k | } |
71 | 103k | } |
72 | | |
73 | | NNTreeIterator::PathElement::PathElement(QPDFObjectHandle const& node, int kid_number) : |
74 | | node(node), |
75 | | kid_number(kid_number) |
76 | 12.4k | { |
77 | 12.4k | } |
78 | | |
79 | | QPDFObjectHandle |
80 | | NNTreeIterator::getNextKid(PathElement& pe, bool backward) |
81 | 1.35k | { |
82 | 1.35k | QPDFObjectHandle result; |
83 | 1.35k | bool found = false; |
84 | 3.68k | while (!found) { |
85 | 2.33k | pe.kid_number += backward ? -1 : 1; |
86 | 2.33k | auto kids = pe.node.getKey("/Kids"); |
87 | 2.33k | if ((pe.kid_number >= 0) && (pe.kid_number < kids.getArrayNItems())) { |
88 | 1.79k | result = kids.getArrayItem(pe.kid_number); |
89 | 1.79k | if (result.isDictionary() && |
90 | 1.79k | (result.hasKey("/Kids") || result.hasKey(impl.details.itemsKey()))) { |
91 | 812 | found = true; |
92 | 984 | } else { |
93 | 984 | QTC::TC("qpdf", "NNTree skip invalid kid"); |
94 | 984 | warn( |
95 | 984 | impl.qpdf, |
96 | 984 | pe.node, |
97 | 984 | ("skipping over invalid kid at index " + std::to_string(pe.kid_number))); |
98 | 984 | } |
99 | 1.79k | } else { |
100 | 534 | result = QPDFObjectHandle::newNull(); |
101 | 534 | found = true; |
102 | 534 | } |
103 | 2.33k | } |
104 | 1.35k | return result; |
105 | 1.35k | } |
106 | | |
107 | | bool |
108 | | NNTreeIterator::valid() const |
109 | 95.9k | { |
110 | 95.9k | return this->item_number >= 0; |
111 | 95.9k | } |
112 | | |
113 | | void |
114 | | NNTreeIterator::increment(bool backward) |
115 | 14.3k | { |
116 | 14.3k | if (this->item_number < 0) { |
117 | 0 | QTC::TC("qpdf", "NNTree increment end()"); |
118 | 0 | deepen(impl.oh, !backward, true); |
119 | 0 | return; |
120 | 0 | } |
121 | 14.3k | bool found_valid_key = false; |
122 | 33.2k | while (valid() && (!found_valid_key)) { |
123 | 18.8k | this->item_number += backward ? -2 : 2; |
124 | 18.8k | auto items = this->node.getKey(impl.details.itemsKey()); |
125 | 18.8k | if ((this->item_number < 0) || (this->item_number >= items.getArrayNItems())) { |
126 | 928 | bool found = false; |
127 | 928 | setItemNumber(QPDFObjectHandle(), -1); |
128 | 2.28k | while (!(found || this->path.empty())) { |
129 | 1.35k | auto& element = this->path.back(); |
130 | 1.35k | auto pe_node = getNextKid(element, backward); |
131 | 1.35k | if (pe_node.isNull()) { |
132 | 534 | this->path.pop_back(); |
133 | 824 | } else { |
134 | 824 | found = deepen(pe_node, !backward, false); |
135 | 824 | } |
136 | 1.35k | } |
137 | 928 | } |
138 | 18.8k | if (this->item_number >= 0) { |
139 | 18.2k | items = this->node.getKey(impl.details.itemsKey()); |
140 | 18.2k | if (this->item_number + 1 >= items.getArrayNItems()) { |
141 | 212 | QTC::TC("qpdf", "NNTree skip item at end of short items"); |
142 | 212 | warn(impl.qpdf, this->node, "items array doesn't have enough elements"); |
143 | 18.0k | } else if (!impl.details.keyValid(items.getArrayItem(this->item_number))) { |
144 | 4.26k | QTC::TC("qpdf", "NNTree skip invalid key"); |
145 | 4.26k | warn( |
146 | 4.26k | impl.qpdf, |
147 | 4.26k | this->node, |
148 | 4.26k | ("item " + std::to_string(this->item_number) + " has the wrong type")); |
149 | 13.7k | } else { |
150 | 13.7k | found_valid_key = true; |
151 | 13.7k | } |
152 | 18.2k | } |
153 | 18.8k | } |
154 | 14.3k | } |
155 | | |
156 | | void |
157 | | NNTreeIterator::resetLimits(QPDFObjectHandle node, std::list<PathElement>::iterator parent) |
158 | 7.35k | { |
159 | 7.35k | bool done = false; |
160 | 9.88k | while (!done) { |
161 | 7.35k | if (parent == this->path.end()) { |
162 | 4.83k | QTC::TC("qpdf", "NNTree remove limits from root"); |
163 | 4.83k | node.removeKey("/Limits"); |
164 | 4.83k | done = true; |
165 | 4.83k | break; |
166 | 4.83k | } |
167 | 2.52k | auto kids = node.getKey("/Kids"); |
168 | 2.52k | int nkids = kids.isArray() ? kids.getArrayNItems() : 0; |
169 | 2.52k | auto items = node.getKey(impl.details.itemsKey()); |
170 | 2.52k | int nitems = items.isArray() ? items.getArrayNItems() : 0; |
171 | | |
172 | 2.52k | bool changed = true; |
173 | 2.52k | QPDFObjectHandle first; |
174 | 2.52k | QPDFObjectHandle last; |
175 | 2.52k | if (nitems >= 2) { |
176 | 2.42k | first = items.getArrayItem(0); |
177 | 2.42k | last = items.getArrayItem((nitems - 1) & ~1); |
178 | 2.42k | } else if (nkids > 0) { |
179 | 100 | auto first_kid = kids.getArrayItem(0); |
180 | 100 | auto last_kid = kids.getArrayItem(nkids - 1); |
181 | 100 | if (first_kid.isDictionary() && last_kid.isDictionary()) { |
182 | 100 | auto first_limits = first_kid.getKey("/Limits"); |
183 | 100 | auto last_limits = last_kid.getKey("/Limits"); |
184 | 100 | if (first_limits.isArray() && (first_limits.getArrayNItems() >= 2) && |
185 | 100 | last_limits.isArray() && (last_limits.getArrayNItems() >= 2)) { |
186 | 100 | first = first_limits.getArrayItem(0); |
187 | 100 | last = last_limits.getArrayItem(1); |
188 | 100 | } |
189 | 100 | } |
190 | 100 | } |
191 | 2.52k | if (first.isInitialized() && last.isInitialized()) { |
192 | 2.52k | auto limits = QPDFObjectHandle::newArray(); |
193 | 2.52k | limits.appendItem(first); |
194 | 2.52k | limits.appendItem(last); |
195 | 2.52k | auto olimits = node.getKey("/Limits"); |
196 | 2.52k | if (olimits.isArray() && (olimits.getArrayNItems() == 2)) { |
197 | 2.33k | auto ofirst = olimits.getArrayItem(0); |
198 | 2.33k | auto olast = olimits.getArrayItem(1); |
199 | 2.33k | if (impl.details.keyValid(ofirst) && impl.details.keyValid(olast) && |
200 | 2.33k | (impl.details.compareKeys(first, ofirst) == 0) && |
201 | 2.33k | (impl.details.compareKeys(last, olast) == 0)) { |
202 | 1.99k | QTC::TC("qpdf", "NNTree limits didn't change"); |
203 | 1.99k | changed = false; |
204 | 1.99k | } |
205 | 2.33k | } |
206 | 2.52k | if (changed) { |
207 | 533 | node.replaceKey("/Limits", limits); |
208 | 533 | } |
209 | 2.52k | } else { |
210 | 0 | QTC::TC("qpdf", "NNTree unable to determine limits"); |
211 | 0 | warn(impl.qpdf, node, "unable to determine limits"); |
212 | 0 | } |
213 | | |
214 | 2.52k | if ((!changed) || (parent == this->path.begin())) { |
215 | 2.52k | done = true; |
216 | 2.52k | } else { |
217 | 0 | node = parent->node; |
218 | 0 | --parent; |
219 | 0 | } |
220 | 2.52k | } |
221 | 7.35k | } |
222 | | |
223 | | void |
224 | | NNTreeIterator::split(QPDFObjectHandle to_split, std::list<PathElement>::iterator parent) |
225 | 7.08k | { |
226 | | // Split some node along the path to the item pointed to by this iterator, and adjust the |
227 | | // iterator so it points to the same item. |
228 | | |
229 | | // In examples, for simplicity, /Nums is shown to just contain numbers instead of pairs. Imagine |
230 | | // this tree: |
231 | | // |
232 | | // root: << /Kids [ A B C D ] >> |
233 | | // A: << /Nums [ 1 2 3 4 ] >> |
234 | | // B: << /Nums [ 5 6 7 8 ] >> |
235 | | // C: << /Nums [ 9 10 11 12 ] >> |
236 | | // D: << /Kids [ E F ] |
237 | | // E: << /Nums [ 13 14 15 16 ] >> |
238 | | // F: << /Nums [ 17 18 19 20 ] >> |
239 | | |
240 | | // iter1 (points to 19) |
241 | | // path: |
242 | | // - { node: root: kid_number: 3 } |
243 | | // - { node: D, kid_number: 1 } |
244 | | // node: F |
245 | | // item_number: 2 |
246 | | |
247 | | // iter2 (points to 1) |
248 | | // path: |
249 | | // - { node: root, kid_number: 0} |
250 | | // node: A |
251 | | // item_number: 0 |
252 | | |
253 | 7.08k | if (!valid()) { |
254 | 0 | throw std::logic_error("NNTreeIterator::split called an invalid iterator"); |
255 | 0 | } |
256 | | |
257 | | // Find the array we actually need to split, which is either this node's kids or items. |
258 | 7.08k | auto kids = to_split.getKey("/Kids"); |
259 | 7.08k | int nkids = kids.isArray() ? kids.getArrayNItems() : 0; |
260 | 7.08k | auto items = to_split.getKey(impl.details.itemsKey()); |
261 | 7.08k | int nitems = items.isArray() ? items.getArrayNItems() : 0; |
262 | | |
263 | 7.08k | QPDFObjectHandle first_half; |
264 | 7.08k | int n = 0; |
265 | 7.08k | std::string key; |
266 | 7.08k | int threshold = 0; |
267 | 7.08k | if (nkids > 0) { |
268 | 100 | QTC::TC("qpdf", "NNTree split kids"); |
269 | 100 | first_half = kids; |
270 | 100 | n = nkids; |
271 | 100 | threshold = impl.split_threshold; |
272 | 100 | key = "/Kids"; |
273 | 6.98k | } else if (nitems > 0) { |
274 | 6.98k | QTC::TC("qpdf", "NNTree split items"); |
275 | 6.98k | first_half = items; |
276 | 6.98k | n = nitems; |
277 | 6.98k | threshold = 2 * impl.split_threshold; |
278 | 6.98k | key = impl.details.itemsKey(); |
279 | 6.98k | } else { |
280 | 0 | throw std::logic_error("NNTreeIterator::split called on invalid node"); |
281 | 0 | } |
282 | | |
283 | 7.08k | if (n <= threshold) { |
284 | 6.95k | return; |
285 | 6.95k | } |
286 | | |
287 | 135 | bool is_root = (parent == this->path.end()); |
288 | 135 | bool is_leaf = (nitems > 0); |
289 | | |
290 | | // CURRENT STATE: tree is in original state; iterator is valid and unchanged. |
291 | | |
292 | 135 | if (is_root) { |
293 | | // What we want to do is to create a new node for the second half of the items and put it in |
294 | | // the parent's /Kids array right after the element that points to the current to_split |
295 | | // node, but if we're splitting root, there is no parent, so handle that first. |
296 | | |
297 | | // In the non-root case, parent points to the path element whose /Kids contains the first |
298 | | // half node, and the first half node is to_split. If we are splitting the root, we need to |
299 | | // push everything down a level, but we want to keep the actual root object the same so that |
300 | | // indirect references to it remain intact (and also in case it might be a direct object, |
301 | | // which it shouldn't be but that case probably exists in the wild). To achieve this, we |
302 | | // create a new node for the first half and then replace /Kids in the root to contain it. |
303 | | // Then we adjust the path so that the first element is root and the second element, if any, |
304 | | // is the new first half. In this way, we make the root case identical to the non-root case |
305 | | // so remaining logic can handle them in the same way. |
306 | | |
307 | 35 | auto first_node = impl.qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary()); |
308 | 35 | first_node.replaceKey(key, first_half); |
309 | 35 | QPDFObjectHandle new_kids = QPDFObjectHandle::newArray(); |
310 | 35 | new_kids.appendItem(first_node); |
311 | 35 | to_split.removeKey("/Limits"); // already shouldn't be there for root |
312 | 35 | to_split.removeKey(impl.details.itemsKey()); |
313 | 35 | to_split.replaceKey("/Kids", new_kids); |
314 | 35 | if (is_leaf) { |
315 | 34 | QTC::TC("qpdf", "NNTree split root + leaf"); |
316 | 34 | this->node = first_node; |
317 | 34 | } else { |
318 | 1 | QTC::TC("qpdf", "NNTree split root + !leaf"); |
319 | 1 | auto next = this->path.begin(); |
320 | 1 | next->node = first_node; |
321 | 1 | } |
322 | 35 | this->path.emplace_front(to_split, 0); |
323 | 35 | parent = this->path.begin(); |
324 | 35 | to_split = first_node; |
325 | 35 | } |
326 | | |
327 | | // CURRENT STATE: parent is guaranteed to be defined, and we have the invariants that |
328 | | // parent[/Kids][kid_number] == to_split and (++parent).node == to_split. |
329 | | |
330 | | // Create a second half array, and transfer the second half of the items into the second half |
331 | | // array. |
332 | 135 | QPDFObjectHandle second_half = QPDFObjectHandle::newArray(); |
333 | 135 | int start_idx = ((n / 2) & ~1); |
334 | 4.69k | while (first_half.getArrayNItems() > start_idx) { |
335 | 4.55k | second_half.appendItem(first_half.getArrayItem(start_idx)); |
336 | 4.55k | first_half.eraseItem(start_idx); |
337 | 4.55k | } |
338 | 135 | resetLimits(to_split, parent); |
339 | | |
340 | | // Create a new node to contain the second half |
341 | 135 | QPDFObjectHandle second_node = impl.qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary()); |
342 | 135 | second_node.replaceKey(key, second_half); |
343 | 135 | resetLimits(second_node, parent); |
344 | | |
345 | | // CURRENT STATE: half the items from the kids or items array in the node being split have been |
346 | | // moved into a new node. The new node is not yet attached to the tree. The iterator may have a |
347 | | // path element or leaf node that is out of bounds. |
348 | | |
349 | | // We need to adjust the parent to add the second node to /Kids and, if needed, update |
350 | | // kid_number to traverse through it. We need to update to_split's path element, or the node if |
351 | | // this is a leaf, so that the kid/item number points to the right place. |
352 | | |
353 | 135 | auto parent_kids = parent->node.getKey("/Kids"); |
354 | 135 | parent_kids.insertItem(parent->kid_number + 1, second_node); |
355 | 135 | auto cur_elem = parent; |
356 | 135 | ++cur_elem; // points to end() for leaf nodes |
357 | 135 | int old_idx = (is_leaf ? this->item_number : cur_elem->kid_number); |
358 | 135 | if (old_idx >= start_idx) { |
359 | 87 | ++parent->kid_number; |
360 | 87 | if (is_leaf) { |
361 | 87 | QTC::TC("qpdf", "NNTree split second half item"); |
362 | 87 | setItemNumber(second_node, this->item_number - start_idx); |
363 | 87 | } else { |
364 | 0 | QTC::TC("qpdf", "NNTree split second half kid"); |
365 | 0 | cur_elem->node = second_node; |
366 | 0 | cur_elem->kid_number -= start_idx; |
367 | 0 | } |
368 | 87 | } |
369 | 135 | if (!is_root) { |
370 | 100 | QTC::TC("qpdf", "NNTree split parent"); |
371 | 100 | auto next = parent->node; |
372 | 100 | resetLimits(next, parent); |
373 | 100 | --parent; |
374 | 100 | split(next, parent); |
375 | 100 | } |
376 | 135 | } |
377 | | |
378 | | std::list<NNTreeIterator::PathElement>::iterator |
379 | | NNTreeIterator::lastPathElement() |
380 | 13.9k | { |
381 | 13.9k | auto result = this->path.end(); |
382 | 13.9k | if (!this->path.empty()) { |
383 | 4.31k | --result; |
384 | 4.31k | } |
385 | 13.9k | return result; |
386 | 13.9k | } |
387 | | |
388 | | void |
389 | | NNTreeIterator::insertAfter(QPDFObjectHandle key, QPDFObjectHandle value) |
390 | 6.11k | { |
391 | 6.11k | if (!valid()) { |
392 | 0 | QTC::TC("qpdf", "NNTree insertAfter inserts first"); |
393 | 0 | impl.insertFirst(key, value); |
394 | 0 | deepen(impl.oh, true, false); |
395 | 0 | return; |
396 | 0 | } |
397 | | |
398 | 6.11k | auto items = this->node.getKey(impl.details.itemsKey()); |
399 | 6.11k | if (!items.isArray()) { |
400 | 0 | error(impl.qpdf, node, "node contains no items array"); |
401 | 0 | } |
402 | 6.11k | if (items.getArrayNItems() < this->item_number + 2) { |
403 | 0 | error(impl.qpdf, node, "insert: items array is too short"); |
404 | 0 | } |
405 | 6.11k | items.insertItem(this->item_number + 2, key); |
406 | 6.11k | items.insertItem(this->item_number + 3, value); |
407 | 6.11k | resetLimits(this->node, lastPathElement()); |
408 | 6.11k | split(this->node, lastPathElement()); |
409 | 6.11k | increment(false); |
410 | 6.11k | } |
411 | | |
412 | | void |
413 | | NNTreeIterator::remove() |
414 | 0 | { |
415 | | // Remove this item, leaving the tree valid and this iterator pointing to the next item. |
416 | |
|
417 | 0 | if (!valid()) { |
418 | 0 | throw std::logic_error("attempt made to remove an invalid iterator"); |
419 | 0 | } |
420 | 0 | auto items = this->node.getKey(impl.details.itemsKey()); |
421 | 0 | int nitems = items.getArrayNItems(); |
422 | 0 | if (this->item_number + 2 > nitems) { |
423 | 0 | error(impl.qpdf, this->node, "found short items array while removing an item"); |
424 | 0 | } |
425 | |
|
426 | 0 | items.eraseItem(this->item_number); |
427 | 0 | items.eraseItem(this->item_number); |
428 | 0 | nitems -= 2; |
429 | |
|
430 | 0 | if (nitems > 0) { |
431 | | // There are still items left |
432 | |
|
433 | 0 | if ((this->item_number == 0) || (this->item_number == nitems)) { |
434 | | // We removed either the first or last item of an items array that remains non-empty, so |
435 | | // we have to adjust limits. |
436 | 0 | QTC::TC("qpdf", "NNTree remove reset limits"); |
437 | 0 | resetLimits(this->node, lastPathElement()); |
438 | 0 | } |
439 | |
|
440 | 0 | if (this->item_number == nitems) { |
441 | | // We removed the last item of a non-empty items array, so advance to the successor of |
442 | | // the previous item. |
443 | 0 | QTC::TC("qpdf", "NNTree erased last item"); |
444 | 0 | this->item_number -= 2; |
445 | 0 | increment(false); |
446 | 0 | } else if (this->item_number < nitems) { |
447 | | // We don't have to do anything since the removed item's successor now occupies its |
448 | | // former location. |
449 | 0 | QTC::TC("qpdf", "NNTree erased non-last item"); |
450 | 0 | updateIValue(); |
451 | 0 | } else { |
452 | | // We already checked to ensure this condition would not happen. |
453 | 0 | throw std::logic_error("NNTreeIterator::remove: item_number > nitems after erase"); |
454 | 0 | } |
455 | 0 | return; |
456 | 0 | } |
457 | | |
458 | 0 | if (this->path.empty()) { |
459 | | // Special case: if this is the root node, we can leave it empty. |
460 | 0 | QTC::TC("qpdf", "NNTree erased all items on leaf/root"); |
461 | 0 | setItemNumber(impl.oh, -1); |
462 | 0 | return; |
463 | 0 | } |
464 | | |
465 | 0 | QTC::TC("qpdf", "NNTree items is empty after remove"); |
466 | | |
467 | | // We removed the last item from this items array, so we need to remove this node from the |
468 | | // parent on up the tree. Then we need to position ourselves at the removed item's successor. |
469 | 0 | bool done = false; |
470 | 0 | while (!done) { |
471 | 0 | auto element = lastPathElement(); |
472 | 0 | auto parent = element; |
473 | 0 | --parent; |
474 | 0 | auto kids = element->node.getKey("/Kids"); |
475 | 0 | kids.eraseItem(element->kid_number); |
476 | 0 | auto nkids = kids.getArrayNItems(); |
477 | 0 | if (nkids > 0) { |
478 | | // The logic here is similar to the items case. |
479 | 0 | if ((element->kid_number == 0) || (element->kid_number == nkids)) { |
480 | 0 | QTC::TC("qpdf", "NNTree erased first or last kid"); |
481 | 0 | resetLimits(element->node, parent); |
482 | 0 | } |
483 | 0 | if (element->kid_number == nkids) { |
484 | | // Move to the successor of the last child of the previous kid. |
485 | 0 | setItemNumber(QPDFObjectHandle(), -1); |
486 | 0 | --element->kid_number; |
487 | 0 | deepen(kids.getArrayItem(element->kid_number), false, true); |
488 | 0 | if (valid()) { |
489 | 0 | increment(false); |
490 | 0 | if (!valid()) { |
491 | 0 | QTC::TC("qpdf", "NNTree erased last item in tree"); |
492 | 0 | } else { |
493 | 0 | QTC::TC("qpdf", "NNTree erased last kid"); |
494 | 0 | } |
495 | 0 | } |
496 | 0 | } else { |
497 | | // Next kid is in deleted kid's position |
498 | 0 | QTC::TC("qpdf", "NNTree erased non-last kid"); |
499 | 0 | deepen(kids.getArrayItem(element->kid_number), true, true); |
500 | 0 | } |
501 | 0 | done = true; |
502 | 0 | } else if (parent == this->path.end()) { |
503 | | // We erased the very last item. Convert the root to an empty items array. |
504 | 0 | QTC::TC("qpdf", "NNTree non-flat tree is empty after remove"); |
505 | 0 | element->node.removeKey("/Kids"); |
506 | 0 | element->node.replaceKey(impl.details.itemsKey(), QPDFObjectHandle::newArray()); |
507 | 0 | this->path.clear(); |
508 | 0 | setItemNumber(impl.oh, -1); |
509 | 0 | done = true; |
510 | 0 | } else { |
511 | | // Walk up the tree and continue |
512 | 0 | QTC::TC("qpdf", "NNTree remove walking up tree"); |
513 | 0 | this->path.pop_back(); |
514 | 0 | } |
515 | 0 | } |
516 | 0 | } |
517 | | |
518 | | NNTreeIterator& |
519 | | NNTreeIterator::operator++() |
520 | 8.28k | { |
521 | 8.28k | increment(false); |
522 | 8.28k | return *this; |
523 | 8.28k | } |
524 | | |
525 | | NNTreeIterator& |
526 | | NNTreeIterator::operator--() |
527 | 0 | { |
528 | 0 | increment(true); |
529 | 0 | return *this; |
530 | 0 | } |
531 | | |
532 | | NNTreeIterator::reference |
533 | | NNTreeIterator::operator*() |
534 | 8.49k | { |
535 | 8.49k | updateIValue(false); |
536 | 8.49k | return this->ivalue; |
537 | 8.49k | } |
538 | | |
539 | | NNTreeIterator::pointer |
540 | | NNTreeIterator::operator->() |
541 | 62.4k | { |
542 | 62.4k | updateIValue(false); |
543 | 62.4k | return &(this->ivalue); |
544 | 62.4k | } |
545 | | |
546 | | bool |
547 | | NNTreeIterator::operator==(NNTreeIterator const& other) const |
548 | 31.6k | { |
549 | 31.6k | if ((this->item_number == -1) && (other.item_number == -1)) { |
550 | 4.63k | return true; |
551 | 4.63k | } |
552 | 27.0k | if (this->path.size() != other.path.size()) { |
553 | 11.5k | return false; |
554 | 11.5k | } |
555 | 15.4k | auto tpi = this->path.begin(); |
556 | 15.4k | auto opi = other.path.begin(); |
557 | 15.4k | while (tpi != this->path.end()) { |
558 | 0 | if (tpi->kid_number != opi->kid_number) { |
559 | 0 | return false; |
560 | 0 | } |
561 | 0 | ++tpi; |
562 | 0 | ++opi; |
563 | 0 | } |
564 | 15.4k | if (this->item_number != other.item_number) { |
565 | 15.4k | return false; |
566 | 15.4k | } |
567 | 0 | return true; |
568 | 15.4k | } |
569 | | |
570 | | void |
571 | | NNTreeIterator::setItemNumber(QPDFObjectHandle const& node, int n) |
572 | 31.1k | { |
573 | 31.1k | this->node = node; |
574 | 31.1k | this->item_number = n; |
575 | 31.1k | updateIValue(); |
576 | 31.1k | } |
577 | | |
578 | | void |
579 | | NNTreeIterator::addPathElement(QPDFObjectHandle const& node, int kid_number) |
580 | 12.3k | { |
581 | 12.3k | this->path.emplace_back(node, kid_number); |
582 | 12.3k | } |
583 | | |
584 | | bool |
585 | | NNTreeIterator::deepen(QPDFObjectHandle node, bool first, bool allow_empty) |
586 | 19.6k | { |
587 | | // Starting at this node, descend through the first or last kid until we reach a node with |
588 | | // items. If we succeed, return true; otherwise return false and leave path alone. |
589 | | |
590 | 19.6k | auto opath = this->path; |
591 | 19.6k | bool failed = false; |
592 | | |
593 | 19.6k | QPDFObjGen::set seen; |
594 | 19.6k | for (auto const& i: this->path) { |
595 | 1.14k | seen.add(i.node); |
596 | 1.14k | } |
597 | 28.8k | while (!failed) { |
598 | 28.7k | if (!seen.add(node)) { |
599 | 100 | QTC::TC("qpdf", "NNTree deepen: loop"); |
600 | 100 | warn(impl.qpdf, node, "loop detected while traversing name/number tree"); |
601 | 100 | failed = true; |
602 | 100 | break; |
603 | 100 | } |
604 | | |
605 | 28.6k | if (!node.isDictionary()) { |
606 | 1.36k | QTC::TC("qpdf", "NNTree node is not a dictionary"); |
607 | 1.36k | warn(impl.qpdf, node, "non-dictionary node while traversing name/number tree"); |
608 | 1.36k | failed = true; |
609 | 1.36k | break; |
610 | 1.36k | } |
611 | | |
612 | 27.2k | auto kids = node.getKey("/Kids"); |
613 | 27.2k | int nkids = kids.isArray() ? kids.getArrayNItems() : 0; |
614 | 27.2k | auto items = node.getKey(impl.details.itemsKey()); |
615 | 27.2k | int nitems = items.isArray() ? items.getArrayNItems() : 0; |
616 | 27.2k | if (nitems > 0) { |
617 | 15.4k | setItemNumber(node, first ? 0 : nitems - 2); |
618 | 15.4k | break; |
619 | 15.4k | } else if (nkids > 0) { |
620 | 9.17k | int kid_number = first ? 0 : nkids - 1; |
621 | 9.17k | addPathElement(node, kid_number); |
622 | 9.17k | auto next = kids.getArrayItem(kid_number); |
623 | 9.17k | if (!next.isIndirect()) { |
624 | 180 | if (impl.auto_repair) { |
625 | 180 | QTC::TC("qpdf", "NNTree fix indirect kid"); |
626 | 180 | warn( |
627 | 180 | impl.qpdf, |
628 | 180 | node, |
629 | 180 | ("converting kid number " + std::to_string(kid_number) + |
630 | 180 | " to an indirect object")); |
631 | 180 | next = impl.qpdf.makeIndirectObject(next); |
632 | 180 | kids.setArrayItem(kid_number, next); |
633 | 180 | } else { |
634 | 0 | QTC::TC("qpdf", "NNTree warn indirect kid"); |
635 | 0 | warn( |
636 | 0 | impl.qpdf, |
637 | 0 | node, |
638 | 0 | ("kid number " + std::to_string(kid_number) + |
639 | 0 | " is not an indirect object")); |
640 | 0 | } |
641 | 180 | } |
642 | 9.17k | node = next; |
643 | 9.17k | } else if (allow_empty && items.isArray()) { |
644 | 1.71k | QTC::TC("qpdf", "NNTree deepen found empty"); |
645 | 1.71k | setItemNumber(node, -1); |
646 | 1.71k | break; |
647 | 1.71k | } else { |
648 | 964 | QTC::TC("qpdf", "NNTree deepen: invalid node"); |
649 | 964 | warn( |
650 | 964 | impl.qpdf, |
651 | 964 | node, |
652 | 964 | ("name/number tree node has neither non-empty " + impl.details.itemsKey() + |
653 | 964 | " nor /Kids")); |
654 | 964 | failed = true; |
655 | 964 | break; |
656 | 964 | } |
657 | 27.2k | } |
658 | 19.6k | if (failed) { |
659 | 1.85k | this->path = opath; |
660 | 1.85k | return false; |
661 | 1.85k | } |
662 | 17.8k | return true; |
663 | 19.6k | } |
664 | | |
665 | | NNTreeImpl::NNTreeImpl( |
666 | | NNTreeDetails const& details, QPDF& qpdf, QPDFObjectHandle& oh, bool auto_repair) : |
667 | | details(details), |
668 | | qpdf(qpdf), |
669 | | split_threshold(32), |
670 | | oh(oh), |
671 | | auto_repair(auto_repair) |
672 | 1.66k | { |
673 | 1.66k | } |
674 | | |
675 | | void |
676 | | NNTreeImpl::setSplitThreshold(int split_threshold) |
677 | 0 | { |
678 | 0 | this->split_threshold = split_threshold; |
679 | 0 | } |
680 | | |
681 | | NNTreeImpl::iterator |
682 | | NNTreeImpl::begin() |
683 | 18.8k | { |
684 | 18.8k | iterator result(*this); |
685 | 18.8k | result.deepen(this->oh, true, true); |
686 | 18.8k | return result; |
687 | 18.8k | } |
688 | | |
689 | | NNTreeImpl::iterator |
690 | | NNTreeImpl::end() |
691 | 42.7k | { |
692 | 42.7k | return {*this}; |
693 | 42.7k | } |
694 | | |
695 | | NNTreeImpl::iterator |
696 | | NNTreeImpl::last() |
697 | 0 | { |
698 | 0 | iterator result(*this); |
699 | 0 | result.deepen(this->oh, false, true); |
700 | 0 | return result; |
701 | 0 | } |
702 | | |
703 | | int |
704 | | NNTreeImpl::withinLimits(QPDFObjectHandle key, QPDFObjectHandle node) |
705 | 6.66k | { |
706 | 6.66k | int result = 0; |
707 | 6.66k | auto limits = node.getKey("/Limits"); |
708 | 6.66k | if (limits.isArray() && (limits.getArrayNItems() >= 2) && |
709 | 6.66k | details.keyValid(limits.getArrayItem(0)) && details.keyValid(limits.getArrayItem(1))) { |
710 | 6.39k | if (details.compareKeys(key, limits.getArrayItem(0)) < 0) { |
711 | 2.55k | result = -1; |
712 | 3.84k | } else if (details.compareKeys(key, limits.getArrayItem(1)) > 0) { |
713 | 891 | result = 1; |
714 | 891 | } |
715 | 6.39k | } else { |
716 | 268 | QTC::TC("qpdf", "NNTree missing limits"); |
717 | 268 | error(qpdf, node, "node is missing /Limits"); |
718 | 268 | } |
719 | 6.66k | return result; |
720 | 6.66k | } |
721 | | |
722 | | int |
723 | | NNTreeImpl::binarySearch( |
724 | | QPDFObjectHandle key, |
725 | | QPDFObjectHandle items, |
726 | | int num_items, |
727 | | bool return_prev_if_not_found, |
728 | | int (NNTreeImpl::*compare)(QPDFObjectHandle& key, QPDFObjectHandle& arr, int item)) |
729 | 16.9k | { |
730 | 16.9k | int max_idx = 1; |
731 | 65.0k | while (max_idx < num_items) { |
732 | 48.1k | max_idx <<= 1; |
733 | 48.1k | } |
734 | | |
735 | 16.9k | int step = max_idx / 2; |
736 | 16.9k | int checks = max_idx; |
737 | 16.9k | int idx = step; |
738 | 16.9k | int found_idx = -1; |
739 | 16.9k | bool found = false; |
740 | 16.9k | bool found_leq = false; |
741 | 16.9k | int status = 0; |
742 | | |
743 | 72.7k | while ((!found) && (checks > 0)) { |
744 | 55.8k | if (idx < num_items) { |
745 | 48.8k | status = (this->*compare)(key, items, idx); |
746 | 48.8k | if (status >= 0) { |
747 | 26.8k | found_leq = true; |
748 | 26.8k | found_idx = idx; |
749 | 26.8k | } |
750 | 48.8k | } else { |
751 | | // consider item to be below anything after the top |
752 | 6.98k | status = -1; |
753 | 6.98k | } |
754 | | |
755 | 55.8k | if (status == 0) { |
756 | 5.50k | found = true; |
757 | 50.2k | } else { |
758 | 50.2k | checks >>= 1; |
759 | 50.2k | if (checks > 0) { |
760 | 38.8k | step >>= 1; |
761 | 38.8k | if (step == 0) { |
762 | 10.8k | step = 1; |
763 | 10.8k | } |
764 | | |
765 | 38.8k | if (status < 0) { |
766 | 22.2k | idx -= step; |
767 | 22.2k | } else { |
768 | 16.6k | idx += step; |
769 | 16.6k | } |
770 | 38.8k | } |
771 | 50.2k | } |
772 | 55.8k | } |
773 | | |
774 | 16.9k | if (found || (found_leq && return_prev_if_not_found)) { |
775 | 15.3k | return found_idx; |
776 | 15.3k | } else { |
777 | 1.56k | return -1; |
778 | 1.56k | } |
779 | 16.9k | } |
780 | | |
781 | | int |
782 | | NNTreeImpl::compareKeyItem(QPDFObjectHandle& key, QPDFObjectHandle& items, int idx) |
783 | 41.7k | { |
784 | 41.7k | if (!((items.isArray() && (items.getArrayNItems() > (2 * idx)) && |
785 | 41.7k | details.keyValid(items.getArrayItem(2 * idx))))) { |
786 | 718 | QTC::TC("qpdf", "NNTree item is wrong type"); |
787 | 718 | error( |
788 | 718 | qpdf, |
789 | 718 | this->oh, |
790 | 718 | ("item at index " + std::to_string(2 * idx) + " is not the right type")); |
791 | 718 | } |
792 | 41.7k | return details.compareKeys(key, items.getArrayItem(2 * idx)); |
793 | 41.7k | } |
794 | | |
795 | | int |
796 | | NNTreeImpl::compareKeyKid(QPDFObjectHandle& key, QPDFObjectHandle& kids, int idx) |
797 | 7.05k | { |
798 | 7.05k | if (!(kids.isArray() && (idx < kids.getArrayNItems()) && |
799 | 7.05k | kids.getArrayItem(idx).isDictionary())) { |
800 | 248 | QTC::TC("qpdf", "NNTree kid is invalid"); |
801 | 248 | error(qpdf, this->oh, "invalid kid at index " + std::to_string(idx)); |
802 | 248 | } |
803 | 7.05k | return withinLimits(key, kids.getArrayItem(idx)); |
804 | 7.05k | } |
805 | | |
806 | | void |
807 | | NNTreeImpl::repair() |
808 | 900 | { |
809 | 900 | auto new_node = QPDFObjectHandle::newDictionary(); |
810 | 900 | new_node.replaceKey(details.itemsKey(), QPDFObjectHandle::newArray()); |
811 | 900 | NNTreeImpl repl(details, qpdf, new_node, false); |
812 | 8.49k | for (auto const& i: *this) { |
813 | 8.49k | repl.insert(i.first, i.second); |
814 | 8.49k | } |
815 | 900 | this->oh.replaceKey("/Kids", new_node.getKey("/Kids")); |
816 | 900 | this->oh.replaceKey(details.itemsKey(), new_node.getKey(details.itemsKey())); |
817 | 900 | } |
818 | | |
819 | | NNTreeImpl::iterator |
820 | | NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found) |
821 | 16.8k | { |
822 | 16.8k | try { |
823 | 16.8k | return findInternal(key, return_prev_if_not_found); |
824 | 16.8k | } catch (QPDFExc& e) { |
825 | 2.17k | if (this->auto_repair) { |
826 | 1.96k | QTC::TC("qpdf", "NNTree repair"); |
827 | 1.96k | warn(qpdf, this->oh, std::string("attempting to repair after error: ") + e.what()); |
828 | 1.96k | repair(); |
829 | 1.96k | return findInternal(key, return_prev_if_not_found); |
830 | 1.96k | } else { |
831 | 216 | throw; |
832 | 216 | } |
833 | 2.17k | } |
834 | 16.8k | } |
835 | | |
836 | | NNTreeImpl::iterator |
837 | | NNTreeImpl::findInternal(QPDFObjectHandle key, bool return_prev_if_not_found) |
838 | 17.0k | { |
839 | 17.0k | auto first_item = begin(); |
840 | 17.0k | auto last_item = end(); |
841 | 17.0k | if (first_item == end()) { |
842 | | // Empty |
843 | 2.56k | return end(); |
844 | 14.5k | } else if ( |
845 | 14.5k | first_item.valid() && details.keyValid(first_item->first) && |
846 | 14.5k | details.compareKeys(key, first_item->first) < 0) { |
847 | | // Before the first key |
848 | 88 | return end(); |
849 | 14.4k | } else if ( |
850 | 14.4k | last_item.valid() && details.keyValid(last_item->first) && |
851 | 14.4k | details.compareKeys(key, last_item->first) > 0) { |
852 | | // After the last key |
853 | 0 | if (return_prev_if_not_found) { |
854 | 0 | return last_item; |
855 | 0 | } else { |
856 | 0 | return end(); |
857 | 0 | } |
858 | 0 | } |
859 | | |
860 | 14.4k | QPDFObjGen::set seen; |
861 | 14.4k | auto node = this->oh; |
862 | 14.4k | iterator result(*this); |
863 | | |
864 | 18.4k | while (true) { |
865 | 16.9k | if (!seen.add(node)) { |
866 | 0 | QTC::TC("qpdf", "NNTree loop in find"); |
867 | 0 | error(qpdf, node, "loop detected in find"); |
868 | 0 | } |
869 | | |
870 | 16.9k | auto kids = node.getKey("/Kids"); |
871 | 16.9k | int nkids = kids.isArray() ? kids.getArrayNItems() : 0; |
872 | 16.9k | auto items = node.getKey(details.itemsKey()); |
873 | 16.9k | int nitems = items.isArray() ? items.getArrayNItems() : 0; |
874 | 16.9k | if (nitems > 0) { |
875 | 12.8k | int idx = binarySearch( |
876 | 12.8k | key, items, nitems / 2, return_prev_if_not_found, &NNTreeImpl::compareKeyItem); |
877 | 12.8k | if (idx >= 0) { |
878 | 12.1k | result.setItemNumber(node, 2 * idx); |
879 | 12.1k | } |
880 | 12.8k | break; |
881 | 12.8k | } else if (nkids > 0) { |
882 | 4.03k | int idx = binarySearch(key, kids, nkids, true, &NNTreeImpl::compareKeyKid); |
883 | 4.03k | if (idx == -1) { |
884 | 157 | QTC::TC("qpdf", "NNTree -1 in binary search"); |
885 | 157 | error( |
886 | 157 | qpdf, |
887 | 157 | node, |
888 | 157 | "unexpected -1 from binary search of kids;" |
889 | 157 | " limits may by wrong"); |
890 | 157 | } |
891 | 4.03k | result.addPathElement(node, idx); |
892 | 4.03k | node = kids.getArrayItem(idx); |
893 | 4.03k | } else { |
894 | 23 | QTC::TC("qpdf", "NNTree bad node during find"); |
895 | 23 | error(qpdf, node, "bad node during find"); |
896 | 23 | } |
897 | 16.9k | } |
898 | | |
899 | 14.4k | return result; |
900 | 17.0k | } |
901 | | |
902 | | NNTreeImpl::iterator |
903 | | NNTreeImpl::insertFirst(QPDFObjectHandle key, QPDFObjectHandle value) |
904 | 869 | { |
905 | 869 | auto iter = begin(); |
906 | 869 | QPDFObjectHandle items; |
907 | 869 | if (iter.node.isDictionary()) { |
908 | 869 | items = iter.node.getKey(details.itemsKey()); |
909 | 869 | } |
910 | 869 | if (!(items.isArray())) { |
911 | 0 | QTC::TC("qpdf", "NNTree no valid items node in insertFirst"); |
912 | 0 | error(qpdf, this->oh, "unable to find a valid items node"); |
913 | 0 | } |
914 | 869 | items.insertItem(0, key); |
915 | 869 | items.insertItem(1, value); |
916 | 869 | iter.setItemNumber(iter.node, 0); |
917 | 869 | iter.resetLimits(iter.node, iter.lastPathElement()); |
918 | 869 | iter.split(iter.node, iter.lastPathElement()); |
919 | 869 | return iter; |
920 | 869 | } |
921 | | |
922 | | NNTreeImpl::iterator |
923 | | NNTreeImpl::insert(QPDFObjectHandle key, QPDFObjectHandle value) |
924 | 8.49k | { |
925 | 8.49k | auto iter = find(key, true); |
926 | 8.49k | if (!iter.valid()) { |
927 | 869 | QTC::TC("qpdf", "NNTree insert inserts first"); |
928 | 869 | return insertFirst(key, value); |
929 | 7.63k | } else if (details.compareKeys(key, iter->first) == 0) { |
930 | 1.29k | QTC::TC("qpdf", "NNTree insert replaces"); |
931 | 1.29k | auto items = iter.node.getKey(details.itemsKey()); |
932 | 1.29k | items.setArrayItem(iter.item_number + 1, value); |
933 | 1.29k | iter.updateIValue(); |
934 | 6.33k | } else { |
935 | 6.33k | QTC::TC("qpdf", "NNTree insert inserts after"); |
936 | 6.33k | iter.insertAfter(key, value); |
937 | 6.33k | } |
938 | 7.63k | return iter; |
939 | 8.49k | } |
940 | | |
941 | | bool |
942 | | NNTreeImpl::remove(QPDFObjectHandle key, QPDFObjectHandle* value) |
943 | 0 | { |
944 | 0 | auto iter = find(key, false); |
945 | 0 | if (!iter.valid()) { |
946 | 0 | QTC::TC("qpdf", "NNTree remove not found"); |
947 | 0 | return false; |
948 | 0 | } |
949 | 0 | if (value) { |
950 | 0 | *value = iter->second; |
951 | 0 | } |
952 | 0 | iter.remove(); |
953 | 0 | return true; |
954 | 0 | } |