/src/qpdf/libqpdf/NNTree.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <qpdf/NNTree.hh> |
2 | | |
3 | | #include <qpdf/QTC.hh> |
4 | | #include <qpdf/QUtil.hh> |
5 | | |
6 | | #include <exception> |
7 | | |
8 | | static std::string |
9 | | get_description(QPDFObjectHandle& node) |
10 | 18.4k | { |
11 | 18.4k | std::string result("Name/Number tree node"); |
12 | 18.4k | if (node.isIndirect()) { |
13 | 17.4k | result += " (object " + std::to_string(node.getObjectID()) + ")"; |
14 | 17.4k | } |
15 | 18.4k | return result; |
16 | 18.4k | } |
17 | | |
18 | | static void |
19 | | warn(QPDF& qpdf, QPDFObjectHandle& node, std::string const& msg) |
20 | 17.8k | { |
21 | 17.8k | qpdf.warn(qpdf_e_damaged_pdf, get_description(node), 0, msg); |
22 | 17.8k | } |
23 | | |
24 | | static void |
25 | | error(QPDF& qpdf, QPDFObjectHandle& node, std::string const& msg) |
26 | 527 | { |
27 | 527 | throw QPDFExc(qpdf_e_damaged_pdf, qpdf.getFilename(), get_description(node), 0, msg); |
28 | 527 | } |
29 | | |
30 | | NNTreeIterator::NNTreeIterator(NNTreeImpl& impl) : |
31 | 61.6k | impl(impl) |
32 | 61.6k | { |
33 | 61.6k | } |
34 | | |
35 | | void |
36 | | NNTreeIterator::updateIValue(bool allow_invalid) |
37 | 86.3k | { |
38 | | // ivalue should never be used inside the class since we return a pointer/reference to it. Every |
39 | | // bit of code that ever changes what object the iterator points to should take care to call |
40 | | // updateIValue. Failure to do this means that any old references to *iter will point to |
41 | | // incorrect objects, though the next dereference of the iterator will fix it. This isn't |
42 | | // necessarily catastrophic, but it would be confusing. The test suite attempts to exercise |
43 | | // various cases to ensure we don't introduce that bug in the future, but sadly it's tricky to |
44 | | // verify by reasoning about the code that this constraint is always satisfied. Whenever we |
45 | | // update what the iterator points to, we should call setItemNumber, which calls this. If we |
46 | | // change what the iterator points to in some other way, such as replacing a value or removing |
47 | | // an item and making the iterator point at a different item in potentially the same position, |
48 | | // we must call updateIValue as well. These cases are handled, and for good measure, we also |
49 | | // call updateIValue in operator* and operator->. |
50 | | |
51 | 86.3k | bool okay = false; |
52 | 86.3k | if (item_number >= 0 && node.isDictionary()) { |
53 | 83.7k | auto items = node.getKey(impl.details.itemsKey()); |
54 | 83.7k | if (item_number + 1 < items.getArrayNItems()) { |
55 | 83.6k | okay = true; |
56 | 83.6k | ivalue.first = items.getArrayItem(item_number); |
57 | 83.6k | ivalue.second = items.getArrayItem(1 + item_number); |
58 | 83.6k | } else { |
59 | 12 | error(impl.qpdf, node, "update ivalue: items array is too short"); |
60 | 12 | } |
61 | 83.7k | } |
62 | 86.3k | if (!okay) { |
63 | 2.62k | if (!allow_invalid) { |
64 | 0 | throw std::logic_error( |
65 | 0 | "attempt made to dereference an invalid name/number tree iterator"); |
66 | 0 | } |
67 | 2.62k | ivalue.first = QPDFObjectHandle(); |
68 | 2.62k | ivalue.second = QPDFObjectHandle(); |
69 | 2.62k | } |
70 | 86.3k | } |
71 | | |
72 | | NNTreeIterator::PathElement::PathElement(QPDFObjectHandle const& node, int kid_number) : |
73 | 6.12k | node(node), |
74 | 6.12k | kid_number(kid_number) |
75 | 6.12k | { |
76 | 6.12k | } |
77 | | |
78 | | QPDFObjectHandle |
79 | | NNTreeIterator::getNextKid(PathElement& pe, bool backward) |
80 | 3.17k | { |
81 | 3.17k | QPDFObjectHandle result; |
82 | 3.17k | bool found = false; |
83 | 11.8k | while (!found) { |
84 | 8.66k | pe.kid_number += backward ? -1 : 1; |
85 | 8.66k | auto kids = pe.node.getKey("/Kids"); |
86 | 8.66k | if ((pe.kid_number >= 0) && (pe.kid_number < kids.getArrayNItems())) { |
87 | 7.74k | result = kids.getArrayItem(pe.kid_number); |
88 | 7.74k | if (result.isDictionary() && |
89 | 7.74k | (result.hasKey("/Kids") || result.hasKey(impl.details.itemsKey()))) { |
90 | 2.22k | found = true; |
91 | 5.51k | } else { |
92 | 5.51k | QTC::TC("qpdf", "NNTree skip invalid kid"); |
93 | 5.51k | warn( |
94 | 5.51k | impl.qpdf, |
95 | 5.51k | pe.node, |
96 | 5.51k | ("skipping over invalid kid at index " + std::to_string(pe.kid_number))); |
97 | 5.51k | } |
98 | 7.74k | } else { |
99 | 922 | result = QPDFObjectHandle::newNull(); |
100 | 922 | found = true; |
101 | 922 | } |
102 | 8.66k | } |
103 | 3.17k | return result; |
104 | 3.17k | } |
105 | | |
106 | | bool |
107 | | NNTreeIterator::valid() const |
108 | 89.4k | { |
109 | 89.4k | return item_number >= 0; |
110 | 89.4k | } |
111 | | |
112 | | void |
113 | | NNTreeIterator::increment(bool backward) |
114 | 16.2k | { |
115 | 16.2k | if (item_number < 0) { |
116 | 0 | QTC::TC("qpdf", "NNTree increment end()"); |
117 | 0 | deepen(impl.oh, !backward, true); |
118 | 0 | return; |
119 | 0 | } |
120 | 16.2k | bool found_valid_key = false; |
121 | 42.2k | while (valid() && !found_valid_key) { |
122 | 26.0k | item_number += backward ? -2 : 2; |
123 | 26.0k | auto items = node.getKey(impl.details.itemsKey()); |
124 | 26.0k | if (item_number < 0 || item_number >= items.getArrayNItems()) { |
125 | 1.62k | bool found = false; |
126 | 1.62k | setItemNumber(QPDFObjectHandle(), -1); |
127 | 4.79k | while (!(found || path.empty())) { |
128 | 3.17k | auto& element = path.back(); |
129 | 3.17k | auto pe_node = getNextKid(element, backward); |
130 | 3.17k | if (pe_node.isNull()) { |
131 | 922 | path.pop_back(); |
132 | 2.24k | } else { |
133 | 2.24k | found = deepen(pe_node, !backward, false); |
134 | 2.24k | } |
135 | 3.17k | } |
136 | 1.62k | } |
137 | 26.0k | if (item_number >= 0) { |
138 | 25.6k | items = node.getKey(impl.details.itemsKey()); |
139 | 25.6k | if (item_number + 1 >= items.getArrayNItems()) { |
140 | 794 | QTC::TC("qpdf", "NNTree skip item at end of short items"); |
141 | 794 | warn(impl.qpdf, node, "items array doesn't have enough elements"); |
142 | 24.8k | } else if (!impl.details.keyValid(items.getArrayItem(item_number))) { |
143 | 9.03k | QTC::TC("qpdf", "NNTree skip invalid key"); |
144 | 9.03k | warn( |
145 | 9.03k | impl.qpdf, |
146 | 9.03k | node, |
147 | 9.03k | ("item " + std::to_string(item_number) + " has the wrong type")); |
148 | 15.7k | } else { |
149 | 15.7k | found_valid_key = true; |
150 | 15.7k | } |
151 | 25.6k | } |
152 | 26.0k | } |
153 | 16.2k | } |
154 | | |
155 | | void |
156 | | NNTreeIterator::resetLimits(QPDFObjectHandle a_node, std::list<PathElement>::iterator parent) |
157 | 5.19k | { |
158 | 5.19k | bool done = false; |
159 | 6.00k | while (!done) { |
160 | 5.19k | if (parent == path.end()) { |
161 | 4.38k | QTC::TC("qpdf", "NNTree remove limits from root"); |
162 | 4.38k | a_node.removeKey("/Limits"); |
163 | 4.38k | done = true; |
164 | 4.38k | break; |
165 | 4.38k | } |
166 | 813 | auto kids = a_node.getKey("/Kids"); |
167 | 813 | int nkids = kids.isArray() ? kids.getArrayNItems() : 0; |
168 | 813 | auto items = a_node.getKey(impl.details.itemsKey()); |
169 | 813 | int nitems = items.isArray() ? items.getArrayNItems() : 0; |
170 | | |
171 | 813 | bool changed = true; |
172 | 813 | QPDFObjectHandle first; |
173 | 813 | QPDFObjectHandle last; |
174 | 813 | if (nitems >= 2) { |
175 | 790 | first = items.getArrayItem(0); |
176 | 790 | last = items.getArrayItem((nitems - 1) & ~1); |
177 | 790 | } else if (nkids > 0) { |
178 | 23 | auto first_kid = kids.getArrayItem(0); |
179 | 23 | auto last_kid = kids.getArrayItem(nkids - 1); |
180 | 23 | if (first_kid.isDictionary() && last_kid.isDictionary()) { |
181 | 23 | auto first_limits = first_kid.getKey("/Limits"); |
182 | 23 | auto last_limits = last_kid.getKey("/Limits"); |
183 | 23 | if (first_limits.isArray() && (first_limits.getArrayNItems() >= 2) && |
184 | 23 | last_limits.isArray() && (last_limits.getArrayNItems() >= 2)) { |
185 | 23 | first = first_limits.getArrayItem(0); |
186 | 23 | last = last_limits.getArrayItem(1); |
187 | 23 | } |
188 | 23 | } |
189 | 23 | } |
190 | 813 | if (first && last) { |
191 | 813 | auto limits = QPDFObjectHandle::newArray(); |
192 | 813 | limits.appendItem(first); |
193 | 813 | limits.appendItem(last); |
194 | 813 | auto olimits = a_node.getKey("/Limits"); |
195 | 813 | if (olimits.isArray() && (olimits.getArrayNItems() == 2)) { |
196 | 659 | auto ofirst = olimits.getArrayItem(0); |
197 | 659 | auto olast = olimits.getArrayItem(1); |
198 | 659 | if (impl.details.keyValid(ofirst) && impl.details.keyValid(olast) && |
199 | 659 | (impl.details.compareKeys(first, ofirst) == 0) && |
200 | 659 | (impl.details.compareKeys(last, olast) == 0)) { |
201 | 571 | QTC::TC("qpdf", "NNTree limits didn't change"); |
202 | 571 | changed = false; |
203 | 571 | } |
204 | 659 | } |
205 | 813 | if (changed && !a_node.isSameObjectAs(path.begin()->node)) { |
206 | 219 | a_node.replaceKey("/Limits", limits); |
207 | 219 | } |
208 | 813 | } else { |
209 | 0 | QTC::TC("qpdf", "NNTree unable to determine limits"); |
210 | 0 | warn(impl.qpdf, a_node, "unable to determine limits"); |
211 | 0 | } |
212 | | |
213 | 813 | if (!changed || parent == path.begin()) { |
214 | 813 | done = true; |
215 | 813 | } else { |
216 | 0 | a_node = parent->node; |
217 | 0 | --parent; |
218 | 0 | } |
219 | 813 | } |
220 | 5.19k | } |
221 | | |
222 | | void |
223 | | NNTreeIterator::split(QPDFObjectHandle to_split, std::list<PathElement>::iterator parent) |
224 | 5.04k | { |
225 | | // Split some node along the path to the item pointed to by this iterator, and adjust the |
226 | | // iterator so it points to the same item. |
227 | | |
228 | | // In examples, for simplicity, /Nums is shown to just contain numbers instead of pairs. Imagine |
229 | | // this tree: |
230 | | // |
231 | | // root: << /Kids [ A B C D ] >> |
232 | | // A: << /Nums [ 1 2 3 4 ] >> |
233 | | // B: << /Nums [ 5 6 7 8 ] >> |
234 | | // C: << /Nums [ 9 10 11 12 ] >> |
235 | | // D: << /Kids [ E F ] |
236 | | // E: << /Nums [ 13 14 15 16 ] >> |
237 | | // F: << /Nums [ 17 18 19 20 ] >> |
238 | | |
239 | | // iter1 (points to 19) |
240 | | // path: |
241 | | // - { node: root: kid_number: 3 } |
242 | | // - { node: D, kid_number: 1 } |
243 | | // node: F |
244 | | // item_number: 2 |
245 | | |
246 | | // iter2 (points to 1) |
247 | | // path: |
248 | | // - { node: root, kid_number: 0} |
249 | | // node: A |
250 | | // item_number: 0 |
251 | | |
252 | 5.04k | if (!valid()) { |
253 | 0 | throw std::logic_error("NNTreeIterator::split called an invalid iterator"); |
254 | 0 | } |
255 | | |
256 | | // Find the array we actually need to split, which is either this node's kids or items. |
257 | 5.04k | auto kids = to_split.getKey("/Kids"); |
258 | 5.04k | int nkids = kids.isArray() ? kids.getArrayNItems() : 0; |
259 | 5.04k | auto items = to_split.getKey(impl.details.itemsKey()); |
260 | 5.04k | int nitems = items.isArray() ? items.getArrayNItems() : 0; |
261 | | |
262 | 5.04k | QPDFObjectHandle first_half; |
263 | 5.04k | int n = 0; |
264 | 5.04k | std::string key; |
265 | 5.04k | int threshold = 0; |
266 | 5.04k | if (nkids > 0) { |
267 | 23 | QTC::TC("qpdf", "NNTree split kids"); |
268 | 23 | first_half = kids; |
269 | 23 | n = nkids; |
270 | 23 | threshold = impl.split_threshold; |
271 | 23 | key = "/Kids"; |
272 | 5.01k | } else if (nitems > 0) { |
273 | 5.01k | QTC::TC("qpdf", "NNTree split items"); |
274 | 5.01k | first_half = items; |
275 | 5.01k | n = nitems; |
276 | 5.01k | threshold = 2 * impl.split_threshold; |
277 | 5.01k | key = impl.details.itemsKey(); |
278 | 5.01k | } else { |
279 | 0 | throw std::logic_error("NNTreeIterator::split called on invalid node"); |
280 | 0 | } |
281 | | |
282 | 5.04k | if (n <= threshold) { |
283 | 4.96k | return; |
284 | 4.96k | } |
285 | | |
286 | 79 | bool is_root = (parent == path.end()); |
287 | 79 | bool is_leaf = (nitems > 0); |
288 | | |
289 | | // CURRENT STATE: tree is in original state; iterator is valid and unchanged. |
290 | | |
291 | 79 | if (is_root) { |
292 | | // What we want to do is to create a new node for the second half of the items and put it in |
293 | | // the parent's /Kids array right after the element that points to the current to_split |
294 | | // node, but if we're splitting root, there is no parent, so handle that first. |
295 | | |
296 | | // In the non-root case, parent points to the path element whose /Kids contains the first |
297 | | // half node, and the first half node is to_split. If we are splitting the root, we need to |
298 | | // push everything down a level, but we want to keep the actual root object the same so that |
299 | | // indirect references to it remain intact (and also in case it might be a direct object, |
300 | | // which it shouldn't be but that case probably exists in the wild). To achieve this, we |
301 | | // create a new node for the first half and then replace /Kids in the root to contain it. |
302 | | // Then we adjust the path so that the first element is root and the second element, if any, |
303 | | // is the new first half. In this way, we make the root case identical to the non-root case |
304 | | // so remaining logic can handle them in the same way. |
305 | | |
306 | 56 | auto first_node = impl.qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary()); |
307 | 56 | first_node.replaceKey(key, first_half); |
308 | 56 | QPDFObjectHandle new_kids = QPDFObjectHandle::newArray(); |
309 | 56 | new_kids.appendItem(first_node); |
310 | 56 | to_split.removeKey("/Limits"); // already shouldn't be there for root |
311 | 56 | to_split.removeKey(impl.details.itemsKey()); |
312 | 56 | to_split.replaceKey("/Kids", new_kids); |
313 | 56 | if (is_leaf) { |
314 | 54 | QTC::TC("qpdf", "NNTree split root + leaf"); |
315 | 54 | node = first_node; |
316 | 54 | } else { |
317 | 2 | QTC::TC("qpdf", "NNTree split root + !leaf"); |
318 | 2 | auto next = path.begin(); |
319 | 2 | next->node = first_node; |
320 | 2 | } |
321 | 56 | this->path.emplace_front(to_split, 0); |
322 | 56 | parent = path.begin(); |
323 | 56 | to_split = first_node; |
324 | 56 | } |
325 | | |
326 | | // CURRENT STATE: parent is guaranteed to be defined, and we have the invariants that |
327 | | // parent[/Kids][kid_number] == to_split and (++parent).node == to_split. |
328 | | |
329 | | // Create a second half array, and transfer the second half of the items into the second half |
330 | | // array. |
331 | 79 | QPDFObjectHandle second_half = QPDFObjectHandle::newArray(); |
332 | 79 | int start_idx = ((n / 2) & ~1); |
333 | 2.69k | while (first_half.getArrayNItems() > start_idx) { |
334 | 2.61k | second_half.appendItem(first_half.getArrayItem(start_idx)); |
335 | 2.61k | first_half.eraseItem(start_idx); |
336 | 2.61k | } |
337 | 79 | resetLimits(to_split, parent); |
338 | | |
339 | | // Create a new node to contain the second half |
340 | 79 | QPDFObjectHandle second_node = impl.qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary()); |
341 | 79 | second_node.replaceKey(key, second_half); |
342 | 79 | resetLimits(second_node, parent); |
343 | | |
344 | | // CURRENT STATE: half the items from the kids or items array in the node being split have been |
345 | | // moved into a new node. The new node is not yet attached to the tree. The iterator may have a |
346 | | // path element or leaf node that is out of bounds. |
347 | | |
348 | | // We need to adjust the parent to add the second node to /Kids and, if needed, update |
349 | | // kid_number to traverse through it. We need to update to_split's path element, or the node if |
350 | | // this is a leaf, so that the kid/item number points to the right place. |
351 | | |
352 | 79 | auto parent_kids = parent->node.getKey("/Kids"); |
353 | 79 | parent_kids.insertItem(parent->kid_number + 1, second_node); |
354 | 79 | auto cur_elem = parent; |
355 | 79 | ++cur_elem; // points to end() for leaf nodes |
356 | 79 | int old_idx = (is_leaf ? item_number : cur_elem->kid_number); |
357 | 79 | if (old_idx >= start_idx) { |
358 | 43 | ++parent->kid_number; |
359 | 43 | if (is_leaf) { |
360 | 43 | QTC::TC("qpdf", "NNTree split second half item"); |
361 | 43 | setItemNumber(second_node, item_number - start_idx); |
362 | 43 | } else { |
363 | 0 | QTC::TC("qpdf", "NNTree split second half kid"); |
364 | 0 | cur_elem->node = second_node; |
365 | 0 | cur_elem->kid_number -= start_idx; |
366 | 0 | } |
367 | 43 | } |
368 | 79 | if (!is_root) { |
369 | 23 | QTC::TC("qpdf", "NNTree split parent"); |
370 | 23 | auto next = parent->node; |
371 | 23 | resetLimits(next, parent); |
372 | 23 | --parent; |
373 | 23 | split(next, parent); |
374 | 23 | } |
375 | 79 | } |
376 | | |
377 | | std::list<NNTreeIterator::PathElement>::iterator |
378 | | NNTreeIterator::lastPathElement() |
379 | 10.0k | { |
380 | 10.0k | auto result = path.end(); |
381 | 10.0k | if (!path.empty()) { |
382 | 1.27k | --result; |
383 | 1.27k | } |
384 | 10.0k | return result; |
385 | 10.0k | } |
386 | | |
387 | | void |
388 | | NNTreeIterator::insertAfter(QPDFObjectHandle key, QPDFObjectHandle value) |
389 | 4.26k | { |
390 | 4.26k | if (!valid()) { |
391 | 0 | QTC::TC("qpdf", "NNTree insertAfter inserts first"); |
392 | 0 | impl.insertFirst(key, value); |
393 | 0 | deepen(impl.oh, true, false); |
394 | 0 | return; |
395 | 0 | } |
396 | | |
397 | 4.26k | auto items = node.getKey(impl.details.itemsKey()); |
398 | 4.26k | if (!items.isArray()) { |
399 | 0 | error(impl.qpdf, node, "node contains no items array"); |
400 | 0 | } |
401 | 4.26k | if (items.getArrayNItems() < item_number + 2) { |
402 | 0 | error(impl.qpdf, node, "insert: items array is too short"); |
403 | 0 | } |
404 | 4.26k | items.insertItem(item_number + 2, key); |
405 | 4.26k | items.insertItem(item_number + 3, value); |
406 | 4.26k | resetLimits(node, lastPathElement()); |
407 | 4.26k | split(node, lastPathElement()); |
408 | 4.26k | increment(false); |
409 | 4.26k | } |
410 | | |
411 | | void |
412 | | NNTreeIterator::remove() |
413 | 0 | { |
414 | | // Remove this item, leaving the tree valid and this iterator pointing to the next item. |
415 | |
|
416 | 0 | if (!valid()) { |
417 | 0 | throw std::logic_error("attempt made to remove an invalid iterator"); |
418 | 0 | } |
419 | 0 | auto items = node.getKey(impl.details.itemsKey()); |
420 | 0 | int nitems = items.getArrayNItems(); |
421 | 0 | if (item_number + 2 > nitems) { |
422 | 0 | error(impl.qpdf, node, "found short items array while removing an item"); |
423 | 0 | } |
424 | |
|
425 | 0 | items.eraseItem(item_number); |
426 | 0 | items.eraseItem(item_number); |
427 | 0 | nitems -= 2; |
428 | |
|
429 | 0 | if (nitems > 0) { |
430 | | // There are still items left |
431 | |
|
432 | 0 | if (item_number == 0 || item_number == nitems) { |
433 | | // We removed either the first or last item of an items array that remains non-empty, so |
434 | | // we have to adjust limits. |
435 | 0 | QTC::TC("qpdf", "NNTree remove reset limits"); |
436 | 0 | resetLimits(node, lastPathElement()); |
437 | 0 | } |
438 | |
|
439 | 0 | if (item_number == nitems) { |
440 | | // We removed the last item of a non-empty items array, so advance to the successor of |
441 | | // the previous item. |
442 | 0 | QTC::TC("qpdf", "NNTree erased last item"); |
443 | 0 | item_number -= 2; |
444 | 0 | increment(false); |
445 | 0 | } else if (item_number < nitems) { |
446 | | // We don't have to do anything since the removed item's successor now occupies its |
447 | | // former location. |
448 | 0 | QTC::TC("qpdf", "NNTree erased non-last item"); |
449 | 0 | updateIValue(); |
450 | 0 | } else { |
451 | | // We already checked to ensure this condition would not happen. |
452 | 0 | throw std::logic_error("NNTreeIterator::remove: item_number > nitems after erase"); |
453 | 0 | } |
454 | 0 | return; |
455 | 0 | } |
456 | | |
457 | 0 | if (path.empty()) { |
458 | | // Special case: if this is the root node, we can leave it empty. |
459 | 0 | QTC::TC("qpdf", "NNTree erased all items on leaf/root"); |
460 | 0 | setItemNumber(impl.oh, -1); |
461 | 0 | return; |
462 | 0 | } |
463 | | |
464 | 0 | QTC::TC("qpdf", "NNTree items is empty after remove"); |
465 | | |
466 | | // We removed the last item from this items array, so we need to remove this node from the |
467 | | // parent on up the tree. Then we need to position ourselves at the removed item's successor. |
468 | 0 | bool done = false; |
469 | 0 | while (!done) { |
470 | 0 | auto element = lastPathElement(); |
471 | 0 | auto parent = element; |
472 | 0 | --parent; |
473 | 0 | auto kids = element->node.getKey("/Kids"); |
474 | 0 | kids.eraseItem(element->kid_number); |
475 | 0 | auto nkids = kids.getArrayNItems(); |
476 | 0 | if (nkids > 0) { |
477 | | // The logic here is similar to the items case. |
478 | 0 | if ((element->kid_number == 0) || (element->kid_number == nkids)) { |
479 | 0 | QTC::TC("qpdf", "NNTree erased first or last kid"); |
480 | 0 | resetLimits(element->node, parent); |
481 | 0 | } |
482 | 0 | if (element->kid_number == nkids) { |
483 | | // Move to the successor of the last child of the previous kid. |
484 | 0 | setItemNumber(QPDFObjectHandle(), -1); |
485 | 0 | --element->kid_number; |
486 | 0 | deepen(kids.getArrayItem(element->kid_number), false, true); |
487 | 0 | if (valid()) { |
488 | 0 | increment(false); |
489 | 0 | if (!valid()) { |
490 | 0 | QTC::TC("qpdf", "NNTree erased last item in tree"); |
491 | 0 | } else { |
492 | 0 | QTC::TC("qpdf", "NNTree erased last kid"); |
493 | 0 | } |
494 | 0 | } |
495 | 0 | } else { |
496 | | // Next kid is in deleted kid's position |
497 | 0 | QTC::TC("qpdf", "NNTree erased non-last kid"); |
498 | 0 | deepen(kids.getArrayItem(element->kid_number), true, true); |
499 | 0 | } |
500 | 0 | done = true; |
501 | 0 | } else if (parent == path.end()) { |
502 | | // We erased the very last item. Convert the root to an empty items array. |
503 | 0 | QTC::TC("qpdf", "NNTree non-flat tree is empty after remove"); |
504 | 0 | element->node.removeKey("/Kids"); |
505 | 0 | element->node.replaceKey(impl.details.itemsKey(), QPDFObjectHandle::newArray()); |
506 | 0 | path.clear(); |
507 | 0 | setItemNumber(impl.oh, -1); |
508 | 0 | done = true; |
509 | 0 | } else { |
510 | | // Walk up the tree and continue |
511 | 0 | QTC::TC("qpdf", "NNTree remove walking up tree"); |
512 | 0 | path.pop_back(); |
513 | 0 | } |
514 | 0 | } |
515 | 0 | } |
516 | | |
517 | | NNTreeIterator& |
518 | | NNTreeIterator::operator++() |
519 | 11.9k | { |
520 | 11.9k | increment(false); |
521 | 11.9k | return *this; |
522 | 11.9k | } |
523 | | |
524 | | NNTreeIterator& |
525 | | NNTreeIterator::operator--() |
526 | 0 | { |
527 | 0 | increment(true); |
528 | 0 | return *this; |
529 | 0 | } |
530 | | |
531 | | NNTreeIterator::reference |
532 | | NNTreeIterator::operator*() |
533 | 11.9k | { |
534 | 11.9k | updateIValue(false); |
535 | 11.9k | return ivalue; |
536 | 11.9k | } |
537 | | |
538 | | NNTreeIterator::pointer |
539 | | NNTreeIterator::operator->() |
540 | 37.4k | { |
541 | 37.4k | updateIValue(false); |
542 | 37.4k | return &ivalue; |
543 | 37.4k | } |
544 | | |
545 | | bool |
546 | | NNTreeIterator::operator==(NNTreeIterator const& other) const |
547 | 28.8k | { |
548 | 28.8k | if (item_number == -1 && other.item_number == -1) { |
549 | 3.61k | return true; |
550 | 3.61k | } |
551 | 25.2k | if (path.size() != other.path.size()) { |
552 | 14.1k | return false; |
553 | 14.1k | } |
554 | 11.0k | auto tpi = path.begin(); |
555 | 11.0k | auto opi = other.path.begin(); |
556 | 11.0k | while (tpi != path.end()) { |
557 | 0 | if (tpi->kid_number != opi->kid_number) { |
558 | 0 | return false; |
559 | 0 | } |
560 | 0 | ++tpi; |
561 | 0 | ++opi; |
562 | 0 | } |
563 | 11.0k | if (item_number != other.item_number) { |
564 | 11.0k | return false; |
565 | 11.0k | } |
566 | 0 | return true; |
567 | 11.0k | } |
568 | | |
569 | | void |
570 | | NNTreeIterator::setItemNumber(QPDFObjectHandle const& a_node, int n) |
571 | 29.8k | { |
572 | 29.8k | node = a_node; |
573 | 29.8k | item_number = n; |
574 | 29.8k | updateIValue(); |
575 | 29.8k | } |
576 | | |
577 | | void |
578 | | NNTreeIterator::addPathElement(QPDFObjectHandle const& a_node, int kid_number) |
579 | 6.07k | { |
580 | 6.07k | path.emplace_back(a_node, kid_number); |
581 | 6.07k | } |
582 | | |
583 | | bool |
584 | | NNTreeIterator::deepen(QPDFObjectHandle a_node, bool first, bool allow_empty) |
585 | 17.9k | { |
586 | | // Starting at this node, descend through the first or last kid until we reach a node with |
587 | | // items. If we succeed, return true; otherwise return false and leave path alone. |
588 | | |
589 | 17.9k | auto opath = path; |
590 | 17.9k | bool failed = false; |
591 | | |
592 | 17.9k | QPDFObjGen::set seen; |
593 | 17.9k | for (auto const& i: path) { |
594 | 5.28k | seen.add(i.node); |
595 | 5.28k | } |
596 | 22.1k | while (!failed) { |
597 | 22.1k | if (!seen.add(a_node)) { |
598 | 929 | QTC::TC("qpdf", "NNTree deepen: loop"); |
599 | 929 | warn(impl.qpdf, a_node, "loop detected while traversing name/number tree"); |
600 | 929 | failed = true; |
601 | 929 | break; |
602 | 929 | } |
603 | | |
604 | 21.2k | if (!a_node.isDictionary()) { |
605 | 554 | QTC::TC("qpdf", "NNTree node is not a dictionary"); |
606 | 554 | warn(impl.qpdf, a_node, "non-dictionary node while traversing name/number tree"); |
607 | 554 | failed = true; |
608 | 554 | break; |
609 | 554 | } |
610 | | |
611 | 20.6k | auto kids = a_node.getKey("/Kids"); |
612 | 20.6k | int nkids = kids.isArray() ? kids.getArrayNItems() : 0; |
613 | 20.6k | auto items = a_node.getKey(impl.details.itemsKey()); |
614 | 20.6k | int nitems = items.isArray() ? items.getArrayNItems() : 0; |
615 | 20.6k | if (nitems > 0) { |
616 | 15.0k | setItemNumber(a_node, first ? 0 : nitems - 2); |
617 | 15.0k | break; |
618 | 15.0k | } else if (nkids > 0) { |
619 | 4.17k | int kid_number = first ? 0 : nkids - 1; |
620 | 4.17k | addPathElement(a_node, kid_number); |
621 | 4.17k | auto next = kids.getArrayItem(kid_number); |
622 | 4.17k | if (!next.isIndirect()) { |
623 | 127 | if (impl.auto_repair) { |
624 | 127 | QTC::TC("qpdf", "NNTree fix indirect kid"); |
625 | 127 | warn( |
626 | 127 | impl.qpdf, |
627 | 127 | a_node, |
628 | 127 | ("converting kid number " + std::to_string(kid_number) + |
629 | 127 | " to an indirect object")); |
630 | 127 | next = impl.qpdf.makeIndirectObject(next); |
631 | 127 | kids.setArrayItem(kid_number, next); |
632 | 127 | } else { |
633 | 0 | QTC::TC("qpdf", "NNTree warn indirect kid"); |
634 | 0 | warn( |
635 | 0 | impl.qpdf, |
636 | 0 | a_node, |
637 | 0 | ("kid number " + std::to_string(kid_number) + |
638 | 0 | " is not an indirect object")); |
639 | 0 | } |
640 | 127 | } |
641 | 4.17k | a_node = next; |
642 | 4.17k | } else if (allow_empty && items.isArray()) { |
643 | 996 | QTC::TC("qpdf", "NNTree deepen found empty"); |
644 | 996 | setItemNumber(a_node, -1); |
645 | 996 | break; |
646 | 996 | } else { |
647 | 439 | QTC::TC("qpdf", "NNTree deepen: invalid node"); |
648 | 439 | warn( |
649 | 439 | impl.qpdf, |
650 | 439 | a_node, |
651 | 439 | ("name/number tree node has neither non-empty " + impl.details.itemsKey() + |
652 | 439 | " nor /Kids")); |
653 | 439 | failed = true; |
654 | 439 | break; |
655 | 439 | } |
656 | 20.6k | } |
657 | 17.9k | if (failed) { |
658 | 1.91k | path = opath; |
659 | 1.91k | return false; |
660 | 1.91k | } |
661 | 16.0k | return true; |
662 | 17.9k | } |
663 | | |
664 | | NNTreeImpl::NNTreeImpl( |
665 | | NNTreeDetails const& details, QPDF& qpdf, QPDFObjectHandle& oh, bool auto_repair) : |
666 | 1.39k | details(details), |
667 | 1.39k | qpdf(qpdf), |
668 | 1.39k | oh(oh), |
669 | 1.39k | auto_repair(auto_repair) |
670 | 1.39k | { |
671 | 1.39k | } |
672 | | |
673 | | void |
674 | | NNTreeImpl::setSplitThreshold(int threshold) |
675 | 0 | { |
676 | 0 | split_threshold = threshold; |
677 | 0 | } |
678 | | |
679 | | NNTreeImpl::iterator |
680 | | NNTreeImpl::begin() |
681 | 15.7k | { |
682 | 15.7k | iterator result(*this); |
683 | 15.7k | result.deepen(oh, true, true); |
684 | 15.7k | return result; |
685 | 15.7k | } |
686 | | |
687 | | NNTreeImpl::iterator |
688 | | NNTreeImpl::end() |
689 | 33.2k | { |
690 | 33.2k | return {*this}; |
691 | 33.2k | } |
692 | | |
693 | | NNTreeImpl::iterator |
694 | | NNTreeImpl::last() |
695 | 0 | { |
696 | 0 | iterator result(*this); |
697 | 0 | result.deepen(oh, false, true); |
698 | 0 | return result; |
699 | 0 | } |
700 | | |
701 | | int |
702 | | NNTreeImpl::withinLimits(QPDFObjectHandle key, QPDFObjectHandle node) |
703 | 3.40k | { |
704 | 3.40k | int result = 0; |
705 | 3.40k | auto limits = node.getKey("/Limits"); |
706 | 3.40k | if (limits.isArray() && (limits.getArrayNItems() >= 2) && |
707 | 3.40k | details.keyValid(limits.getArrayItem(0)) && details.keyValid(limits.getArrayItem(1))) { |
708 | 3.27k | if (details.compareKeys(key, limits.getArrayItem(0)) < 0) { |
709 | 1.24k | result = -1; |
710 | 2.02k | } else if (details.compareKeys(key, limits.getArrayItem(1)) > 0) { |
711 | 300 | result = 1; |
712 | 300 | } |
713 | 3.27k | } else { |
714 | 131 | QTC::TC("qpdf", "NNTree missing limits"); |
715 | 131 | error(qpdf, node, "node is missing /Limits"); |
716 | 131 | } |
717 | 3.40k | return result; |
718 | 3.40k | } |
719 | | |
720 | | int |
721 | | NNTreeImpl::binarySearch( |
722 | | QPDFObjectHandle key, |
723 | | QPDFObjectHandle items, |
724 | | int num_items, |
725 | | bool return_prev_if_not_found, |
726 | | int (NNTreeImpl::*compare)(QPDFObjectHandle& key, QPDFObjectHandle& arr, int item)) |
727 | 14.5k | { |
728 | 14.5k | int max_idx = 1; |
729 | 59.3k | while (max_idx < num_items) { |
730 | 44.7k | max_idx <<= 1; |
731 | 44.7k | } |
732 | | |
733 | 14.5k | int step = max_idx / 2; |
734 | 14.5k | int checks = max_idx; |
735 | 14.5k | int idx = step; |
736 | 14.5k | int found_idx = -1; |
737 | 14.5k | bool found = false; |
738 | 14.5k | bool found_leq = false; |
739 | 14.5k | int status = 0; |
740 | | |
741 | 65.3k | while ((!found) && (checks > 0)) { |
742 | 50.7k | if (idx < num_items) { |
743 | 47.2k | status = (this->*compare)(key, items, idx); |
744 | 47.2k | if (status >= 0) { |
745 | 22.8k | found_leq = true; |
746 | 22.8k | found_idx = idx; |
747 | 22.8k | } |
748 | 47.2k | } else { |
749 | | // consider item to be below anything after the top |
750 | 3.56k | status = -1; |
751 | 3.56k | } |
752 | | |
753 | 50.7k | if (status == 0) { |
754 | 8.89k | found = true; |
755 | 41.8k | } else { |
756 | 41.8k | checks >>= 1; |
757 | 41.8k | if (checks > 0) { |
758 | 36.1k | step >>= 1; |
759 | 36.1k | if (step == 0) { |
760 | 8.22k | step = 1; |
761 | 8.22k | } |
762 | | |
763 | 36.1k | if (status < 0) { |
764 | 24.9k | idx -= step; |
765 | 24.9k | } else { |
766 | 11.2k | idx += step; |
767 | 11.2k | } |
768 | 36.1k | } |
769 | 41.8k | } |
770 | 50.7k | } |
771 | | |
772 | 14.5k | if (found || (found_leq && return_prev_if_not_found)) { |
773 | 13.3k | return found_idx; |
774 | 13.3k | } else { |
775 | 1.26k | return -1; |
776 | 1.26k | } |
777 | 14.5k | } |
778 | | |
779 | | int |
780 | | NNTreeImpl::compareKeyItem(QPDFObjectHandle& key, QPDFObjectHandle& items, int idx) |
781 | 43.5k | { |
782 | 43.5k | if (!((items.isArray() && (items.getArrayNItems() > (2 * idx)) && |
783 | 43.5k | details.keyValid(items.getArrayItem(2 * idx))))) { |
784 | 92 | QTC::TC("qpdf", "NNTree item is wrong type"); |
785 | 92 | error(qpdf, oh, ("item at index " + std::to_string(2 * idx) + " is not the right type")); |
786 | 92 | } |
787 | 43.5k | return details.compareKeys(key, items.getArrayItem(2 * idx)); |
788 | 43.5k | } |
789 | | |
790 | | int |
791 | | NNTreeImpl::compareKeyKid(QPDFObjectHandle& key, QPDFObjectHandle& kids, int idx) |
792 | 3.69k | { |
793 | 3.69k | if (!(kids.isArray() && (idx < kids.getArrayNItems()) && |
794 | 3.69k | kids.getArrayItem(idx).isDictionary())) { |
795 | 285 | QTC::TC("qpdf", "NNTree kid is invalid"); |
796 | 285 | error(qpdf, oh, "invalid kid at index " + std::to_string(idx)); |
797 | 285 | } |
798 | 3.69k | return withinLimits(key, kids.getArrayItem(idx)); |
799 | 3.69k | } |
800 | | |
801 | | void |
802 | | NNTreeImpl::repair() |
803 | 469 | { |
804 | 469 | auto new_node = QPDFObjectHandle::newDictionary(); |
805 | 469 | new_node.replaceKey(details.itemsKey(), QPDFObjectHandle::newArray()); |
806 | 469 | NNTreeImpl repl(details, qpdf, new_node, false); |
807 | 11.9k | for (auto const& i: *this) { |
808 | 11.9k | repl.insert(i.first, i.second); |
809 | 11.9k | } |
810 | 469 | oh.replaceKey("/Kids", new_node.getKey("/Kids")); |
811 | 469 | oh.replaceKey(details.itemsKey(), new_node.getKey(details.itemsKey())); |
812 | 469 | } |
813 | | |
814 | | NNTreeImpl::iterator |
815 | | NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found) |
816 | 14.1k | { |
817 | 14.1k | try { |
818 | 14.1k | return findInternal(key, return_prev_if_not_found); |
819 | 14.1k | } catch (QPDFExc& e) { |
820 | 503 | if (auto_repair) { |
821 | 477 | QTC::TC("qpdf", "NNTree repair"); |
822 | 477 | warn(qpdf, oh, std::string("attempting to repair after error: ") + e.what()); |
823 | 477 | repair(); |
824 | 477 | return findInternal(key, return_prev_if_not_found); |
825 | 477 | } else { |
826 | 26 | throw; |
827 | 26 | } |
828 | 503 | } |
829 | 14.1k | } |
830 | | |
831 | | NNTreeImpl::iterator |
832 | | NNTreeImpl::findInternal(QPDFObjectHandle key, bool return_prev_if_not_found) |
833 | 14.5k | { |
834 | 14.5k | auto first_item = begin(); |
835 | 14.5k | auto last_item = end(); |
836 | 14.5k | if (first_item == end()) { |
837 | | // Empty |
838 | 1.44k | return end(); |
839 | 13.0k | } else if ( |
840 | 13.0k | first_item.valid() && details.keyValid(first_item->first) && |
841 | 13.0k | details.compareKeys(key, first_item->first) < 0) { |
842 | | // Before the first key |
843 | 353 | return end(); |
844 | 12.7k | } else if ( |
845 | 12.7k | last_item.valid() && details.keyValid(last_item->first) && |
846 | 12.7k | details.compareKeys(key, last_item->first) > 0) { |
847 | | // After the last key |
848 | 0 | if (return_prev_if_not_found) { |
849 | 0 | return last_item; |
850 | 0 | } else { |
851 | 0 | return end(); |
852 | 0 | } |
853 | 0 | } |
854 | | |
855 | 12.7k | QPDFObjGen::set seen; |
856 | 12.7k | auto node = oh; |
857 | 12.7k | iterator result(*this); |
858 | | |
859 | 15.0k | while (true) { |
860 | 14.6k | if (!seen.add(node)) { |
861 | 0 | QTC::TC("qpdf", "NNTree loop in find"); |
862 | 0 | error(qpdf, node, "loop detected in find"); |
863 | 0 | } |
864 | | |
865 | 14.6k | auto kids = node.getKey("/Kids"); |
866 | 14.6k | int nkids = kids.isArray() ? kids.getArrayNItems() : 0; |
867 | 14.6k | auto items = node.getKey(details.itemsKey()); |
868 | 14.6k | int nitems = items.isArray() ? items.getArrayNItems() : 0; |
869 | 14.6k | if (nitems > 0) { |
870 | 12.2k | int idx = binarySearch( |
871 | 12.2k | key, items, nitems / 2, return_prev_if_not_found, &NNTreeImpl::compareKeyItem); |
872 | 12.2k | if (idx >= 0) { |
873 | 11.4k | result.setItemNumber(node, 2 * idx); |
874 | 11.4k | } |
875 | 12.2k | break; |
876 | 12.2k | } else if (nkids > 0) { |
877 | 2.32k | int idx = binarySearch(key, kids, nkids, true, &NNTreeImpl::compareKeyKid); |
878 | 2.32k | if (idx == -1) { |
879 | 6 | QTC::TC("qpdf", "NNTree -1 in binary search"); |
880 | 6 | error( |
881 | 6 | qpdf, |
882 | 6 | node, |
883 | 6 | "unexpected -1 from binary search of kids;" |
884 | 6 | " limits may by wrong"); |
885 | 6 | } |
886 | 2.32k | result.addPathElement(node, idx); |
887 | 2.32k | node = kids.getArrayItem(idx); |
888 | 2.32k | } else { |
889 | 1 | QTC::TC("qpdf", "NNTree bad node during find"); |
890 | 1 | error(qpdf, node, "bad node during find"); |
891 | 1 | } |
892 | 14.6k | } |
893 | | |
894 | 12.7k | return result; |
895 | 14.5k | } |
896 | | |
897 | | NNTreeImpl::iterator |
898 | | NNTreeImpl::insertFirst(QPDFObjectHandle key, QPDFObjectHandle value) |
899 | 754 | { |
900 | 754 | auto iter = begin(); |
901 | 754 | QPDFObjectHandle items; |
902 | 754 | if (iter.node.isDictionary()) { |
903 | 754 | items = iter.node.getKey(details.itemsKey()); |
904 | 754 | } |
905 | 754 | if (!(items.isArray())) { |
906 | 0 | QTC::TC("qpdf", "NNTree no valid items node in insertFirst"); |
907 | 0 | error(qpdf, oh, "unable to find a valid items node"); |
908 | 0 | } |
909 | 754 | items.insertItem(0, key); |
910 | 754 | items.insertItem(1, value); |
911 | 754 | iter.setItemNumber(iter.node, 0); |
912 | 754 | iter.resetLimits(iter.node, iter.lastPathElement()); |
913 | 754 | iter.split(iter.node, iter.lastPathElement()); |
914 | 754 | return iter; |
915 | 754 | } |
916 | | |
917 | | NNTreeImpl::iterator |
918 | | NNTreeImpl::insert(QPDFObjectHandle key, QPDFObjectHandle value) |
919 | 11.9k | { |
920 | 11.9k | auto iter = find(key, true); |
921 | 11.9k | if (!iter.valid()) { |
922 | 754 | QTC::TC("qpdf", "NNTree insert inserts first"); |
923 | 754 | return insertFirst(key, value); |
924 | 11.2k | } else if (details.compareKeys(key, iter->first) == 0) { |
925 | 6.95k | QTC::TC("qpdf", "NNTree insert replaces"); |
926 | 6.95k | auto items = iter.node.getKey(details.itemsKey()); |
927 | 6.95k | items.setArrayItem(iter.item_number + 1, value); |
928 | 6.95k | iter.updateIValue(); |
929 | 6.95k | } else { |
930 | 4.29k | QTC::TC("qpdf", "NNTree insert inserts after"); |
931 | 4.29k | iter.insertAfter(key, value); |
932 | 4.29k | } |
933 | 11.2k | return iter; |
934 | 11.9k | } |
935 | | |
936 | | bool |
937 | | NNTreeImpl::remove(QPDFObjectHandle key, QPDFObjectHandle* value) |
938 | 0 | { |
939 | 0 | auto iter = find(key, false); |
940 | 0 | if (!iter.valid()) { |
941 | 0 | QTC::TC("qpdf", "NNTree remove not found"); |
942 | 0 | return false; |
943 | 0 | } |
944 | 0 | if (value) { |
945 | 0 | *value = iter->second; |
946 | 0 | } |
947 | 0 | iter.remove(); |
948 | 0 | return true; |
949 | 0 | } |