/src/qpdf/libqpdf/NNTree.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <qpdf/NNTree.hh> |
2 | | |
3 | | #include <qpdf/QTC.hh> |
4 | | #include <qpdf/QUtil.hh> |
5 | | |
6 | | #include <exception> |
7 | | |
8 | | static std::string |
9 | | get_description(QPDFObjectHandle& node) |
10 | 23.8k | { |
11 | 23.8k | std::string result("Name/Number tree node"); |
12 | 23.8k | if (node.isIndirect()) { |
13 | 19.9k | result += " (object " + std::to_string(node.getObjectID()) + ")"; |
14 | 19.9k | } |
15 | 23.8k | return result; |
16 | 23.8k | } |
17 | | |
18 | | static void |
19 | | warn(QPDF& qpdf, QPDFObjectHandle& node, std::string const& msg) |
20 | 20.5k | { |
21 | 20.5k | qpdf.warn(qpdf_e_damaged_pdf, get_description(node), 0, msg); |
22 | 20.5k | } |
23 | | |
24 | | static void |
25 | | error(QPDF& qpdf, QPDFObjectHandle& node, std::string const& msg) |
26 | 3.33k | { |
27 | 3.33k | throw QPDFExc(qpdf_e_damaged_pdf, qpdf.getFilename(), get_description(node), 0, msg); |
28 | 3.33k | } |
29 | | |
30 | | NNTreeIterator::NNTreeIterator(NNTreeImpl& impl) : |
31 | 202k | impl(impl) |
32 | 202k | { |
33 | 202k | } |
34 | | |
35 | | void |
36 | | NNTreeIterator::updateIValue(bool allow_invalid) |
37 | 289k | { |
38 | | // ivalue should never be used inside the class since we return a pointer/reference to it. Every |
39 | | // bit of code that ever changes what object the iterator points to should take care to call |
40 | | // updateIValue. Failure to do this means that any old references to *iter will point to |
41 | | // incorrect objects, though the next dereference of the iterator will fix it. This isn't |
42 | | // necessarily catastrophic, but it would be confusing. The test suite attempts to exercise |
43 | | // various cases to ensure we don't introduce that bug in the future, but sadly it's tricky to |
44 | | // verify by reasoning about the code that this constraint is always satisfied. Whenever we |
45 | | // update what the iterator points to, we should call setItemNumber, which calls this. If we |
46 | | // change what the iterator points to in some other way, such as replacing a value or removing |
47 | | // an item and making the iterator point at a different item in potentially the same position, |
48 | | // we must call updateIValue as well. These cases are handled, and for good measure, we also |
49 | | // call updateIValue in operator* and operator->. |
50 | | |
51 | 289k | bool okay = false; |
52 | 289k | if (item_number >= 0 && node.isDictionary()) { |
53 | 283k | auto items = node.getKey(impl.details.itemsKey()); |
54 | 283k | if (item_number + 1 < items.getArrayNItems()) { |
55 | 282k | okay = true; |
56 | 282k | ivalue.first = items.getArrayItem(item_number); |
57 | 282k | ivalue.second = items.getArrayItem(1 + item_number); |
58 | 282k | } else { |
59 | 878 | error(impl.qpdf, node, "update ivalue: items array is too short"); |
60 | 878 | } |
61 | 283k | } |
62 | 289k | if (!okay) { |
63 | 6.11k | if (!allow_invalid) { |
64 | 0 | throw std::logic_error( |
65 | 0 | "attempt made to dereference an invalid name/number tree iterator"); |
66 | 0 | } |
67 | 6.11k | ivalue.first = QPDFObjectHandle(); |
68 | 6.11k | ivalue.second = QPDFObjectHandle(); |
69 | 6.11k | } |
70 | 289k | } |
71 | | |
72 | | NNTreeIterator::PathElement::PathElement(QPDFObjectHandle const& node, int kid_number) : |
73 | 47.8k | node(node), |
74 | 47.8k | kid_number(kid_number) |
75 | 47.8k | { |
76 | 47.8k | } |
77 | | |
78 | | QPDFObjectHandle |
79 | | NNTreeIterator::getNextKid(PathElement& pe, bool backward) |
80 | 4.38k | { |
81 | 4.38k | QPDFObjectHandle result; |
82 | 4.38k | bool found = false; |
83 | 14.7k | while (!found) { |
84 | 10.3k | pe.kid_number += backward ? -1 : 1; |
85 | 10.3k | auto kids = pe.node.getKey("/Kids"); |
86 | 10.3k | if ((pe.kid_number >= 0) && (pe.kid_number < kids.getArrayNItems())) { |
87 | 8.72k | result = kids.getArrayItem(pe.kid_number); |
88 | 8.72k | if (result.isDictionary() && |
89 | 8.72k | (result.hasKey("/Kids") || result.hasKey(impl.details.itemsKey()))) { |
90 | 2.72k | found = true; |
91 | 6.00k | } else { |
92 | 6.00k | QTC::TC("qpdf", "NNTree skip invalid kid"); |
93 | 6.00k | warn( |
94 | 6.00k | impl.qpdf, |
95 | 6.00k | pe.node, |
96 | 6.00k | ("skipping over invalid kid at index " + std::to_string(pe.kid_number))); |
97 | 6.00k | } |
98 | 8.72k | } else { |
99 | 1.64k | result = QPDFObjectHandle::newNull(); |
100 | 1.64k | found = true; |
101 | 1.64k | } |
102 | 10.3k | } |
103 | 4.38k | return result; |
104 | 4.38k | } |
105 | | |
106 | | bool |
107 | | NNTreeIterator::valid() const |
108 | 344k | { |
109 | 344k | return item_number >= 0; |
110 | 344k | } |
111 | | |
112 | | void |
113 | | NNTreeIterator::increment(bool backward) |
114 | 69.8k | { |
115 | 69.8k | if (item_number < 0) { |
116 | 0 | QTC::TC("qpdf", "NNTree increment end()"); |
117 | 0 | deepen(impl.oh, !backward, true); |
118 | 0 | return; |
119 | 0 | } |
120 | 69.8k | bool found_valid_key = false; |
121 | 148k | while (valid() && !found_valid_key) { |
122 | 78.6k | item_number += backward ? -2 : 2; |
123 | 78.6k | auto items = node.getKey(impl.details.itemsKey()); |
124 | 78.6k | if (item_number < 0 || item_number >= items.getArrayNItems()) { |
125 | 2.57k | bool found = false; |
126 | 2.57k | setItemNumber(QPDFObjectHandle(), -1); |
127 | 6.95k | while (!(found || path.empty())) { |
128 | 4.38k | auto& element = path.back(); |
129 | 4.38k | auto pe_node = getNextKid(element, backward); |
130 | 4.38k | if (pe_node.isNull()) { |
131 | 1.64k | path.pop_back(); |
132 | 2.73k | } else { |
133 | 2.73k | found = deepen(pe_node, !backward, false); |
134 | 2.73k | } |
135 | 4.38k | } |
136 | 2.57k | } |
137 | 78.6k | if (item_number >= 0) { |
138 | 77.2k | items = node.getKey(impl.details.itemsKey()); |
139 | 77.2k | if (item_number + 1 >= items.getArrayNItems()) { |
140 | 919 | QTC::TC("qpdf", "NNTree skip item at end of short items"); |
141 | 919 | warn(impl.qpdf, node, "items array doesn't have enough elements"); |
142 | 76.3k | } else if (!impl.details.keyValid(items.getArrayItem(item_number))) { |
143 | 7.90k | QTC::TC("qpdf", "NNTree skip invalid key"); |
144 | 7.90k | warn( |
145 | 7.90k | impl.qpdf, |
146 | 7.90k | node, |
147 | 7.90k | ("item " + std::to_string(item_number) + " has the wrong type")); |
148 | 68.4k | } else { |
149 | 68.4k | found_valid_key = true; |
150 | 68.4k | } |
151 | 77.2k | } |
152 | 78.6k | } |
153 | 69.8k | } |
154 | | |
155 | | void |
156 | | NNTreeIterator::resetLimits(QPDFObjectHandle a_node, std::list<PathElement>::iterator parent) |
157 | 33.5k | { |
158 | 33.5k | bool done = false; |
159 | 49.9k | while (!done) { |
160 | 33.5k | if (parent == path.end()) { |
161 | 17.2k | QTC::TC("qpdf", "NNTree remove limits from root"); |
162 | 17.2k | a_node.removeKey("/Limits"); |
163 | 17.2k | done = true; |
164 | 17.2k | break; |
165 | 17.2k | } |
166 | 16.3k | auto kids = a_node.getKey("/Kids"); |
167 | 16.3k | int nkids = kids.isArray() ? kids.getArrayNItems() : 0; |
168 | 16.3k | auto items = a_node.getKey(impl.details.itemsKey()); |
169 | 16.3k | int nitems = items.isArray() ? items.getArrayNItems() : 0; |
170 | | |
171 | 16.3k | bool changed = true; |
172 | 16.3k | QPDFObjectHandle first; |
173 | 16.3k | QPDFObjectHandle last; |
174 | 16.3k | if (nitems >= 2) { |
175 | 15.7k | first = items.getArrayItem(0); |
176 | 15.7k | last = items.getArrayItem((nitems - 1) & ~1); |
177 | 15.7k | } else if (nkids > 0) { |
178 | 587 | auto first_kid = kids.getArrayItem(0); |
179 | 587 | auto last_kid = kids.getArrayItem(nkids - 1); |
180 | 587 | if (first_kid.isDictionary() && last_kid.isDictionary()) { |
181 | 587 | auto first_limits = first_kid.getKey("/Limits"); |
182 | 587 | auto last_limits = last_kid.getKey("/Limits"); |
183 | 587 | if (first_limits.isArray() && (first_limits.getArrayNItems() >= 2) && |
184 | 587 | last_limits.isArray() && (last_limits.getArrayNItems() >= 2)) { |
185 | 587 | first = first_limits.getArrayItem(0); |
186 | 587 | last = last_limits.getArrayItem(1); |
187 | 587 | } |
188 | 587 | } |
189 | 587 | } |
190 | 16.3k | if (first && last) { |
191 | 16.3k | auto limits = QPDFObjectHandle::newArray(); |
192 | 16.3k | limits.appendItem(first); |
193 | 16.3k | limits.appendItem(last); |
194 | 16.3k | auto olimits = a_node.getKey("/Limits"); |
195 | 16.3k | if (olimits.isArray() && (olimits.getArrayNItems() == 2)) { |
196 | 14.4k | auto ofirst = olimits.getArrayItem(0); |
197 | 14.4k | auto olast = olimits.getArrayItem(1); |
198 | 14.4k | if (impl.details.keyValid(ofirst) && impl.details.keyValid(olast) && |
199 | 14.4k | (impl.details.compareKeys(first, ofirst) == 0) && |
200 | 14.4k | (impl.details.compareKeys(last, olast) == 0)) { |
201 | 10.5k | QTC::TC("qpdf", "NNTree limits didn't change"); |
202 | 10.5k | changed = false; |
203 | 10.5k | } |
204 | 14.4k | } |
205 | 16.3k | if (changed && !a_node.isSameObjectAs(path.begin()->node)) { |
206 | 5.28k | a_node.replaceKey("/Limits", limits); |
207 | 5.28k | } |
208 | 16.3k | } else { |
209 | 0 | QTC::TC("qpdf", "NNTree unable to determine limits"); |
210 | 0 | warn(impl.qpdf, a_node, "unable to determine limits"); |
211 | 0 | } |
212 | | |
213 | 16.3k | if (!changed || parent == path.begin()) { |
214 | 16.3k | done = true; |
215 | 16.3k | } else { |
216 | 0 | a_node = parent->node; |
217 | 0 | --parent; |
218 | 0 | } |
219 | 16.3k | } |
220 | 33.5k | } |
221 | | |
222 | | void |
223 | | NNTreeIterator::split(QPDFObjectHandle to_split, std::list<PathElement>::iterator parent) |
224 | 31.6k | { |
225 | | // Split some node along the path to the item pointed to by this iterator, and adjust the |
226 | | // iterator so it points to the same item. |
227 | | |
228 | | // In examples, for simplicity, /Nums is shown to just contain numbers instead of pairs. Imagine |
229 | | // this tree: |
230 | | // |
231 | | // root: << /Kids [ A B C D ] >> |
232 | | // A: << /Nums [ 1 2 3 4 ] >> |
233 | | // B: << /Nums [ 5 6 7 8 ] >> |
234 | | // C: << /Nums [ 9 10 11 12 ] >> |
235 | | // D: << /Kids [ E F ] |
236 | | // E: << /Nums [ 13 14 15 16 ] >> |
237 | | // F: << /Nums [ 17 18 19 20 ] >> |
238 | | |
239 | | // iter1 (points to 19) |
240 | | // path: |
241 | | // - { node: root: kid_number: 3 } |
242 | | // - { node: D, kid_number: 1 } |
243 | | // node: F |
244 | | // item_number: 2 |
245 | | |
246 | | // iter2 (points to 1) |
247 | | // path: |
248 | | // - { node: root, kid_number: 0} |
249 | | // node: A |
250 | | // item_number: 0 |
251 | | |
252 | 31.6k | if (!valid()) { |
253 | 0 | throw std::logic_error("NNTreeIterator::split called an invalid iterator"); |
254 | 0 | } |
255 | | |
256 | | // Find the array we actually need to split, which is either this node's kids or items. |
257 | 31.6k | auto kids = to_split.getKey("/Kids"); |
258 | 31.6k | int nkids = kids.isArray() ? kids.getArrayNItems() : 0; |
259 | 31.6k | auto items = to_split.getKey(impl.details.itemsKey()); |
260 | 31.6k | int nitems = items.isArray() ? items.getArrayNItems() : 0; |
261 | | |
262 | 31.6k | QPDFObjectHandle first_half; |
263 | 31.6k | int n = 0; |
264 | 31.6k | std::string key; |
265 | 31.6k | int threshold = 0; |
266 | 31.6k | if (nkids > 0) { |
267 | 587 | QTC::TC("qpdf", "NNTree split kids"); |
268 | 587 | first_half = kids; |
269 | 587 | n = nkids; |
270 | 587 | threshold = impl.split_threshold; |
271 | 587 | key = "/Kids"; |
272 | 31.0k | } else if (nitems > 0) { |
273 | 31.0k | QTC::TC("qpdf", "NNTree split items"); |
274 | 31.0k | first_half = items; |
275 | 31.0k | n = nitems; |
276 | 31.0k | threshold = 2 * impl.split_threshold; |
277 | 31.0k | key = impl.details.itemsKey(); |
278 | 31.0k | } else { |
279 | 0 | throw std::logic_error("NNTreeIterator::split called on invalid node"); |
280 | 0 | } |
281 | | |
282 | 31.6k | if (n <= threshold) { |
283 | 30.6k | return; |
284 | 30.6k | } |
285 | | |
286 | 979 | bool is_root = (parent == path.end()); |
287 | 979 | bool is_leaf = (nitems > 0); |
288 | | |
289 | | // CURRENT STATE: tree is in original state; iterator is valid and unchanged. |
290 | | |
291 | 979 | if (is_root) { |
292 | | // What we want to do is to create a new node for the second half of the items and put it in |
293 | | // the parent's /Kids array right after the element that points to the current to_split |
294 | | // node, but if we're splitting root, there is no parent, so handle that first. |
295 | | |
296 | | // In the non-root case, parent points to the path element whose /Kids contains the first |
297 | | // half node, and the first half node is to_split. If we are splitting the root, we need to |
298 | | // push everything down a level, but we want to keep the actual root object the same so that |
299 | | // indirect references to it remain intact (and also in case it might be a direct object, |
300 | | // which it shouldn't be but that case probably exists in the wild). To achieve this, we |
301 | | // create a new node for the first half and then replace /Kids in the root to contain it. |
302 | | // Then we adjust the path so that the first element is root and the second element, if any, |
303 | | // is the new first half. In this way, we make the root case identical to the non-root case |
304 | | // so remaining logic can handle them in the same way. |
305 | | |
306 | 392 | auto first_node = impl.qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary()); |
307 | 392 | first_node.replaceKey(key, first_half); |
308 | 392 | QPDFObjectHandle new_kids = QPDFObjectHandle::newArray(); |
309 | 392 | new_kids.appendItem(first_node); |
310 | 392 | to_split.removeKey("/Limits"); // already shouldn't be there for root |
311 | 392 | to_split.removeKey(impl.details.itemsKey()); |
312 | 392 | to_split.replaceKey("/Kids", new_kids); |
313 | 392 | if (is_leaf) { |
314 | 391 | QTC::TC("qpdf", "NNTree split root + leaf"); |
315 | 391 | node = first_node; |
316 | 391 | } else { |
317 | 1 | QTC::TC("qpdf", "NNTree split root + !leaf"); |
318 | 1 | auto next = path.begin(); |
319 | 1 | next->node = first_node; |
320 | 1 | } |
321 | 392 | this->path.emplace_front(to_split, 0); |
322 | 392 | parent = path.begin(); |
323 | 392 | to_split = first_node; |
324 | 392 | } |
325 | | |
326 | | // CURRENT STATE: parent is guaranteed to be defined, and we have the invariants that |
327 | | // parent[/Kids][kid_number] == to_split and (++parent).node == to_split. |
328 | | |
329 | | // Create a second half array, and transfer the second half of the items into the second half |
330 | | // array. |
331 | 979 | QPDFObjectHandle second_half = QPDFObjectHandle::newArray(); |
332 | 979 | int start_idx = ((n / 2) & ~1); |
333 | 34.2k | while (first_half.getArrayNItems() > start_idx) { |
334 | 33.2k | second_half.appendItem(first_half.getArrayItem(start_idx)); |
335 | 33.2k | first_half.eraseItem(start_idx); |
336 | 33.2k | } |
337 | 979 | resetLimits(to_split, parent); |
338 | | |
339 | | // Create a new node to contain the second half |
340 | 979 | QPDFObjectHandle second_node = impl.qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary()); |
341 | 979 | second_node.replaceKey(key, second_half); |
342 | 979 | resetLimits(second_node, parent); |
343 | | |
344 | | // CURRENT STATE: half the items from the kids or items array in the node being split have been |
345 | | // moved into a new node. The new node is not yet attached to the tree. The iterator may have a |
346 | | // path element or leaf node that is out of bounds. |
347 | | |
348 | | // We need to adjust the parent to add the second node to /Kids and, if needed, update |
349 | | // kid_number to traverse through it. We need to update to_split's path element, or the node if |
350 | | // this is a leaf, so that the kid/item number points to the right place. |
351 | | |
352 | 979 | auto parent_kids = parent->node.getKey("/Kids"); |
353 | 979 | parent_kids.insertItem(parent->kid_number + 1, second_node); |
354 | 979 | auto cur_elem = parent; |
355 | 979 | ++cur_elem; // points to end() for leaf nodes |
356 | 979 | int old_idx = (is_leaf ? item_number : cur_elem->kid_number); |
357 | 979 | if (old_idx >= start_idx) { |
358 | 545 | ++parent->kid_number; |
359 | 545 | if (is_leaf) { |
360 | 545 | QTC::TC("qpdf", "NNTree split second half item"); |
361 | 545 | setItemNumber(second_node, item_number - start_idx); |
362 | 545 | } else { |
363 | 0 | QTC::TC("qpdf", "NNTree split second half kid"); |
364 | 0 | cur_elem->node = second_node; |
365 | 0 | cur_elem->kid_number -= start_idx; |
366 | 0 | } |
367 | 545 | } |
368 | 979 | if (!is_root) { |
369 | 587 | QTC::TC("qpdf", "NNTree split parent"); |
370 | 587 | auto next = parent->node; |
371 | 587 | resetLimits(next, parent); |
372 | 587 | --parent; |
373 | 587 | split(next, parent); |
374 | 587 | } |
375 | 979 | } |
376 | | |
377 | | std::list<NNTreeIterator::PathElement>::iterator |
378 | | NNTreeIterator::lastPathElement() |
379 | 62.0k | { |
380 | 62.0k | auto result = path.end(); |
381 | 62.0k | if (!path.empty()) { |
382 | 27.6k | --result; |
383 | 27.6k | } |
384 | 62.0k | return result; |
385 | 62.0k | } |
386 | | |
387 | | void |
388 | | NNTreeIterator::insertAfter(QPDFObjectHandle key, QPDFObjectHandle value) |
389 | 29.0k | { |
390 | 29.0k | if (!valid()) { |
391 | 0 | QTC::TC("qpdf", "NNTree insertAfter inserts first"); |
392 | 0 | impl.insertFirst(key, value); |
393 | 0 | deepen(impl.oh, true, false); |
394 | 0 | return; |
395 | 0 | } |
396 | | |
397 | 29.0k | auto items = node.getKey(impl.details.itemsKey()); |
398 | 29.0k | if (!items.isArray()) { |
399 | 0 | error(impl.qpdf, node, "node contains no items array"); |
400 | 0 | } |
401 | 29.0k | if (items.getArrayNItems() < item_number + 2) { |
402 | 0 | error(impl.qpdf, node, "insert: items array is too short"); |
403 | 0 | } |
404 | 29.0k | items.insertItem(item_number + 2, key); |
405 | 29.0k | items.insertItem(item_number + 3, value); |
406 | 29.0k | resetLimits(node, lastPathElement()); |
407 | 29.0k | split(node, lastPathElement()); |
408 | 29.0k | increment(false); |
409 | 29.0k | } |
410 | | |
411 | | void |
412 | | NNTreeIterator::remove() |
413 | 0 | { |
414 | | // Remove this item, leaving the tree valid and this iterator pointing to the next item. |
415 | |
|
416 | 0 | if (!valid()) { |
417 | 0 | throw std::logic_error("attempt made to remove an invalid iterator"); |
418 | 0 | } |
419 | 0 | auto items = node.getKey(impl.details.itemsKey()); |
420 | 0 | int nitems = items.getArrayNItems(); |
421 | 0 | if (item_number + 2 > nitems) { |
422 | 0 | error(impl.qpdf, node, "found short items array while removing an item"); |
423 | 0 | } |
424 | |
|
425 | 0 | items.eraseItem(item_number); |
426 | 0 | items.eraseItem(item_number); |
427 | 0 | nitems -= 2; |
428 | |
|
429 | 0 | if (nitems > 0) { |
430 | | // There are still items left |
431 | |
|
432 | 0 | if (item_number == 0 || item_number == nitems) { |
433 | | // We removed either the first or last item of an items array that remains non-empty, so |
434 | | // we have to adjust limits. |
435 | 0 | QTC::TC("qpdf", "NNTree remove reset limits"); |
436 | 0 | resetLimits(node, lastPathElement()); |
437 | 0 | } |
438 | |
|
439 | 0 | if (item_number == nitems) { |
440 | | // We removed the last item of a non-empty items array, so advance to the successor of |
441 | | // the previous item. |
442 | 0 | QTC::TC("qpdf", "NNTree erased last item"); |
443 | 0 | item_number -= 2; |
444 | 0 | increment(false); |
445 | 0 | } else if (item_number < nitems) { |
446 | | // We don't have to do anything since the removed item's successor now occupies its |
447 | | // former location. |
448 | 0 | QTC::TC("qpdf", "NNTree erased non-last item"); |
449 | 0 | updateIValue(); |
450 | 0 | } else { |
451 | | // We already checked to ensure this condition would not happen. |
452 | 0 | throw std::logic_error("NNTreeIterator::remove: item_number > nitems after erase"); |
453 | 0 | } |
454 | 0 | return; |
455 | 0 | } |
456 | | |
457 | 0 | if (path.empty()) { |
458 | | // Special case: if this is the root node, we can leave it empty. |
459 | 0 | QTC::TC("qpdf", "NNTree erased all items on leaf/root"); |
460 | 0 | setItemNumber(impl.oh, -1); |
461 | 0 | return; |
462 | 0 | } |
463 | | |
464 | 0 | QTC::TC("qpdf", "NNTree items is empty after remove"); |
465 | | |
466 | | // We removed the last item from this items array, so we need to remove this node from the |
467 | | // parent on up the tree. Then we need to position ourselves at the removed item's successor. |
468 | 0 | bool done = false; |
469 | 0 | while (!done) { |
470 | 0 | auto element = lastPathElement(); |
471 | 0 | auto parent = element; |
472 | 0 | --parent; |
473 | 0 | auto kids = element->node.getKey("/Kids"); |
474 | 0 | kids.eraseItem(element->kid_number); |
475 | 0 | auto nkids = kids.getArrayNItems(); |
476 | 0 | if (nkids > 0) { |
477 | | // The logic here is similar to the items case. |
478 | 0 | if ((element->kid_number == 0) || (element->kid_number == nkids)) { |
479 | 0 | QTC::TC("qpdf", "NNTree erased first or last kid"); |
480 | 0 | resetLimits(element->node, parent); |
481 | 0 | } |
482 | 0 | if (element->kid_number == nkids) { |
483 | | // Move to the successor of the last child of the previous kid. |
484 | 0 | setItemNumber(QPDFObjectHandle(), -1); |
485 | 0 | --element->kid_number; |
486 | 0 | deepen(kids.getArrayItem(element->kid_number), false, true); |
487 | 0 | if (valid()) { |
488 | 0 | increment(false); |
489 | 0 | if (!valid()) { |
490 | 0 | QTC::TC("qpdf", "NNTree erased last item in tree"); |
491 | 0 | } else { |
492 | 0 | QTC::TC("qpdf", "NNTree erased last kid"); |
493 | 0 | } |
494 | 0 | } |
495 | 0 | } else { |
496 | | // Next kid is in deleted kid's position |
497 | 0 | QTC::TC("qpdf", "NNTree erased non-last kid"); |
498 | 0 | deepen(kids.getArrayItem(element->kid_number), true, true); |
499 | 0 | } |
500 | 0 | done = true; |
501 | 0 | } else if (parent == path.end()) { |
502 | | // We erased the very last item. Convert the root to an empty items array. |
503 | 0 | QTC::TC("qpdf", "NNTree non-flat tree is empty after remove"); |
504 | 0 | element->node.removeKey("/Kids"); |
505 | 0 | element->node.replaceKey(impl.details.itemsKey(), QPDFObjectHandle::newArray()); |
506 | 0 | path.clear(); |
507 | 0 | setItemNumber(impl.oh, -1); |
508 | 0 | done = true; |
509 | 0 | } else { |
510 | | // Walk up the tree and continue |
511 | 0 | QTC::TC("qpdf", "NNTree remove walking up tree"); |
512 | 0 | path.pop_back(); |
513 | 0 | } |
514 | 0 | } |
515 | 0 | } |
516 | | |
517 | | NNTreeIterator& |
518 | | NNTreeIterator::operator++() |
519 | 40.8k | { |
520 | 40.8k | increment(false); |
521 | 40.8k | return *this; |
522 | 40.8k | } |
523 | | |
524 | | NNTreeIterator& |
525 | | NNTreeIterator::operator--() |
526 | 0 | { |
527 | 0 | increment(true); |
528 | 0 | return *this; |
529 | 0 | } |
530 | | |
531 | | NNTreeIterator::reference |
532 | | NNTreeIterator::operator*() |
533 | 41.1k | { |
534 | 41.1k | updateIValue(false); |
535 | 41.1k | return ivalue; |
536 | 41.1k | } |
537 | | |
538 | | NNTreeIterator::pointer |
539 | | NNTreeIterator::operator->() |
540 | 140k | { |
541 | 140k | updateIValue(false); |
542 | 140k | return &ivalue; |
543 | 140k | } |
544 | | |
545 | | bool |
546 | | NNTreeIterator::operator==(NNTreeIterator const& other) const |
547 | 94.2k | { |
548 | 94.2k | if (item_number == -1 && other.item_number == -1) { |
549 | 6.10k | return true; |
550 | 6.10k | } |
551 | 88.1k | if (path.size() != other.path.size()) { |
552 | 60.6k | return false; |
553 | 60.6k | } |
554 | 27.4k | auto tpi = path.begin(); |
555 | 27.4k | auto opi = other.path.begin(); |
556 | 27.4k | while (tpi != path.end()) { |
557 | 0 | if (tpi->kid_number != opi->kid_number) { |
558 | 0 | return false; |
559 | 0 | } |
560 | 0 | ++tpi; |
561 | 0 | ++opi; |
562 | 0 | } |
563 | 27.4k | if (item_number != other.item_number) { |
564 | 27.4k | return false; |
565 | 27.4k | } |
566 | 0 | return true; |
567 | 27.4k | } |
568 | | |
569 | | void |
570 | | NNTreeIterator::setItemNumber(QPDFObjectHandle const& a_node, int n) |
571 | 98.5k | { |
572 | 98.5k | node = a_node; |
573 | 98.5k | item_number = n; |
574 | 98.5k | updateIValue(); |
575 | 98.5k | } |
576 | | |
577 | | void |
578 | | NNTreeIterator::addPathElement(QPDFObjectHandle const& a_node, int kid_number) |
579 | 47.4k | { |
580 | 47.4k | path.emplace_back(a_node, kid_number); |
581 | 47.4k | } |
582 | | |
583 | | bool |
584 | | NNTreeIterator::deepen(QPDFObjectHandle a_node, bool first, bool allow_empty) |
585 | 55.1k | { |
586 | | // Starting at this node, descend through the first or last kid until we reach a node with |
587 | | // items. If we succeed, return true; otherwise return false and leave path alone. |
588 | | |
589 | 55.1k | auto opath = path; |
590 | 55.1k | bool failed = false; |
591 | | |
592 | 55.1k | QPDFObjGen::set seen; |
593 | 55.1k | for (auto const& i: path) { |
594 | 5.13k | seen.add(i.node); |
595 | 5.13k | } |
596 | 83.1k | while (!failed) { |
597 | 82.9k | if (!seen.add(a_node)) { |
598 | 422 | QTC::TC("qpdf", "NNTree deepen: loop"); |
599 | 422 | warn(impl.qpdf, a_node, "loop detected while traversing name/number tree"); |
600 | 422 | failed = true; |
601 | 422 | break; |
602 | 422 | } |
603 | | |
604 | 82.5k | if (!a_node.isDictionary()) { |
605 | 1.23k | QTC::TC("qpdf", "NNTree node is not a dictionary"); |
606 | 1.23k | warn(impl.qpdf, a_node, "non-dictionary node while traversing name/number tree"); |
607 | 1.23k | failed = true; |
608 | 1.23k | break; |
609 | 1.23k | } |
610 | | |
611 | 81.2k | auto kids = a_node.getKey("/Kids"); |
612 | 81.2k | int nkids = kids.isArray() ? kids.getArrayNItems() : 0; |
613 | 81.2k | auto items = a_node.getKey(impl.details.itemsKey()); |
614 | 81.2k | int nitems = items.isArray() ? items.getArrayNItems() : 0; |
615 | 81.2k | if (nitems > 0) { |
616 | 48.5k | setItemNumber(a_node, first ? 0 : nitems - 2); |
617 | 48.5k | break; |
618 | 48.5k | } else if (nkids > 0) { |
619 | 27.9k | int kid_number = first ? 0 : nkids - 1; |
620 | 27.9k | addPathElement(a_node, kid_number); |
621 | 27.9k | auto next = kids.getArrayItem(kid_number); |
622 | 27.9k | if (!next.isIndirect()) { |
623 | 266 | if (impl.auto_repair) { |
624 | 266 | QTC::TC("qpdf", "NNTree fix indirect kid"); |
625 | 266 | warn( |
626 | 266 | impl.qpdf, |
627 | 266 | a_node, |
628 | 266 | ("converting kid number " + std::to_string(kid_number) + |
629 | 266 | " to an indirect object")); |
630 | 266 | next = impl.qpdf.makeIndirectObject(next); |
631 | 266 | kids.setArrayItem(kid_number, next); |
632 | 266 | } else { |
633 | 0 | QTC::TC("qpdf", "NNTree warn indirect kid"); |
634 | 0 | warn( |
635 | 0 | impl.qpdf, |
636 | 0 | a_node, |
637 | 0 | ("kid number " + std::to_string(kid_number) + |
638 | 0 | " is not an indirect object")); |
639 | 0 | } |
640 | 266 | } |
641 | 27.9k | a_node = next; |
642 | 27.9k | } else if (allow_empty && items.isArray()) { |
643 | 3.54k | QTC::TC("qpdf", "NNTree deepen found empty"); |
644 | 3.54k | setItemNumber(a_node, -1); |
645 | 3.54k | break; |
646 | 3.54k | } else { |
647 | 1.18k | QTC::TC("qpdf", "NNTree deepen: invalid node"); |
648 | 1.18k | warn( |
649 | 1.18k | impl.qpdf, |
650 | 1.18k | a_node, |
651 | 1.18k | ("name/number tree node has neither non-empty " + impl.details.itemsKey() + |
652 | 1.18k | " nor /Kids")); |
653 | 1.18k | failed = true; |
654 | 1.18k | break; |
655 | 1.18k | } |
656 | 81.2k | } |
657 | 55.1k | if (failed) { |
658 | 2.47k | path = opath; |
659 | 2.47k | return false; |
660 | 2.47k | } |
661 | 52.6k | return true; |
662 | 55.1k | } |
663 | | |
664 | | NNTreeImpl::NNTreeImpl( |
665 | | NNTreeDetails const& details, QPDF& qpdf, QPDFObjectHandle& oh, bool auto_repair) : |
666 | 3.42k | details(details), |
667 | 3.42k | qpdf(qpdf), |
668 | 3.42k | oh(oh), |
669 | 3.42k | auto_repair(auto_repair) |
670 | 3.42k | { |
671 | 3.42k | } |
672 | | |
673 | | void |
674 | | NNTreeImpl::setSplitThreshold(int threshold) |
675 | 0 | { |
676 | 0 | split_threshold = threshold; |
677 | 0 | } |
678 | | |
679 | | NNTreeImpl::iterator |
680 | | NNTreeImpl::begin() |
681 | 52.3k | { |
682 | 52.3k | iterator result(*this); |
683 | 52.3k | result.deepen(oh, true, true); |
684 | 52.3k | return result; |
685 | 52.3k | } |
686 | | |
687 | | NNTreeImpl::iterator |
688 | | NNTreeImpl::end() |
689 | 105k | { |
690 | 105k | return {*this}; |
691 | 105k | } |
692 | | |
693 | | NNTreeImpl::iterator |
694 | | NNTreeImpl::last() |
695 | 0 | { |
696 | 0 | iterator result(*this); |
697 | 0 | result.deepen(oh, false, true); |
698 | 0 | return result; |
699 | 0 | } |
700 | | |
701 | | int |
702 | | NNTreeImpl::withinLimits(QPDFObjectHandle key, QPDFObjectHandle node) |
703 | 36.6k | { |
704 | 36.6k | int result = 0; |
705 | 36.6k | auto limits = node.getKey("/Limits"); |
706 | 36.6k | if (limits.isArray() && (limits.getArrayNItems() >= 2) && |
707 | 36.6k | details.keyValid(limits.getArrayItem(0)) && details.keyValid(limits.getArrayItem(1))) { |
708 | 36.0k | if (details.compareKeys(key, limits.getArrayItem(0)) < 0) { |
709 | 13.4k | result = -1; |
710 | 22.5k | } else if (details.compareKeys(key, limits.getArrayItem(1)) > 0) { |
711 | 6.42k | result = 1; |
712 | 6.42k | } |
713 | 36.0k | } else { |
714 | 662 | QTC::TC("qpdf", "NNTree missing limits"); |
715 | 662 | error(qpdf, node, "node is missing /Limits"); |
716 | 662 | } |
717 | 36.6k | return result; |
718 | 36.6k | } |
719 | | |
720 | | int |
721 | | NNTreeImpl::binarySearch( |
722 | | QPDFObjectHandle key, |
723 | | QPDFObjectHandle items, |
724 | | int num_items, |
725 | | bool return_prev_if_not_found, |
726 | | int (NNTreeImpl::*compare)(QPDFObjectHandle& key, QPDFObjectHandle& arr, int item)) |
727 | 63.5k | { |
728 | 63.5k | int max_idx = 1; |
729 | 276k | while (max_idx < num_items) { |
730 | 213k | max_idx <<= 1; |
731 | 213k | } |
732 | | |
733 | 63.5k | int step = max_idx / 2; |
734 | 63.5k | int checks = max_idx; |
735 | 63.5k | int idx = step; |
736 | 63.5k | int found_idx = -1; |
737 | 63.5k | bool found = false; |
738 | 63.5k | bool found_leq = false; |
739 | 63.5k | int status = 0; |
740 | | |
741 | 305k | while ((!found) && (checks > 0)) { |
742 | 241k | if (idx < num_items) { |
743 | 218k | status = (this->*compare)(key, items, idx); |
744 | 218k | if (status >= 0) { |
745 | 120k | found_leq = true; |
746 | 120k | found_idx = idx; |
747 | 120k | } |
748 | 218k | } else { |
749 | | // consider item to be below anything after the top |
750 | 23.1k | status = -1; |
751 | 23.1k | } |
752 | | |
753 | 241k | if (status == 0) { |
754 | 26.6k | found = true; |
755 | 215k | } else { |
756 | 215k | checks >>= 1; |
757 | 215k | if (checks > 0) { |
758 | 178k | step >>= 1; |
759 | 178k | if (step == 0) { |
760 | 39.7k | step = 1; |
761 | 39.7k | } |
762 | | |
763 | 178k | if (status < 0) { |
764 | 101k | idx -= step; |
765 | 101k | } else { |
766 | 76.4k | idx += step; |
767 | 76.4k | } |
768 | 178k | } |
769 | 215k | } |
770 | 241k | } |
771 | | |
772 | 63.5k | if (found || (found_leq && return_prev_if_not_found)) { |
773 | 60.7k | return found_idx; |
774 | 60.7k | } else { |
775 | 2.77k | return -1; |
776 | 2.77k | } |
777 | 63.5k | } |
778 | | |
779 | | int |
780 | | NNTreeImpl::compareKeyItem(QPDFObjectHandle& key, QPDFObjectHandle& items, int idx) |
781 | 181k | { |
782 | 181k | if (!((items.isArray() && (items.getArrayNItems() > (2 * idx)) && |
783 | 181k | details.keyValid(items.getArrayItem(2 * idx))))) { |
784 | 1.02k | QTC::TC("qpdf", "NNTree item is wrong type"); |
785 | 1.02k | error(qpdf, oh, ("item at index " + std::to_string(2 * idx) + " is not the right type")); |
786 | 1.02k | } |
787 | 181k | return details.compareKeys(key, items.getArrayItem(2 * idx)); |
788 | 181k | } |
789 | | |
790 | | int |
791 | | NNTreeImpl::compareKeyKid(QPDFObjectHandle& key, QPDFObjectHandle& kids, int idx) |
792 | 37.3k | { |
793 | 37.3k | if (!(kids.isArray() && (idx < kids.getArrayNItems()) && |
794 | 37.3k | kids.getArrayItem(idx).isDictionary())) { |
795 | 593 | QTC::TC("qpdf", "NNTree kid is invalid"); |
796 | 593 | error(qpdf, oh, "invalid kid at index " + std::to_string(idx)); |
797 | 593 | } |
798 | 37.3k | return withinLimits(key, kids.getArrayItem(idx)); |
799 | 37.3k | } |
800 | | |
801 | | void |
802 | | NNTreeImpl::repair() |
803 | 1.84k | { |
804 | 1.84k | auto new_node = QPDFObjectHandle::newDictionary(); |
805 | 1.84k | new_node.replaceKey(details.itemsKey(), QPDFObjectHandle::newArray()); |
806 | 1.84k | NNTreeImpl repl(details, qpdf, new_node, false); |
807 | 41.1k | for (auto const& i: *this) { |
808 | 41.1k | repl.insert(i.first, i.second); |
809 | 41.1k | } |
810 | 1.84k | oh.replaceKey("/Kids", new_node.getKey("/Kids")); |
811 | 1.84k | oh.replaceKey(details.itemsKey(), new_node.getKey(details.itemsKey())); |
812 | 1.84k | } |
813 | | |
814 | | NNTreeImpl::iterator |
815 | | NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found) |
816 | 47.8k | { |
817 | 47.8k | try { |
818 | 47.8k | return findInternal(key, return_prev_if_not_found); |
819 | 47.8k | } catch (QPDFExc& e) { |
820 | 2.98k | if (auto_repair) { |
821 | 2.65k | QTC::TC("qpdf", "NNTree repair"); |
822 | 2.65k | warn(qpdf, oh, std::string("attempting to repair after error: ") + e.what()); |
823 | 2.65k | repair(); |
824 | 2.65k | return findInternal(key, return_prev_if_not_found); |
825 | 2.65k | } else { |
826 | 337 | throw; |
827 | 337 | } |
828 | 2.98k | } |
829 | 47.8k | } |
830 | | |
831 | | NNTreeImpl::iterator |
832 | | NNTreeImpl::findInternal(QPDFObjectHandle key, bool return_prev_if_not_found) |
833 | 48.5k | { |
834 | 48.5k | auto first_item = begin(); |
835 | 48.5k | auto last_item = end(); |
836 | 48.5k | if (first_item == end()) { |
837 | | // Empty |
838 | 3.42k | return end(); |
839 | 45.0k | } else if ( |
840 | 45.0k | first_item.valid() && details.keyValid(first_item->first) && |
841 | 45.0k | details.compareKeys(key, first_item->first) < 0) { |
842 | | // Before the first key |
843 | 335 | return end(); |
844 | 44.7k | } else if ( |
845 | 44.7k | last_item.valid() && details.keyValid(last_item->first) && |
846 | 44.7k | details.compareKeys(key, last_item->first) > 0) { |
847 | | // After the last key |
848 | 0 | if (return_prev_if_not_found) { |
849 | 0 | return last_item; |
850 | 0 | } else { |
851 | 0 | return end(); |
852 | 0 | } |
853 | 0 | } |
854 | | |
855 | 44.7k | QPDFObjGen::set seen; |
856 | 44.7k | auto node = oh; |
857 | 44.7k | iterator result(*this); |
858 | | |
859 | 65.7k | while (true) { |
860 | 63.5k | if (!seen.add(node)) { |
861 | 34 | QTC::TC("qpdf", "NNTree loop in find"); |
862 | 34 | error(qpdf, node, "loop detected in find"); |
863 | 34 | } |
864 | | |
865 | 63.5k | auto kids = node.getKey("/Kids"); |
866 | 63.5k | int nkids = kids.isArray() ? kids.getArrayNItems() : 0; |
867 | 63.5k | auto items = node.getKey(details.itemsKey()); |
868 | 63.5k | int nitems = items.isArray() ? items.getArrayNItems() : 0; |
869 | 63.5k | if (nitems > 0) { |
870 | 42.6k | int idx = binarySearch( |
871 | 42.6k | key, items, nitems / 2, return_prev_if_not_found, &NNTreeImpl::compareKeyItem); |
872 | 42.6k | if (idx >= 0) { |
873 | 41.2k | result.setItemNumber(node, 2 * idx); |
874 | 41.2k | } |
875 | 42.6k | break; |
876 | 42.6k | } else if (nkids > 0) { |
877 | 20.8k | int idx = binarySearch(key, kids, nkids, true, &NNTreeImpl::compareKeyKid); |
878 | 20.8k | if (idx == -1) { |
879 | 92 | QTC::TC("qpdf", "NNTree -1 in binary search"); |
880 | 92 | error( |
881 | 92 | qpdf, |
882 | 92 | node, |
883 | 92 | "unexpected -1 from binary search of kids;" |
884 | 92 | " limits may by wrong"); |
885 | 92 | } |
886 | 20.8k | result.addPathElement(node, idx); |
887 | 20.8k | node = kids.getArrayItem(idx); |
888 | 20.8k | } else { |
889 | 80 | QTC::TC("qpdf", "NNTree bad node during find"); |
890 | 80 | error(qpdf, node, "bad node during find"); |
891 | 80 | } |
892 | 63.5k | } |
893 | | |
894 | 44.7k | return result; |
895 | 48.5k | } |
896 | | |
897 | | NNTreeImpl::iterator |
898 | | NNTreeImpl::insertFirst(QPDFObjectHandle key, QPDFObjectHandle value) |
899 | 2.03k | { |
900 | 2.03k | auto iter = begin(); |
901 | 2.03k | QPDFObjectHandle items; |
902 | 2.03k | if (iter.node.isDictionary()) { |
903 | 2.03k | items = iter.node.getKey(details.itemsKey()); |
904 | 2.03k | } |
905 | 2.03k | if (!(items.isArray())) { |
906 | 0 | QTC::TC("qpdf", "NNTree no valid items node in insertFirst"); |
907 | 0 | error(qpdf, oh, "unable to find a valid items node"); |
908 | 0 | } |
909 | 2.03k | items.insertItem(0, key); |
910 | 2.03k | items.insertItem(1, value); |
911 | 2.03k | iter.setItemNumber(iter.node, 0); |
912 | 2.03k | iter.resetLimits(iter.node, iter.lastPathElement()); |
913 | 2.03k | iter.split(iter.node, iter.lastPathElement()); |
914 | 2.03k | return iter; |
915 | 2.03k | } |
916 | | |
917 | | NNTreeImpl::iterator |
918 | | NNTreeImpl::insert(QPDFObjectHandle key, QPDFObjectHandle value) |
919 | 41.1k | { |
920 | 41.1k | auto iter = find(key, true); |
921 | 41.1k | if (!iter.valid()) { |
922 | 2.03k | QTC::TC("qpdf", "NNTree insert inserts first"); |
923 | 2.03k | return insertFirst(key, value); |
924 | 39.1k | } else if (details.compareKeys(key, iter->first) == 0) { |
925 | 9.80k | QTC::TC("qpdf", "NNTree insert replaces"); |
926 | 9.80k | auto items = iter.node.getKey(details.itemsKey()); |
927 | 9.80k | items.setArrayItem(iter.item_number + 1, value); |
928 | 9.80k | iter.updateIValue(); |
929 | 29.3k | } else { |
930 | 29.3k | QTC::TC("qpdf", "NNTree insert inserts after"); |
931 | 29.3k | iter.insertAfter(key, value); |
932 | 29.3k | } |
933 | 39.1k | return iter; |
934 | 41.1k | } |
935 | | |
936 | | bool |
937 | | NNTreeImpl::remove(QPDFObjectHandle key, QPDFObjectHandle* value) |
938 | 0 | { |
939 | 0 | auto iter = find(key, false); |
940 | 0 | if (!iter.valid()) { |
941 | 0 | QTC::TC("qpdf", "NNTree remove not found"); |
942 | 0 | return false; |
943 | 0 | } |
944 | 0 | if (value) { |
945 | 0 | *value = iter->second; |
946 | 0 | } |
947 | 0 | iter.remove(); |
948 | 0 | return true; |
949 | 0 | } |