/src/qpdf/libqpdf/NNTree.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <qpdf/NNTree.hh> |
2 | | |
3 | | #include <qpdf/QTC.hh> |
4 | | #include <qpdf/QUtil.hh> |
5 | | |
6 | | #include <exception> |
7 | | |
8 | | static std::string |
9 | | get_description(QPDFObjectHandle& node) |
10 | 0 | { |
11 | 0 | std::string result("Name/Number tree node"); |
12 | 0 | if (node.isIndirect()) { |
13 | 0 | result += " (object " + std::to_string(node.getObjectID()) + ")"; |
14 | 0 | } |
15 | 0 | return result; |
16 | 0 | } |
17 | | |
18 | | static void |
19 | | warn(QPDF& qpdf, QPDFObjectHandle& node, std::string const& msg) |
20 | 0 | { |
21 | 0 | qpdf.warn(qpdf_e_damaged_pdf, get_description(node), 0, msg); |
22 | 0 | } |
23 | | |
24 | | static void |
25 | | error(QPDF& qpdf, QPDFObjectHandle& node, std::string const& msg) |
26 | 0 | { |
27 | 0 | throw QPDFExc(qpdf_e_damaged_pdf, qpdf.getFilename(), get_description(node), 0, msg); |
28 | 0 | } |
29 | | |
30 | | NNTreeIterator::NNTreeIterator(NNTreeImpl& impl) : |
31 | 0 | impl(impl) |
32 | 0 | { |
33 | 0 | } |
34 | | |
35 | | void |
36 | | NNTreeIterator::updateIValue(bool allow_invalid) |
37 | 0 | { |
38 | | // ivalue should never be used inside the class since we return a pointer/reference to it. Every |
39 | | // bit of code that ever changes what object the iterator points to should take care to call |
40 | | // updateIValue. Failure to do this means that any old references to *iter will point to |
41 | | // incorrect objects, though the next dereference of the iterator will fix it. This isn't |
42 | | // necessarily catastrophic, but it would be confusing. The test suite attempts to exercise |
43 | | // various cases to ensure we don't introduce that bug in the future, but sadly it's tricky to |
44 | | // verify by reasoning about the code that this constraint is always satisfied. Whenever we |
45 | | // update what the iterator points to, we should call setItemNumber, which calls this. If we |
46 | | // change what the iterator points to in some other way, such as replacing a value or removing |
47 | | // an item and making the iterator point at a different item in potentially the same position, |
48 | | // we must call updateIValue as well. These cases are handled, and for good measure, we also |
49 | | // call updateIValue in operator* and operator->. |
50 | |
|
51 | 0 | bool okay = false; |
52 | 0 | if ((item_number >= 0) && this->node.isDictionary()) { |
53 | 0 | auto items = this->node.getKey(impl.details.itemsKey()); |
54 | 0 | if (this->item_number + 1 < items.getArrayNItems()) { |
55 | 0 | okay = true; |
56 | 0 | this->ivalue.first = items.getArrayItem(this->item_number); |
57 | 0 | this->ivalue.second = items.getArrayItem(1 + this->item_number); |
58 | 0 | } else { |
59 | 0 | error(impl.qpdf, node, "update ivalue: items array is too short"); |
60 | 0 | } |
61 | 0 | } |
62 | 0 | if (!okay) { |
63 | 0 | if (!allow_invalid) { |
64 | 0 | throw std::logic_error( |
65 | 0 | "attempt made to dereference an invalid name/number tree iterator"); |
66 | 0 | } |
67 | 0 | this->ivalue.first = QPDFObjectHandle(); |
68 | 0 | this->ivalue.second = QPDFObjectHandle(); |
69 | 0 | } |
70 | 0 | } |
71 | | |
72 | | NNTreeIterator::PathElement::PathElement(QPDFObjectHandle const& node, int kid_number) : |
73 | 0 | node(node), |
74 | 0 | kid_number(kid_number) |
75 | 0 | { |
76 | 0 | } |
77 | | |
78 | | QPDFObjectHandle |
79 | | NNTreeIterator::getNextKid(PathElement& pe, bool backward) |
80 | 0 | { |
81 | 0 | QPDFObjectHandle result; |
82 | 0 | bool found = false; |
83 | 0 | while (!found) { |
84 | 0 | pe.kid_number += backward ? -1 : 1; |
85 | 0 | auto kids = pe.node.getKey("/Kids"); |
86 | 0 | if ((pe.kid_number >= 0) && (pe.kid_number < kids.getArrayNItems())) { |
87 | 0 | result = kids.getArrayItem(pe.kid_number); |
88 | 0 | if (result.isDictionary() && |
89 | 0 | (result.hasKey("/Kids") || result.hasKey(impl.details.itemsKey()))) { |
90 | 0 | found = true; |
91 | 0 | } else { |
92 | 0 | QTC::TC("qpdf", "NNTree skip invalid kid"); |
93 | 0 | warn( |
94 | 0 | impl.qpdf, |
95 | 0 | pe.node, |
96 | 0 | ("skipping over invalid kid at index " + std::to_string(pe.kid_number))); |
97 | 0 | } |
98 | 0 | } else { |
99 | 0 | result = QPDFObjectHandle::newNull(); |
100 | 0 | found = true; |
101 | 0 | } |
102 | 0 | } |
103 | 0 | return result; |
104 | 0 | } |
105 | | |
106 | | bool |
107 | | NNTreeIterator::valid() const |
108 | 0 | { |
109 | 0 | return this->item_number >= 0; |
110 | 0 | } |
111 | | |
112 | | void |
113 | | NNTreeIterator::increment(bool backward) |
114 | 0 | { |
115 | 0 | if (this->item_number < 0) { |
116 | 0 | QTC::TC("qpdf", "NNTree increment end()"); |
117 | 0 | deepen(impl.oh, !backward, true); |
118 | 0 | return; |
119 | 0 | } |
120 | 0 | bool found_valid_key = false; |
121 | 0 | while (valid() && (!found_valid_key)) { |
122 | 0 | this->item_number += backward ? -2 : 2; |
123 | 0 | auto items = this->node.getKey(impl.details.itemsKey()); |
124 | 0 | if ((this->item_number < 0) || (this->item_number >= items.getArrayNItems())) { |
125 | 0 | bool found = false; |
126 | 0 | setItemNumber(QPDFObjectHandle(), -1); |
127 | 0 | while (!(found || this->path.empty())) { |
128 | 0 | auto& element = this->path.back(); |
129 | 0 | auto pe_node = getNextKid(element, backward); |
130 | 0 | if (pe_node.isNull()) { |
131 | 0 | this->path.pop_back(); |
132 | 0 | } else { |
133 | 0 | found = deepen(pe_node, !backward, false); |
134 | 0 | } |
135 | 0 | } |
136 | 0 | } |
137 | 0 | if (this->item_number >= 0) { |
138 | 0 | items = this->node.getKey(impl.details.itemsKey()); |
139 | 0 | if (this->item_number + 1 >= items.getArrayNItems()) { |
140 | 0 | QTC::TC("qpdf", "NNTree skip item at end of short items"); |
141 | 0 | warn(impl.qpdf, this->node, "items array doesn't have enough elements"); |
142 | 0 | } else if (!impl.details.keyValid(items.getArrayItem(this->item_number))) { |
143 | 0 | QTC::TC("qpdf", "NNTree skip invalid key"); |
144 | 0 | warn( |
145 | 0 | impl.qpdf, |
146 | 0 | this->node, |
147 | 0 | ("item " + std::to_string(this->item_number) + " has the wrong type")); |
148 | 0 | } else { |
149 | 0 | found_valid_key = true; |
150 | 0 | } |
151 | 0 | } |
152 | 0 | } |
153 | 0 | } |
154 | | |
155 | | void |
156 | | NNTreeIterator::resetLimits(QPDFObjectHandle node, std::list<PathElement>::iterator parent) |
157 | 0 | { |
158 | 0 | bool done = false; |
159 | 0 | while (!done) { |
160 | 0 | if (parent == this->path.end()) { |
161 | 0 | QTC::TC("qpdf", "NNTree remove limits from root"); |
162 | 0 | node.removeKey("/Limits"); |
163 | 0 | done = true; |
164 | 0 | break; |
165 | 0 | } |
166 | 0 | auto kids = node.getKey("/Kids"); |
167 | 0 | int nkids = kids.isArray() ? kids.getArrayNItems() : 0; |
168 | 0 | auto items = node.getKey(impl.details.itemsKey()); |
169 | 0 | int nitems = items.isArray() ? items.getArrayNItems() : 0; |
170 | |
|
171 | 0 | bool changed = true; |
172 | 0 | QPDFObjectHandle first; |
173 | 0 | QPDFObjectHandle last; |
174 | 0 | if (nitems >= 2) { |
175 | 0 | first = items.getArrayItem(0); |
176 | 0 | last = items.getArrayItem((nitems - 1) & ~1); |
177 | 0 | } else if (nkids > 0) { |
178 | 0 | auto first_kid = kids.getArrayItem(0); |
179 | 0 | auto last_kid = kids.getArrayItem(nkids - 1); |
180 | 0 | if (first_kid.isDictionary() && last_kid.isDictionary()) { |
181 | 0 | auto first_limits = first_kid.getKey("/Limits"); |
182 | 0 | auto last_limits = last_kid.getKey("/Limits"); |
183 | 0 | if (first_limits.isArray() && (first_limits.getArrayNItems() >= 2) && |
184 | 0 | last_limits.isArray() && (last_limits.getArrayNItems() >= 2)) { |
185 | 0 | first = first_limits.getArrayItem(0); |
186 | 0 | last = last_limits.getArrayItem(1); |
187 | 0 | } |
188 | 0 | } |
189 | 0 | } |
190 | 0 | if (first && last) { |
191 | 0 | auto limits = QPDFObjectHandle::newArray(); |
192 | 0 | limits.appendItem(first); |
193 | 0 | limits.appendItem(last); |
194 | 0 | auto olimits = node.getKey("/Limits"); |
195 | 0 | if (olimits.isArray() && (olimits.getArrayNItems() == 2)) { |
196 | 0 | auto ofirst = olimits.getArrayItem(0); |
197 | 0 | auto olast = olimits.getArrayItem(1); |
198 | 0 | if (impl.details.keyValid(ofirst) && impl.details.keyValid(olast) && |
199 | 0 | (impl.details.compareKeys(first, ofirst) == 0) && |
200 | 0 | (impl.details.compareKeys(last, olast) == 0)) { |
201 | 0 | QTC::TC("qpdf", "NNTree limits didn't change"); |
202 | 0 | changed = false; |
203 | 0 | } |
204 | 0 | } |
205 | 0 | if (changed && !node.isSameObjectAs(path.begin()->node)) { |
206 | 0 | node.replaceKey("/Limits", limits); |
207 | 0 | } |
208 | 0 | } else { |
209 | 0 | QTC::TC("qpdf", "NNTree unable to determine limits"); |
210 | 0 | warn(impl.qpdf, node, "unable to determine limits"); |
211 | 0 | } |
212 | |
|
213 | 0 | if ((!changed) || (parent == this->path.begin())) { |
214 | 0 | done = true; |
215 | 0 | } else { |
216 | 0 | node = parent->node; |
217 | 0 | --parent; |
218 | 0 | } |
219 | 0 | } |
220 | 0 | } |
221 | | |
222 | | void |
223 | | NNTreeIterator::split(QPDFObjectHandle to_split, std::list<PathElement>::iterator parent) |
224 | 0 | { |
225 | | // Split some node along the path to the item pointed to by this iterator, and adjust the |
226 | | // iterator so it points to the same item. |
227 | | |
228 | | // In examples, for simplicity, /Nums is shown to just contain numbers instead of pairs. Imagine |
229 | | // this tree: |
230 | | // |
231 | | // root: << /Kids [ A B C D ] >> |
232 | | // A: << /Nums [ 1 2 3 4 ] >> |
233 | | // B: << /Nums [ 5 6 7 8 ] >> |
234 | | // C: << /Nums [ 9 10 11 12 ] >> |
235 | | // D: << /Kids [ E F ] |
236 | | // E: << /Nums [ 13 14 15 16 ] >> |
237 | | // F: << /Nums [ 17 18 19 20 ] >> |
238 | | |
239 | | // iter1 (points to 19) |
240 | | // path: |
241 | | // - { node: root: kid_number: 3 } |
242 | | // - { node: D, kid_number: 1 } |
243 | | // node: F |
244 | | // item_number: 2 |
245 | | |
246 | | // iter2 (points to 1) |
247 | | // path: |
248 | | // - { node: root, kid_number: 0} |
249 | | // node: A |
250 | | // item_number: 0 |
251 | |
|
252 | 0 | if (!valid()) { |
253 | 0 | throw std::logic_error("NNTreeIterator::split called an invalid iterator"); |
254 | 0 | } |
255 | | |
256 | | // Find the array we actually need to split, which is either this node's kids or items. |
257 | 0 | auto kids = to_split.getKey("/Kids"); |
258 | 0 | int nkids = kids.isArray() ? kids.getArrayNItems() : 0; |
259 | 0 | auto items = to_split.getKey(impl.details.itemsKey()); |
260 | 0 | int nitems = items.isArray() ? items.getArrayNItems() : 0; |
261 | |
|
262 | 0 | QPDFObjectHandle first_half; |
263 | 0 | int n = 0; |
264 | 0 | std::string key; |
265 | 0 | int threshold = 0; |
266 | 0 | if (nkids > 0) { |
267 | 0 | QTC::TC("qpdf", "NNTree split kids"); |
268 | 0 | first_half = kids; |
269 | 0 | n = nkids; |
270 | 0 | threshold = impl.split_threshold; |
271 | 0 | key = "/Kids"; |
272 | 0 | } else if (nitems > 0) { |
273 | 0 | QTC::TC("qpdf", "NNTree split items"); |
274 | 0 | first_half = items; |
275 | 0 | n = nitems; |
276 | 0 | threshold = 2 * impl.split_threshold; |
277 | 0 | key = impl.details.itemsKey(); |
278 | 0 | } else { |
279 | 0 | throw std::logic_error("NNTreeIterator::split called on invalid node"); |
280 | 0 | } |
281 | | |
282 | 0 | if (n <= threshold) { |
283 | 0 | return; |
284 | 0 | } |
285 | | |
286 | 0 | bool is_root = (parent == this->path.end()); |
287 | 0 | bool is_leaf = (nitems > 0); |
288 | | |
289 | | // CURRENT STATE: tree is in original state; iterator is valid and unchanged. |
290 | |
|
291 | 0 | if (is_root) { |
292 | | // What we want to do is to create a new node for the second half of the items and put it in |
293 | | // the parent's /Kids array right after the element that points to the current to_split |
294 | | // node, but if we're splitting root, there is no parent, so handle that first. |
295 | | |
296 | | // In the non-root case, parent points to the path element whose /Kids contains the first |
297 | | // half node, and the first half node is to_split. If we are splitting the root, we need to |
298 | | // push everything down a level, but we want to keep the actual root object the same so that |
299 | | // indirect references to it remain intact (and also in case it might be a direct object, |
300 | | // which it shouldn't be but that case probably exists in the wild). To achieve this, we |
301 | | // create a new node for the first half and then replace /Kids in the root to contain it. |
302 | | // Then we adjust the path so that the first element is root and the second element, if any, |
303 | | // is the new first half. In this way, we make the root case identical to the non-root case |
304 | | // so remaining logic can handle them in the same way. |
305 | |
|
306 | 0 | auto first_node = impl.qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary()); |
307 | 0 | first_node.replaceKey(key, first_half); |
308 | 0 | QPDFObjectHandle new_kids = QPDFObjectHandle::newArray(); |
309 | 0 | new_kids.appendItem(first_node); |
310 | 0 | to_split.removeKey("/Limits"); // already shouldn't be there for root |
311 | 0 | to_split.removeKey(impl.details.itemsKey()); |
312 | 0 | to_split.replaceKey("/Kids", new_kids); |
313 | 0 | if (is_leaf) { |
314 | 0 | QTC::TC("qpdf", "NNTree split root + leaf"); |
315 | 0 | this->node = first_node; |
316 | 0 | } else { |
317 | 0 | QTC::TC("qpdf", "NNTree split root + !leaf"); |
318 | 0 | auto next = this->path.begin(); |
319 | 0 | next->node = first_node; |
320 | 0 | } |
321 | 0 | this->path.emplace_front(to_split, 0); |
322 | 0 | parent = this->path.begin(); |
323 | 0 | to_split = first_node; |
324 | 0 | } |
325 | | |
326 | | // CURRENT STATE: parent is guaranteed to be defined, and we have the invariants that |
327 | | // parent[/Kids][kid_number] == to_split and (++parent).node == to_split. |
328 | | |
329 | | // Create a second half array, and transfer the second half of the items into the second half |
330 | | // array. |
331 | 0 | QPDFObjectHandle second_half = QPDFObjectHandle::newArray(); |
332 | 0 | int start_idx = ((n / 2) & ~1); |
333 | 0 | while (first_half.getArrayNItems() > start_idx) { |
334 | 0 | second_half.appendItem(first_half.getArrayItem(start_idx)); |
335 | 0 | first_half.eraseItem(start_idx); |
336 | 0 | } |
337 | 0 | resetLimits(to_split, parent); |
338 | | |
339 | | // Create a new node to contain the second half |
340 | 0 | QPDFObjectHandle second_node = impl.qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary()); |
341 | 0 | second_node.replaceKey(key, second_half); |
342 | 0 | resetLimits(second_node, parent); |
343 | | |
344 | | // CURRENT STATE: half the items from the kids or items array in the node being split have been |
345 | | // moved into a new node. The new node is not yet attached to the tree. The iterator may have a |
346 | | // path element or leaf node that is out of bounds. |
347 | | |
348 | | // We need to adjust the parent to add the second node to /Kids and, if needed, update |
349 | | // kid_number to traverse through it. We need to update to_split's path element, or the node if |
350 | | // this is a leaf, so that the kid/item number points to the right place. |
351 | |
|
352 | 0 | auto parent_kids = parent->node.getKey("/Kids"); |
353 | 0 | parent_kids.insertItem(parent->kid_number + 1, second_node); |
354 | 0 | auto cur_elem = parent; |
355 | 0 | ++cur_elem; // points to end() for leaf nodes |
356 | 0 | int old_idx = (is_leaf ? this->item_number : cur_elem->kid_number); |
357 | 0 | if (old_idx >= start_idx) { |
358 | 0 | ++parent->kid_number; |
359 | 0 | if (is_leaf) { |
360 | 0 | QTC::TC("qpdf", "NNTree split second half item"); |
361 | 0 | setItemNumber(second_node, this->item_number - start_idx); |
362 | 0 | } else { |
363 | 0 | QTC::TC("qpdf", "NNTree split second half kid"); |
364 | 0 | cur_elem->node = second_node; |
365 | 0 | cur_elem->kid_number -= start_idx; |
366 | 0 | } |
367 | 0 | } |
368 | 0 | if (!is_root) { |
369 | 0 | QTC::TC("qpdf", "NNTree split parent"); |
370 | 0 | auto next = parent->node; |
371 | 0 | resetLimits(next, parent); |
372 | 0 | --parent; |
373 | 0 | split(next, parent); |
374 | 0 | } |
375 | 0 | } |
376 | | |
377 | | std::list<NNTreeIterator::PathElement>::iterator |
378 | | NNTreeIterator::lastPathElement() |
379 | 0 | { |
380 | 0 | auto result = this->path.end(); |
381 | 0 | if (!this->path.empty()) { |
382 | 0 | --result; |
383 | 0 | } |
384 | 0 | return result; |
385 | 0 | } |
386 | | |
387 | | void |
388 | | NNTreeIterator::insertAfter(QPDFObjectHandle key, QPDFObjectHandle value) |
389 | 0 | { |
390 | 0 | if (!valid()) { |
391 | 0 | QTC::TC("qpdf", "NNTree insertAfter inserts first"); |
392 | 0 | impl.insertFirst(key, value); |
393 | 0 | deepen(impl.oh, true, false); |
394 | 0 | return; |
395 | 0 | } |
396 | | |
397 | 0 | auto items = this->node.getKey(impl.details.itemsKey()); |
398 | 0 | if (!items.isArray()) { |
399 | 0 | error(impl.qpdf, node, "node contains no items array"); |
400 | 0 | } |
401 | 0 | if (items.getArrayNItems() < this->item_number + 2) { |
402 | 0 | error(impl.qpdf, node, "insert: items array is too short"); |
403 | 0 | } |
404 | 0 | items.insertItem(this->item_number + 2, key); |
405 | 0 | items.insertItem(this->item_number + 3, value); |
406 | 0 | resetLimits(this->node, lastPathElement()); |
407 | 0 | split(this->node, lastPathElement()); |
408 | 0 | increment(false); |
409 | 0 | } |
410 | | |
411 | | void |
412 | | NNTreeIterator::remove() |
413 | 0 | { |
414 | | // Remove this item, leaving the tree valid and this iterator pointing to the next item. |
415 | |
|
416 | 0 | if (!valid()) { |
417 | 0 | throw std::logic_error("attempt made to remove an invalid iterator"); |
418 | 0 | } |
419 | 0 | auto items = this->node.getKey(impl.details.itemsKey()); |
420 | 0 | int nitems = items.getArrayNItems(); |
421 | 0 | if (this->item_number + 2 > nitems) { |
422 | 0 | error(impl.qpdf, this->node, "found short items array while removing an item"); |
423 | 0 | } |
424 | |
|
425 | 0 | items.eraseItem(this->item_number); |
426 | 0 | items.eraseItem(this->item_number); |
427 | 0 | nitems -= 2; |
428 | |
|
429 | 0 | if (nitems > 0) { |
430 | | // There are still items left |
431 | |
|
432 | 0 | if ((this->item_number == 0) || (this->item_number == nitems)) { |
433 | | // We removed either the first or last item of an items array that remains non-empty, so |
434 | | // we have to adjust limits. |
435 | 0 | QTC::TC("qpdf", "NNTree remove reset limits"); |
436 | 0 | resetLimits(this->node, lastPathElement()); |
437 | 0 | } |
438 | |
|
439 | 0 | if (this->item_number == nitems) { |
440 | | // We removed the last item of a non-empty items array, so advance to the successor of |
441 | | // the previous item. |
442 | 0 | QTC::TC("qpdf", "NNTree erased last item"); |
443 | 0 | this->item_number -= 2; |
444 | 0 | increment(false); |
445 | 0 | } else if (this->item_number < nitems) { |
446 | | // We don't have to do anything since the removed item's successor now occupies its |
447 | | // former location. |
448 | 0 | QTC::TC("qpdf", "NNTree erased non-last item"); |
449 | 0 | updateIValue(); |
450 | 0 | } else { |
451 | | // We already checked to ensure this condition would not happen. |
452 | 0 | throw std::logic_error("NNTreeIterator::remove: item_number > nitems after erase"); |
453 | 0 | } |
454 | 0 | return; |
455 | 0 | } |
456 | | |
457 | 0 | if (this->path.empty()) { |
458 | | // Special case: if this is the root node, we can leave it empty. |
459 | 0 | QTC::TC("qpdf", "NNTree erased all items on leaf/root"); |
460 | 0 | setItemNumber(impl.oh, -1); |
461 | 0 | return; |
462 | 0 | } |
463 | | |
464 | 0 | QTC::TC("qpdf", "NNTree items is empty after remove"); |
465 | | |
466 | | // We removed the last item from this items array, so we need to remove this node from the |
467 | | // parent on up the tree. Then we need to position ourselves at the removed item's successor. |
468 | 0 | bool done = false; |
469 | 0 | while (!done) { |
470 | 0 | auto element = lastPathElement(); |
471 | 0 | auto parent = element; |
472 | 0 | --parent; |
473 | 0 | auto kids = element->node.getKey("/Kids"); |
474 | 0 | kids.eraseItem(element->kid_number); |
475 | 0 | auto nkids = kids.getArrayNItems(); |
476 | 0 | if (nkids > 0) { |
477 | | // The logic here is similar to the items case. |
478 | 0 | if ((element->kid_number == 0) || (element->kid_number == nkids)) { |
479 | 0 | QTC::TC("qpdf", "NNTree erased first or last kid"); |
480 | 0 | resetLimits(element->node, parent); |
481 | 0 | } |
482 | 0 | if (element->kid_number == nkids) { |
483 | | // Move to the successor of the last child of the previous kid. |
484 | 0 | setItemNumber(QPDFObjectHandle(), -1); |
485 | 0 | --element->kid_number; |
486 | 0 | deepen(kids.getArrayItem(element->kid_number), false, true); |
487 | 0 | if (valid()) { |
488 | 0 | increment(false); |
489 | 0 | if (!valid()) { |
490 | 0 | QTC::TC("qpdf", "NNTree erased last item in tree"); |
491 | 0 | } else { |
492 | 0 | QTC::TC("qpdf", "NNTree erased last kid"); |
493 | 0 | } |
494 | 0 | } |
495 | 0 | } else { |
496 | | // Next kid is in deleted kid's position |
497 | 0 | QTC::TC("qpdf", "NNTree erased non-last kid"); |
498 | 0 | deepen(kids.getArrayItem(element->kid_number), true, true); |
499 | 0 | } |
500 | 0 | done = true; |
501 | 0 | } else if (parent == this->path.end()) { |
502 | | // We erased the very last item. Convert the root to an empty items array. |
503 | 0 | QTC::TC("qpdf", "NNTree non-flat tree is empty after remove"); |
504 | 0 | element->node.removeKey("/Kids"); |
505 | 0 | element->node.replaceKey(impl.details.itemsKey(), QPDFObjectHandle::newArray()); |
506 | 0 | this->path.clear(); |
507 | 0 | setItemNumber(impl.oh, -1); |
508 | 0 | done = true; |
509 | 0 | } else { |
510 | | // Walk up the tree and continue |
511 | 0 | QTC::TC("qpdf", "NNTree remove walking up tree"); |
512 | 0 | this->path.pop_back(); |
513 | 0 | } |
514 | 0 | } |
515 | 0 | } |
516 | | |
517 | | NNTreeIterator& |
518 | | NNTreeIterator::operator++() |
519 | 0 | { |
520 | 0 | increment(false); |
521 | 0 | return *this; |
522 | 0 | } |
523 | | |
524 | | NNTreeIterator& |
525 | | NNTreeIterator::operator--() |
526 | 0 | { |
527 | 0 | increment(true); |
528 | 0 | return *this; |
529 | 0 | } |
530 | | |
531 | | NNTreeIterator::reference |
532 | | NNTreeIterator::operator*() |
533 | 0 | { |
534 | 0 | updateIValue(false); |
535 | 0 | return this->ivalue; |
536 | 0 | } |
537 | | |
538 | | NNTreeIterator::pointer |
539 | | NNTreeIterator::operator->() |
540 | 0 | { |
541 | 0 | updateIValue(false); |
542 | 0 | return &(this->ivalue); |
543 | 0 | } |
544 | | |
545 | | bool |
546 | | NNTreeIterator::operator==(NNTreeIterator const& other) const |
547 | 0 | { |
548 | 0 | if ((this->item_number == -1) && (other.item_number == -1)) { |
549 | 0 | return true; |
550 | 0 | } |
551 | 0 | if (this->path.size() != other.path.size()) { |
552 | 0 | return false; |
553 | 0 | } |
554 | 0 | auto tpi = this->path.begin(); |
555 | 0 | auto opi = other.path.begin(); |
556 | 0 | while (tpi != this->path.end()) { |
557 | 0 | if (tpi->kid_number != opi->kid_number) { |
558 | 0 | return false; |
559 | 0 | } |
560 | 0 | ++tpi; |
561 | 0 | ++opi; |
562 | 0 | } |
563 | 0 | if (this->item_number != other.item_number) { |
564 | 0 | return false; |
565 | 0 | } |
566 | 0 | return true; |
567 | 0 | } |
568 | | |
569 | | void |
570 | | NNTreeIterator::setItemNumber(QPDFObjectHandle const& node, int n) |
571 | 0 | { |
572 | 0 | this->node = node; |
573 | 0 | this->item_number = n; |
574 | 0 | updateIValue(); |
575 | 0 | } |
576 | | |
577 | | void |
578 | | NNTreeIterator::addPathElement(QPDFObjectHandle const& node, int kid_number) |
579 | 0 | { |
580 | 0 | this->path.emplace_back(node, kid_number); |
581 | 0 | } |
582 | | |
583 | | bool |
584 | | NNTreeIterator::deepen(QPDFObjectHandle node, bool first, bool allow_empty) |
585 | 0 | { |
586 | | // Starting at this node, descend through the first or last kid until we reach a node with |
587 | | // items. If we succeed, return true; otherwise return false and leave path alone. |
588 | |
|
589 | 0 | auto opath = this->path; |
590 | 0 | bool failed = false; |
591 | |
|
592 | 0 | QPDFObjGen::set seen; |
593 | 0 | for (auto const& i: this->path) { |
594 | 0 | seen.add(i.node); |
595 | 0 | } |
596 | 0 | while (!failed) { |
597 | 0 | if (!seen.add(node)) { |
598 | 0 | QTC::TC("qpdf", "NNTree deepen: loop"); |
599 | 0 | warn(impl.qpdf, node, "loop detected while traversing name/number tree"); |
600 | 0 | failed = true; |
601 | 0 | break; |
602 | 0 | } |
603 | | |
604 | 0 | if (!node.isDictionary()) { |
605 | 0 | QTC::TC("qpdf", "NNTree node is not a dictionary"); |
606 | 0 | warn(impl.qpdf, node, "non-dictionary node while traversing name/number tree"); |
607 | 0 | failed = true; |
608 | 0 | break; |
609 | 0 | } |
610 | | |
611 | 0 | auto kids = node.getKey("/Kids"); |
612 | 0 | int nkids = kids.isArray() ? kids.getArrayNItems() : 0; |
613 | 0 | auto items = node.getKey(impl.details.itemsKey()); |
614 | 0 | int nitems = items.isArray() ? items.getArrayNItems() : 0; |
615 | 0 | if (nitems > 0) { |
616 | 0 | setItemNumber(node, first ? 0 : nitems - 2); |
617 | 0 | break; |
618 | 0 | } else if (nkids > 0) { |
619 | 0 | int kid_number = first ? 0 : nkids - 1; |
620 | 0 | addPathElement(node, kid_number); |
621 | 0 | auto next = kids.getArrayItem(kid_number); |
622 | 0 | if (!next.isIndirect()) { |
623 | 0 | if (impl.auto_repair) { |
624 | 0 | QTC::TC("qpdf", "NNTree fix indirect kid"); |
625 | 0 | warn( |
626 | 0 | impl.qpdf, |
627 | 0 | node, |
628 | 0 | ("converting kid number " + std::to_string(kid_number) + |
629 | 0 | " to an indirect object")); |
630 | 0 | next = impl.qpdf.makeIndirectObject(next); |
631 | 0 | kids.setArrayItem(kid_number, next); |
632 | 0 | } else { |
633 | 0 | QTC::TC("qpdf", "NNTree warn indirect kid"); |
634 | 0 | warn( |
635 | 0 | impl.qpdf, |
636 | 0 | node, |
637 | 0 | ("kid number " + std::to_string(kid_number) + |
638 | 0 | " is not an indirect object")); |
639 | 0 | } |
640 | 0 | } |
641 | 0 | node = next; |
642 | 0 | } else if (allow_empty && items.isArray()) { |
643 | 0 | QTC::TC("qpdf", "NNTree deepen found empty"); |
644 | 0 | setItemNumber(node, -1); |
645 | 0 | break; |
646 | 0 | } else { |
647 | 0 | QTC::TC("qpdf", "NNTree deepen: invalid node"); |
648 | 0 | warn( |
649 | 0 | impl.qpdf, |
650 | 0 | node, |
651 | 0 | ("name/number tree node has neither non-empty " + impl.details.itemsKey() + |
652 | 0 | " nor /Kids")); |
653 | 0 | failed = true; |
654 | 0 | break; |
655 | 0 | } |
656 | 0 | } |
657 | 0 | if (failed) { |
658 | 0 | this->path = opath; |
659 | 0 | return false; |
660 | 0 | } |
661 | 0 | return true; |
662 | 0 | } |
663 | | |
664 | | NNTreeImpl::NNTreeImpl( |
665 | | NNTreeDetails const& details, QPDF& qpdf, QPDFObjectHandle& oh, bool auto_repair) : |
666 | 0 | details(details), |
667 | 0 | qpdf(qpdf), |
668 | 0 | oh(oh), |
669 | 0 | auto_repair(auto_repair) |
670 | 0 | { |
671 | 0 | } |
672 | | |
673 | | void |
674 | | NNTreeImpl::setSplitThreshold(int split_threshold) |
675 | 0 | { |
676 | 0 | this->split_threshold = split_threshold; |
677 | 0 | } |
678 | | |
679 | | NNTreeImpl::iterator |
680 | | NNTreeImpl::begin() |
681 | 0 | { |
682 | 0 | iterator result(*this); |
683 | 0 | result.deepen(this->oh, true, true); |
684 | 0 | return result; |
685 | 0 | } |
686 | | |
687 | | NNTreeImpl::iterator |
688 | | NNTreeImpl::end() |
689 | 0 | { |
690 | 0 | return {*this}; |
691 | 0 | } |
692 | | |
693 | | NNTreeImpl::iterator |
694 | | NNTreeImpl::last() |
695 | 0 | { |
696 | 0 | iterator result(*this); |
697 | 0 | result.deepen(this->oh, false, true); |
698 | 0 | return result; |
699 | 0 | } |
700 | | |
701 | | int |
702 | | NNTreeImpl::withinLimits(QPDFObjectHandle key, QPDFObjectHandle node) |
703 | 0 | { |
704 | 0 | int result = 0; |
705 | 0 | auto limits = node.getKey("/Limits"); |
706 | 0 | if (limits.isArray() && (limits.getArrayNItems() >= 2) && |
707 | 0 | details.keyValid(limits.getArrayItem(0)) && details.keyValid(limits.getArrayItem(1))) { |
708 | 0 | if (details.compareKeys(key, limits.getArrayItem(0)) < 0) { |
709 | 0 | result = -1; |
710 | 0 | } else if (details.compareKeys(key, limits.getArrayItem(1)) > 0) { |
711 | 0 | result = 1; |
712 | 0 | } |
713 | 0 | } else { |
714 | 0 | QTC::TC("qpdf", "NNTree missing limits"); |
715 | 0 | error(qpdf, node, "node is missing /Limits"); |
716 | 0 | } |
717 | 0 | return result; |
718 | 0 | } |
719 | | |
720 | | int |
721 | | NNTreeImpl::binarySearch( |
722 | | QPDFObjectHandle key, |
723 | | QPDFObjectHandle items, |
724 | | int num_items, |
725 | | bool return_prev_if_not_found, |
726 | | int (NNTreeImpl::*compare)(QPDFObjectHandle& key, QPDFObjectHandle& arr, int item)) |
727 | 0 | { |
728 | 0 | int max_idx = 1; |
729 | 0 | while (max_idx < num_items) { |
730 | 0 | max_idx <<= 1; |
731 | 0 | } |
732 | |
|
733 | 0 | int step = max_idx / 2; |
734 | 0 | int checks = max_idx; |
735 | 0 | int idx = step; |
736 | 0 | int found_idx = -1; |
737 | 0 | bool found = false; |
738 | 0 | bool found_leq = false; |
739 | 0 | int status = 0; |
740 | |
|
741 | 0 | while ((!found) && (checks > 0)) { |
742 | 0 | if (idx < num_items) { |
743 | 0 | status = (this->*compare)(key, items, idx); |
744 | 0 | if (status >= 0) { |
745 | 0 | found_leq = true; |
746 | 0 | found_idx = idx; |
747 | 0 | } |
748 | 0 | } else { |
749 | | // consider item to be below anything after the top |
750 | 0 | status = -1; |
751 | 0 | } |
752 | |
|
753 | 0 | if (status == 0) { |
754 | 0 | found = true; |
755 | 0 | } else { |
756 | 0 | checks >>= 1; |
757 | 0 | if (checks > 0) { |
758 | 0 | step >>= 1; |
759 | 0 | if (step == 0) { |
760 | 0 | step = 1; |
761 | 0 | } |
762 | |
|
763 | 0 | if (status < 0) { |
764 | 0 | idx -= step; |
765 | 0 | } else { |
766 | 0 | idx += step; |
767 | 0 | } |
768 | 0 | } |
769 | 0 | } |
770 | 0 | } |
771 | |
|
772 | 0 | if (found || (found_leq && return_prev_if_not_found)) { |
773 | 0 | return found_idx; |
774 | 0 | } else { |
775 | 0 | return -1; |
776 | 0 | } |
777 | 0 | } |
778 | | |
779 | | int |
780 | | NNTreeImpl::compareKeyItem(QPDFObjectHandle& key, QPDFObjectHandle& items, int idx) |
781 | 0 | { |
782 | 0 | if (!((items.isArray() && (items.getArrayNItems() > (2 * idx)) && |
783 | 0 | details.keyValid(items.getArrayItem(2 * idx))))) { |
784 | 0 | QTC::TC("qpdf", "NNTree item is wrong type"); |
785 | 0 | error( |
786 | 0 | qpdf, |
787 | 0 | this->oh, |
788 | 0 | ("item at index " + std::to_string(2 * idx) + " is not the right type")); |
789 | 0 | } |
790 | 0 | return details.compareKeys(key, items.getArrayItem(2 * idx)); |
791 | 0 | } |
792 | | |
793 | | int |
794 | | NNTreeImpl::compareKeyKid(QPDFObjectHandle& key, QPDFObjectHandle& kids, int idx) |
795 | 0 | { |
796 | 0 | if (!(kids.isArray() && (idx < kids.getArrayNItems()) && |
797 | 0 | kids.getArrayItem(idx).isDictionary())) { |
798 | 0 | QTC::TC("qpdf", "NNTree kid is invalid"); |
799 | 0 | error(qpdf, this->oh, "invalid kid at index " + std::to_string(idx)); |
800 | 0 | } |
801 | 0 | return withinLimits(key, kids.getArrayItem(idx)); |
802 | 0 | } |
803 | | |
804 | | void |
805 | | NNTreeImpl::repair() |
806 | 0 | { |
807 | 0 | auto new_node = QPDFObjectHandle::newDictionary(); |
808 | 0 | new_node.replaceKey(details.itemsKey(), QPDFObjectHandle::newArray()); |
809 | 0 | NNTreeImpl repl(details, qpdf, new_node, false); |
810 | 0 | for (auto const& i: *this) { |
811 | 0 | repl.insert(i.first, i.second); |
812 | 0 | } |
813 | 0 | this->oh.replaceKey("/Kids", new_node.getKey("/Kids")); |
814 | 0 | this->oh.replaceKey(details.itemsKey(), new_node.getKey(details.itemsKey())); |
815 | 0 | } |
816 | | |
817 | | NNTreeImpl::iterator |
818 | | NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found) |
819 | 0 | { |
820 | 0 | try { |
821 | 0 | return findInternal(key, return_prev_if_not_found); |
822 | 0 | } catch (QPDFExc& e) { |
823 | 0 | if (this->auto_repair) { |
824 | 0 | QTC::TC("qpdf", "NNTree repair"); |
825 | 0 | warn(qpdf, this->oh, std::string("attempting to repair after error: ") + e.what()); |
826 | 0 | repair(); |
827 | 0 | return findInternal(key, return_prev_if_not_found); |
828 | 0 | } else { |
829 | 0 | throw; |
830 | 0 | } |
831 | 0 | } |
832 | 0 | } |
833 | | |
834 | | NNTreeImpl::iterator |
835 | | NNTreeImpl::findInternal(QPDFObjectHandle key, bool return_prev_if_not_found) |
836 | 0 | { |
837 | 0 | auto first_item = begin(); |
838 | 0 | auto last_item = end(); |
839 | 0 | if (first_item == end()) { |
840 | | // Empty |
841 | 0 | return end(); |
842 | 0 | } else if ( |
843 | 0 | first_item.valid() && details.keyValid(first_item->first) && |
844 | 0 | details.compareKeys(key, first_item->first) < 0) { |
845 | | // Before the first key |
846 | 0 | return end(); |
847 | 0 | } else if ( |
848 | 0 | last_item.valid() && details.keyValid(last_item->first) && |
849 | 0 | details.compareKeys(key, last_item->first) > 0) { |
850 | | // After the last key |
851 | 0 | if (return_prev_if_not_found) { |
852 | 0 | return last_item; |
853 | 0 | } else { |
854 | 0 | return end(); |
855 | 0 | } |
856 | 0 | } |
857 | | |
858 | 0 | QPDFObjGen::set seen; |
859 | 0 | auto node = this->oh; |
860 | 0 | iterator result(*this); |
861 | |
|
862 | 0 | while (true) { |
863 | 0 | if (!seen.add(node)) { |
864 | 0 | QTC::TC("qpdf", "NNTree loop in find"); |
865 | 0 | error(qpdf, node, "loop detected in find"); |
866 | 0 | } |
867 | |
|
868 | 0 | auto kids = node.getKey("/Kids"); |
869 | 0 | int nkids = kids.isArray() ? kids.getArrayNItems() : 0; |
870 | 0 | auto items = node.getKey(details.itemsKey()); |
871 | 0 | int nitems = items.isArray() ? items.getArrayNItems() : 0; |
872 | 0 | if (nitems > 0) { |
873 | 0 | int idx = binarySearch( |
874 | 0 | key, items, nitems / 2, return_prev_if_not_found, &NNTreeImpl::compareKeyItem); |
875 | 0 | if (idx >= 0) { |
876 | 0 | result.setItemNumber(node, 2 * idx); |
877 | 0 | } |
878 | 0 | break; |
879 | 0 | } else if (nkids > 0) { |
880 | 0 | int idx = binarySearch(key, kids, nkids, true, &NNTreeImpl::compareKeyKid); |
881 | 0 | if (idx == -1) { |
882 | 0 | QTC::TC("qpdf", "NNTree -1 in binary search"); |
883 | 0 | error( |
884 | 0 | qpdf, |
885 | 0 | node, |
886 | 0 | "unexpected -1 from binary search of kids;" |
887 | 0 | " limits may by wrong"); |
888 | 0 | } |
889 | 0 | result.addPathElement(node, idx); |
890 | 0 | node = kids.getArrayItem(idx); |
891 | 0 | } else { |
892 | 0 | QTC::TC("qpdf", "NNTree bad node during find"); |
893 | 0 | error(qpdf, node, "bad node during find"); |
894 | 0 | } |
895 | 0 | } |
896 | |
|
897 | 0 | return result; |
898 | 0 | } |
899 | | |
900 | | NNTreeImpl::iterator |
901 | | NNTreeImpl::insertFirst(QPDFObjectHandle key, QPDFObjectHandle value) |
902 | 0 | { |
903 | 0 | auto iter = begin(); |
904 | 0 | QPDFObjectHandle items; |
905 | 0 | if (iter.node.isDictionary()) { |
906 | 0 | items = iter.node.getKey(details.itemsKey()); |
907 | 0 | } |
908 | 0 | if (!(items.isArray())) { |
909 | 0 | QTC::TC("qpdf", "NNTree no valid items node in insertFirst"); |
910 | 0 | error(qpdf, this->oh, "unable to find a valid items node"); |
911 | 0 | } |
912 | 0 | items.insertItem(0, key); |
913 | 0 | items.insertItem(1, value); |
914 | 0 | iter.setItemNumber(iter.node, 0); |
915 | 0 | iter.resetLimits(iter.node, iter.lastPathElement()); |
916 | 0 | iter.split(iter.node, iter.lastPathElement()); |
917 | 0 | return iter; |
918 | 0 | } |
919 | | |
920 | | NNTreeImpl::iterator |
921 | | NNTreeImpl::insert(QPDFObjectHandle key, QPDFObjectHandle value) |
922 | 0 | { |
923 | 0 | auto iter = find(key, true); |
924 | 0 | if (!iter.valid()) { |
925 | 0 | QTC::TC("qpdf", "NNTree insert inserts first"); |
926 | 0 | return insertFirst(key, value); |
927 | 0 | } else if (details.compareKeys(key, iter->first) == 0) { |
928 | 0 | QTC::TC("qpdf", "NNTree insert replaces"); |
929 | 0 | auto items = iter.node.getKey(details.itemsKey()); |
930 | 0 | items.setArrayItem(iter.item_number + 1, value); |
931 | 0 | iter.updateIValue(); |
932 | 0 | } else { |
933 | 0 | QTC::TC("qpdf", "NNTree insert inserts after"); |
934 | 0 | iter.insertAfter(key, value); |
935 | 0 | } |
936 | 0 | return iter; |
937 | 0 | } |
938 | | |
939 | | bool |
940 | | NNTreeImpl::remove(QPDFObjectHandle key, QPDFObjectHandle* value) |
941 | 0 | { |
942 | 0 | auto iter = find(key, false); |
943 | 0 | if (!iter.valid()) { |
944 | 0 | QTC::TC("qpdf", "NNTree remove not found"); |
945 | 0 | return false; |
946 | 0 | } |
947 | 0 | if (value) { |
948 | 0 | *value = iter->second; |
949 | 0 | } |
950 | 0 | iter.remove(); |
951 | 0 | return true; |
952 | 0 | } |