/src/qpdf/libqpdf/NNTree.cc
Line | Count | Source (jump to first uncovered line) |
1 | | #include <qpdf/assert_debug.h> |
2 | | |
3 | | #include <qpdf/NNTree.hh> |
4 | | |
5 | | #include <qpdf/QPDFObjectHandle_private.hh> |
6 | | #include <qpdf/QPDF_private.hh> |
7 | | #include <qpdf/QTC.hh> |
8 | | #include <qpdf/QUtil.hh> |
9 | | |
10 | | #include <bit> |
11 | | #include <exception> |
12 | | #include <utility> |
13 | | |
14 | | using namespace qpdf; |
15 | | |
16 | | static std::string |
17 | | get_description(QPDFObjectHandle const& node) |
18 | 10.1k | { |
19 | 10.1k | std::string result("Name/Number tree node"); |
20 | 10.1k | if (node.indirect()) { |
21 | 8.28k | result += " (object " + std::to_string(node.getObjectID()) + ")"; |
22 | 8.28k | } |
23 | 10.1k | return result; |
24 | 10.1k | } |
25 | | |
26 | | void |
27 | | NNTreeImpl::warn(QPDFObjectHandle const& node, std::string const& msg) |
28 | 6.51k | { |
29 | 6.51k | qpdf.warn(qpdf_e_damaged_pdf, get_description(node), 0, msg); |
30 | 6.51k | if (++error_count > 5 && qpdf.reconstructed_xref()) { |
31 | 2.11k | error(node, "too many errors - giving up"); |
32 | 2.11k | } |
33 | 6.51k | } |
34 | | |
35 | | void |
36 | | NNTreeImpl::error(QPDFObjectHandle const& node, std::string const& msg) |
37 | 3.60k | { |
38 | 3.60k | throw QPDFExc(qpdf_e_damaged_pdf, qpdf.getFilename(), get_description(node), 0, msg); |
39 | 3.60k | } |
40 | | |
41 | | NNTreeIterator::NNTreeIterator(NNTreeImpl& impl) : |
42 | 177k | impl(impl) |
43 | 177k | { |
44 | 177k | } |
45 | | |
46 | | void |
47 | | NNTreeIterator::updateIValue(bool allow_invalid) |
48 | 277k | { |
49 | | // ivalue should never be used inside the class since we return a pointer/reference to it. Every |
50 | | // bit of code that ever changes what object the iterator points to should take care to call |
51 | | // updateIValue. Failure to do this means that any old references to *iter will point to |
52 | | // incorrect objects, though the next dereference of the iterator will fix it. This isn't |
53 | | // necessarily catastrophic, but it would be confusing. The test suite attempts to exercise |
54 | | // various cases to ensure we don't introduce that bug in the future, but sadly it's tricky to |
55 | | // verify by reasoning about the code that this constraint is always satisfied. Whenever we |
56 | | // update what the iterator points to, we should call setItemNumber, which calls this. If we |
57 | | // change what the iterator points to in some other way, such as replacing a value or removing |
58 | | // an item and making the iterator point at a different item in potentially the same position, |
59 | | // we must call updateIValue as well. These cases are handled, and for good measure, we also |
60 | | // call updateIValue in operator* and operator->. |
61 | | |
62 | 277k | if (item_number < 0 || !node.isDictionary()) { |
63 | 2.24k | if (!allow_invalid) { |
64 | 0 | throw std::logic_error( |
65 | 0 | "attempt made to dereference an invalid name/number tree iterator"); |
66 | 0 | } |
67 | 2.24k | ivalue.first = QPDFObjectHandle(); |
68 | 2.24k | ivalue.second = QPDFObjectHandle(); |
69 | 2.24k | return; |
70 | 2.24k | } |
71 | 275k | Array items = node.getKey(impl.details.itemsKey()); |
72 | 275k | if (!std::cmp_less(item_number + 1, items.size())) { |
73 | 0 | impl.error(node, "update ivalue: items array is too short"); |
74 | 0 | } |
75 | 275k | ivalue.first = items[item_number]; |
76 | 275k | ivalue.second = items[1 + item_number]; |
77 | 275k | } |
78 | | |
79 | | NNTreeIterator::PathElement::PathElement(QPDFObjectHandle const& node, int kid_number) : |
80 | 59.4k | node(node), |
81 | 59.4k | kid_number(kid_number) |
82 | 59.4k | { |
83 | 59.4k | } |
84 | | |
85 | | QPDFObjectHandle |
86 | | NNTreeIterator::getNextKid(PathElement& pe, bool backward) |
87 | 816 | { |
88 | 1.53k | while (true) { |
89 | 1.35k | pe.kid_number += backward ? -1 : 1; |
90 | 1.35k | Array kids = pe.node.getKey("/Kids"); |
91 | 1.35k | if (pe.kid_number >= 0 && std::cmp_less(pe.kid_number, kids.size())) { |
92 | 1.13k | auto result = kids[pe.kid_number]; |
93 | 1.13k | if (result.isDictionary() && |
94 | 1.13k | (result.hasKey("/Kids") || result.hasKey(impl.details.itemsKey()))) { |
95 | 415 | return result; |
96 | 717 | } else { |
97 | 717 | impl.warn( |
98 | 717 | pe.node, "skipping over invalid kid at index " + std::to_string(pe.kid_number)); |
99 | 717 | } |
100 | 1.13k | } else { |
101 | 223 | return QPDFObjectHandle::newNull(); |
102 | 223 | } |
103 | 1.35k | } |
104 | 816 | } |
105 | | |
106 | | // iterator can be incremented or decremented, or dereferenced. This does not imply that it points |
107 | | // to a valid item. |
108 | | bool |
109 | | NNTreeIterator::valid() const |
110 | 205k | { |
111 | 205k | return item_number >= 0; |
112 | 205k | } |
113 | | |
114 | | void |
115 | | NNTreeIterator::increment(bool backward) |
116 | 51.5k | { |
117 | 51.5k | if (item_number < 0) { |
118 | 0 | deepen(impl.oh, !backward, true); |
119 | 0 | return; |
120 | 0 | } |
121 | | |
122 | 53.5k | while (valid()) { |
123 | 53.0k | item_number += backward ? -2 : 2; |
124 | 53.0k | Array items = node.getKey(impl.details.itemsKey()); |
125 | 53.0k | if (item_number < 0 || std::cmp_greater_equal(item_number, items.size())) { |
126 | 724 | bool found = false; |
127 | 724 | setItemNumber(QPDFObjectHandle(), -1); |
128 | 1.54k | while (!(found || path.empty())) { |
129 | 816 | auto& element = path.back(); |
130 | 816 | auto pe_node = getNextKid(element, backward); |
131 | 816 | if (pe_node.null()) { |
132 | 223 | path.pop_back(); |
133 | 593 | } else { |
134 | 593 | found = deepen(pe_node, !backward, false); |
135 | 593 | } |
136 | 816 | } |
137 | 724 | } |
138 | 53.0k | if (item_number >= 0) { |
139 | 52.6k | items = node.getKey(impl.details.itemsKey()); |
140 | 52.6k | if (std::cmp_greater_equal(item_number + 1, items.size())) { |
141 | 206 | impl.warn(node, "items array doesn't have enough elements"); |
142 | 52.4k | } else if (!impl.details.keyValid(items[item_number])) { |
143 | 1.42k | impl.warn(node, ("item " + std::to_string(item_number) + " has the wrong type")); |
144 | 51.0k | } else if (!items[item_number + 1]) { |
145 | 15 | impl.warn(node, "item " + std::to_string(item_number) + " is null"); |
146 | 51.0k | } else { |
147 | 51.0k | return; |
148 | 51.0k | } |
149 | 52.6k | } |
150 | 53.0k | } |
151 | 51.5k | } |
152 | | |
153 | | void |
154 | | NNTreeIterator::resetLimits(QPDFObjectHandle a_node, std::list<PathElement>::iterator parent) |
155 | 14.6k | { |
156 | 14.6k | while (true) { |
157 | 14.6k | if (parent == path.end()) { |
158 | 8.27k | a_node.removeKey("/Limits"); |
159 | 8.27k | break; |
160 | 8.27k | } |
161 | 6.41k | Array kids = a_node.getKey("/Kids"); |
162 | 6.41k | size_t nkids = kids.size(); |
163 | 6.41k | Array items = a_node.getKey(impl.details.itemsKey()); |
164 | 6.41k | size_t nitems = items.size(); |
165 | | |
166 | 6.41k | bool changed = true; |
167 | 6.41k | QPDFObjectHandle first; |
168 | 6.41k | QPDFObjectHandle last; |
169 | 6.41k | if (nitems >= 2) { |
170 | 6.19k | first = items[0]; |
171 | 6.19k | last = items[(nitems - 1u) & ~1u]; |
172 | 6.19k | } else if (nkids > 0) { |
173 | 222 | auto first_kid = kids[0]; |
174 | 222 | auto last_kid = kids[nkids - 1u]; |
175 | 222 | if (first_kid.isDictionary() && last_kid.isDictionary()) { |
176 | 222 | Array first_limits = first_kid.getKey("/Limits"); |
177 | 222 | Array last_limits = last_kid.getKey("/Limits"); |
178 | 222 | if (first_limits.size() >= 2 && last_limits.size() >= 2) { |
179 | 222 | first = first_limits[0]; |
180 | 222 | last = last_limits[1]; |
181 | 222 | } |
182 | 222 | } |
183 | 222 | } |
184 | 6.41k | if (first && last) { |
185 | 6.41k | Array limits({first, last}); |
186 | 6.41k | Array olimits = a_node.getKey("/Limits"); |
187 | 6.41k | if (olimits.size() == 2) { |
188 | 5.69k | auto ofirst = olimits[0]; |
189 | 5.69k | auto olast = olimits[1]; |
190 | 5.69k | if (impl.details.keyValid(ofirst) && impl.details.keyValid(olast) && |
191 | 5.69k | impl.details.compareKeys(first, ofirst) == 0 && |
192 | 5.69k | impl.details.compareKeys(last, olast) == 0) { |
193 | 4.98k | changed = false; |
194 | 4.98k | } |
195 | 5.69k | } |
196 | 6.41k | if (changed && !a_node.isSameObjectAs(path.begin()->node)) { |
197 | 1.21k | a_node.replaceKey("/Limits", limits); |
198 | 1.21k | } |
199 | 6.41k | } else { |
200 | 0 | impl.warn(a_node, "unable to determine limits"); |
201 | 0 | } |
202 | | |
203 | 6.41k | if (!changed || parent == path.begin()) { |
204 | 6.41k | break; |
205 | 6.41k | } else { |
206 | 0 | a_node = parent->node; |
207 | 0 | --parent; |
208 | 0 | } |
209 | 6.41k | } |
210 | 14.6k | } |
211 | | |
212 | | void |
213 | | NNTreeIterator::split(QPDFObjectHandle to_split, std::list<PathElement>::iterator parent) |
214 | 13.9k | { |
215 | | // Split some node along the path to the item pointed to by this iterator, and adjust the |
216 | | // iterator so it points to the same item. |
217 | | |
218 | | // In examples, for simplicity, /Nums is shown to just contain numbers instead of pairs. Imagine |
219 | | // this tree: |
220 | | // |
221 | | // root: << /Kids [ A B C D ] >> |
222 | | // A: << /Nums [ 1 2 3 4 ] >> |
223 | | // B: << /Nums [ 5 6 7 8 ] >> |
224 | | // C: << /Nums [ 9 10 11 12 ] >> |
225 | | // D: << /Kids [ E F ] |
226 | | // E: << /Nums [ 13 14 15 16 ] >> |
227 | | // F: << /Nums [ 17 18 19 20 ] >> |
228 | | |
229 | | // iter1 (points to 19) |
230 | | // path: |
231 | | // - { node: root: kid_number: 3 } |
232 | | // - { node: D, kid_number: 1 } |
233 | | // node: F |
234 | | // item_number: 2 |
235 | | |
236 | | // iter2 (points to 1) |
237 | | // path: |
238 | | // - { node: root, kid_number: 0} |
239 | | // node: A |
240 | | // item_number: 0 |
241 | | |
242 | 13.9k | if (!valid()) { |
243 | 0 | throw std::logic_error("NNTreeIterator::split called an invalid iterator"); |
244 | 0 | } |
245 | | |
246 | | // Find the array we actually need to split, which is either this node's kids or items. |
247 | 13.9k | Array kids = to_split.getKey("/Kids"); |
248 | 13.9k | size_t nkids = kids.size(); |
249 | 13.9k | Array items = to_split.getKey(impl.details.itemsKey()); |
250 | 13.9k | size_t nitems = items.size(); |
251 | | |
252 | 13.9k | Array first_half; |
253 | 13.9k | size_t n = 0; |
254 | 13.9k | std::string key; |
255 | 13.9k | size_t threshold = static_cast<size_t>(impl.split_threshold); |
256 | 13.9k | if (nkids > 0) { |
257 | 222 | first_half = kids; |
258 | 222 | n = nkids; |
259 | 222 | key = "/Kids"; |
260 | 13.7k | } else if (nitems > 0) { |
261 | 13.7k | first_half = items; |
262 | 13.7k | n = nitems; |
263 | 13.7k | threshold *= 2; |
264 | 13.7k | key = impl.details.itemsKey(); |
265 | 13.7k | } else { |
266 | 0 | throw std::logic_error("NNTreeIterator::split called on invalid node"); |
267 | 0 | } |
268 | | |
269 | 13.9k | if (n <= threshold) { |
270 | 13.6k | return; |
271 | 13.6k | } |
272 | | |
273 | 364 | bool is_root = parent == path.end(); |
274 | 364 | bool is_leaf = nitems > 0; |
275 | | |
276 | | // CURRENT STATE: tree is in original state; iterator is valid and unchanged. |
277 | | |
278 | 364 | if (is_root) { |
279 | | // What we want to do is to create a new node for the second half of the items and put it in |
280 | | // the parent's /Kids array right after the element that points to the current to_split |
281 | | // node, but if we're splitting root, there is no parent, so handle that first. |
282 | | |
283 | | // In the non-root case, parent points to the path element whose /Kids contains the first |
284 | | // half node, and the first half node is to_split. If we are splitting the root, we need to |
285 | | // push everything down a level, but we want to keep the actual root object the same so that |
286 | | // indirect references to it remain intact (and also in case it might be a direct object, |
287 | | // which it shouldn't be but that case probably exists in the wild). To achieve this, we |
288 | | // create a new node for the first half and then replace /Kids in the root to contain it. |
289 | | // Then we adjust the path so that the first element is root and the second element, if any, |
290 | | // is the new first half. In this way, we make the root case identical to the non-root case |
291 | | // so remaining logic can handle them in the same way. |
292 | | |
293 | 142 | auto first_node = impl.qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary()); |
294 | 142 | first_node.replaceKey(key, first_half); |
295 | 142 | Array new_kids; |
296 | 142 | new_kids.push_back(first_node); |
297 | 142 | to_split.removeKey("/Limits"); // already shouldn't be there for root |
298 | 142 | to_split.removeKey(impl.details.itemsKey()); |
299 | 142 | to_split.replaceKey("/Kids", new_kids); |
300 | 142 | if (is_leaf) { |
301 | 140 | node = first_node; |
302 | 140 | } else { |
303 | 2 | auto next = path.begin(); |
304 | 2 | next->node = first_node; |
305 | 2 | } |
306 | 142 | this->path.emplace_front(to_split, 0); |
307 | 142 | parent = path.begin(); |
308 | 142 | to_split = first_node; |
309 | 142 | } |
310 | | |
311 | | // CURRENT STATE: parent is guaranteed to be defined, and we have the invariants that |
312 | | // parent[/Kids][kid_number] == to_split and (++parent).node == to_split. |
313 | | |
314 | | // Create a second half array, and transfer the second half of the items into the second half |
315 | | // array. |
316 | 364 | Array second_half; |
317 | 364 | auto start_idx = static_cast<int>((n / 2) & ~1u); |
318 | 12.6k | while (std::cmp_greater(first_half.size(), start_idx)) { |
319 | 12.3k | second_half.push_back(first_half[start_idx]); |
320 | 12.3k | first_half.erase(start_idx); |
321 | 12.3k | } |
322 | 364 | resetLimits(to_split, parent); |
323 | | |
324 | | // Create a new node to contain the second half |
325 | 364 | QPDFObjectHandle second_node = impl.qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary()); |
326 | 364 | second_node.replaceKey(key, second_half); |
327 | 364 | resetLimits(second_node, parent); |
328 | | |
329 | | // CURRENT STATE: half the items from the kids or items array in the node being split have been |
330 | | // moved into a new node. The new node is not yet attached to the tree. The iterator may have a |
331 | | // path element or leaf node that is out of bounds. |
332 | | |
333 | | // We need to adjust the parent to add the second node to /Kids and, if needed, update |
334 | | // kid_number to traverse through it. We need to update to_split's path element, or the node if |
335 | | // this is a leaf, so that the kid/item number points to the right place. |
336 | | |
337 | 364 | Array parent_kids = parent->node.getKey("/Kids"); |
338 | 364 | parent_kids.insert(parent->kid_number + 1, second_node); |
339 | 364 | auto cur_elem = parent; |
340 | 364 | ++cur_elem; // points to end() for leaf nodes |
341 | 364 | int old_idx = (is_leaf ? item_number : cur_elem->kid_number); |
342 | 364 | if (old_idx >= start_idx) { |
343 | 217 | ++parent->kid_number; |
344 | 217 | if (is_leaf) { |
345 | 217 | setItemNumber(second_node, item_number - start_idx); |
346 | 217 | } else { |
347 | 0 | cur_elem->node = second_node; |
348 | 0 | cur_elem->kid_number -= start_idx; |
349 | 0 | } |
350 | 217 | } |
351 | 364 | if (!is_root) { |
352 | 222 | auto next = parent->node; |
353 | 222 | resetLimits(next, parent); |
354 | 222 | --parent; |
355 | 222 | split(next, parent); |
356 | 222 | } |
357 | 364 | } |
358 | | |
359 | | std::list<NNTreeIterator::PathElement>::iterator |
360 | | NNTreeIterator::lastPathElement() |
361 | 27.4k | { |
362 | 27.4k | auto result = path.end(); |
363 | 27.4k | if (!path.empty()) { |
364 | 10.9k | --result; |
365 | 10.9k | } |
366 | 27.4k | return result; |
367 | 27.4k | } |
368 | | |
369 | | void |
370 | | NNTreeIterator::insertAfter(QPDFObjectHandle const& key, QPDFObjectHandle const& value) |
371 | 12.7k | { |
372 | 12.7k | if (!valid()) { |
373 | 0 | impl.insertFirst(key, value); |
374 | 0 | deepen(impl.oh, true, false); |
375 | 0 | return; |
376 | 0 | } |
377 | | |
378 | 12.7k | Array items = node.getKey(impl.details.itemsKey()); |
379 | 12.7k | if (!items) { |
380 | 0 | impl.error(node, "node contains no items array"); |
381 | 0 | } |
382 | | |
383 | 12.7k | if (std::cmp_less(items.size(), item_number + 2)) { |
384 | 0 | impl.error(node, "insert: items array is too short"); |
385 | 0 | } |
386 | 12.7k | if (!(key && value)) { |
387 | 0 | impl.error(node, "insert: key or value is null"); |
388 | 0 | } |
389 | 12.7k | items.insert(item_number + 2, key); |
390 | 12.7k | items.insert(item_number + 3, value); |
391 | 12.7k | resetLimits(node, lastPathElement()); |
392 | 12.7k | split(node, lastPathElement()); |
393 | 12.7k | increment(false); |
394 | 12.7k | } |
395 | | |
396 | | void |
397 | | NNTreeIterator::remove() |
398 | 0 | { |
399 | | // Remove this item, leaving the tree valid and this iterator pointing to the next item. |
400 | |
|
401 | 0 | if (!valid()) { |
402 | 0 | throw std::logic_error("attempt made to remove an invalid iterator"); |
403 | 0 | } |
404 | 0 | Array items = node.getKey(impl.details.itemsKey()); |
405 | 0 | int nitems = static_cast<int>(items.size()); |
406 | 0 | if (std::cmp_greater(item_number + 2, nitems)) { |
407 | 0 | impl.error(node, "found short items array while removing an item"); |
408 | 0 | } |
409 | |
|
410 | 0 | items.erase(item_number); |
411 | 0 | items.erase(item_number); |
412 | 0 | nitems -= 2; |
413 | |
|
414 | 0 | if (nitems > 0) { |
415 | | // There are still items left |
416 | |
|
417 | 0 | if (item_number == 0 || item_number == nitems) { |
418 | | // We removed either the first or last item of an items array that remains non-empty, so |
419 | | // we have to adjust limits. |
420 | 0 | resetLimits(node, lastPathElement()); |
421 | 0 | } |
422 | |
|
423 | 0 | if (item_number == nitems) { |
424 | | // We removed the last item of a non-empty items array, so advance to the successor of |
425 | | // the previous item. |
426 | 0 | item_number -= 2; |
427 | 0 | increment(false); |
428 | 0 | } else if (item_number < nitems) { |
429 | | // We don't have to do anything since the removed item's successor now occupies its |
430 | | // former location. |
431 | 0 | updateIValue(); |
432 | 0 | } else { |
433 | | // We already checked to ensure this condition would not happen. |
434 | 0 | throw std::logic_error("NNTreeIterator::remove: item_number > nitems after erase"); |
435 | 0 | } |
436 | 0 | return; |
437 | 0 | } |
438 | | |
439 | 0 | if (path.empty()) { |
440 | | // Special case: if this is the root node, we can leave it empty. |
441 | 0 | setItemNumber(impl.oh, -1); |
442 | 0 | return; |
443 | 0 | } |
444 | | |
445 | | // We removed the last item from this items array, so we need to remove this node from the |
446 | | // parent on up the tree. Then we need to position ourselves at the removed item's successor. |
447 | 0 | while (true) { |
448 | 0 | auto element = lastPathElement(); |
449 | 0 | auto parent = element; |
450 | 0 | --parent; |
451 | 0 | Array kids = element->node.getKey("/Kids"); |
452 | 0 | kids.erase(element->kid_number); |
453 | 0 | auto nkids = kids.size(); |
454 | 0 | if (nkids > 0) { |
455 | | // The logic here is similar to the items case. |
456 | 0 | if (element->kid_number == 0 || std::cmp_equal(element->kid_number, nkids)) { |
457 | 0 | resetLimits(element->node, parent); |
458 | 0 | } |
459 | 0 | if (std::cmp_equal(element->kid_number, nkids)) { |
460 | | // Move to the successor of the last child of the previous kid. |
461 | 0 | setItemNumber({}, -1); |
462 | 0 | --element->kid_number; |
463 | 0 | deepen(kids[element->kid_number], false, true); |
464 | 0 | if (valid()) { |
465 | 0 | increment(false); |
466 | 0 | QTC::TC("qpdf", "NNTree erased last kid/item in tree", valid() ? 0 : 1); |
467 | 0 | } |
468 | 0 | } else { |
469 | | // Next kid is in deleted kid's position |
470 | 0 | deepen(kids.get(element->kid_number), true, true); |
471 | 0 | } |
472 | 0 | return; |
473 | 0 | } |
474 | | |
475 | 0 | if (parent == path.end()) { |
476 | | // We erased the very last item. Convert the root to an empty items array. |
477 | 0 | element->node.removeKey("/Kids"); |
478 | 0 | element->node.replaceKey(impl.details.itemsKey(), Array()); |
479 | 0 | path.clear(); |
480 | 0 | setItemNumber(impl.oh, -1); |
481 | 0 | return; |
482 | 0 | } |
483 | | |
484 | | // Walk up the tree and continue |
485 | 0 | path.pop_back(); |
486 | 0 | } |
487 | 0 | } |
488 | | |
489 | | NNTreeIterator& |
490 | | NNTreeIterator::operator++() |
491 | 38.7k | { |
492 | 38.7k | increment(false); |
493 | 38.7k | return *this; |
494 | 38.7k | } |
495 | | |
496 | | NNTreeIterator& |
497 | | NNTreeIterator::operator--() |
498 | 0 | { |
499 | 0 | increment(true); |
500 | 0 | return *this; |
501 | 0 | } |
502 | | |
503 | | NNTreeIterator::reference |
504 | | NNTreeIterator::operator*() |
505 | 39.0k | { |
506 | 39.0k | updateIValue(false); |
507 | 39.0k | return ivalue; |
508 | 39.0k | } |
509 | | |
510 | | NNTreeIterator::pointer |
511 | | NNTreeIterator::operator->() |
512 | 127k | { |
513 | 127k | updateIValue(false); |
514 | 127k | return &ivalue; |
515 | 127k | } |
516 | | |
517 | | bool |
518 | | NNTreeIterator::operator==(NNTreeIterator const& other) const |
519 | 84.4k | { |
520 | 84.4k | if (item_number == -1 && other.item_number == -1) { |
521 | 2.42k | return true; |
522 | 2.42k | } |
523 | 82.0k | if (path.size() != other.path.size()) { |
524 | 66.5k | return false; |
525 | 66.5k | } |
526 | 15.4k | auto tpi = path.begin(); |
527 | 15.4k | auto opi = other.path.begin(); |
528 | 15.4k | while (tpi != path.end()) { |
529 | 0 | if (tpi->kid_number != opi->kid_number) { |
530 | 0 | return false; |
531 | 0 | } |
532 | 0 | ++tpi; |
533 | 0 | ++opi; |
534 | 0 | } |
535 | 15.4k | return item_number == other.item_number; |
536 | 15.4k | } |
537 | | |
538 | | void |
539 | | NNTreeIterator::setItemNumber(QPDFObjectHandle const& a_node, int n) |
540 | 85.6k | { |
541 | 85.6k | node = a_node; |
542 | 85.6k | item_number = n; |
543 | 85.6k | updateIValue(); |
544 | 85.6k | } |
545 | | |
546 | | void |
547 | | NNTreeIterator::addPathElement(QPDFObjectHandle const& a_node, int kid_number) |
548 | 59.2k | { |
549 | 59.2k | path.emplace_back(a_node, kid_number); |
550 | 59.2k | } |
551 | | |
552 | | bool |
553 | | NNTreeIterator::deepen(QPDFObjectHandle a_node, bool first, bool allow_empty) |
554 | 46.1k | { |
555 | | // Starting at this node, descend through the first or last kid until we reach a node with |
556 | | // items. If we succeed, return true; otherwise return false and leave path alone. |
557 | | |
558 | 46.1k | auto opath = path; |
559 | | |
560 | 46.1k | auto fail = [this, &opath](QPDFObjectHandle const& failed_node, std::string const& msg) { |
561 | 1.41k | impl.warn(failed_node, msg); |
562 | 1.41k | path = opath; |
563 | 1.41k | return false; |
564 | 1.41k | }; |
565 | | |
566 | 46.1k | QPDFObjGen::set seen; |
567 | 46.1k | for (auto const& i: path) { |
568 | 499 | seen.add(i.node); |
569 | 499 | } |
570 | 78.0k | while (true) { |
571 | 77.9k | if (!seen.add(a_node)) { |
572 | 114 | return fail(a_node, "loop detected while traversing name/number tree"); |
573 | 114 | } |
574 | | |
575 | 77.8k | if (!a_node.isDictionary()) { |
576 | 600 | return fail(a_node, "non-dictionary node while traversing name/number tree"); |
577 | 600 | } |
578 | | |
579 | 77.2k | Array items = a_node.getKey(impl.details.itemsKey()); |
580 | 77.2k | int nitems = static_cast<int>(items.size()); |
581 | 77.2k | if (nitems > 1) { |
582 | 42.8k | setItemNumber(a_node, first ? 0 : nitems - 2); |
583 | 42.8k | break; |
584 | 42.8k | } |
585 | | |
586 | 34.3k | Array kids = a_node.getKey("/Kids"); |
587 | 34.3k | int nkids = static_cast<int>(kids.size()); |
588 | 34.3k | if (nkids > 0) { |
589 | 32.0k | int kid_number = first ? 0 : nkids - 1; |
590 | 32.0k | addPathElement(a_node, kid_number); |
591 | 32.0k | auto next = kids[kid_number]; |
592 | 32.0k | if (!next) { |
593 | 83 | return fail(a_node, "kid number " + std::to_string(kid_number) + " is invalid"); |
594 | 83 | } |
595 | 31.9k | if (!next.indirect()) { |
596 | 204 | if (impl.auto_repair) { |
597 | 204 | impl.warn( |
598 | 204 | a_node, |
599 | 204 | "converting kid number " + std::to_string(kid_number) + |
600 | 204 | " to an indirect object"); |
601 | 204 | next = impl.qpdf.makeIndirectObject(next); |
602 | 204 | kids.set(kid_number, next); |
603 | 204 | } else { |
604 | 0 | impl.warn( |
605 | 0 | a_node, |
606 | 0 | "kid number " + std::to_string(kid_number) + " is not an indirect object"); |
607 | 0 | } |
608 | 204 | } |
609 | 31.9k | a_node = next; |
610 | 31.9k | } else if (allow_empty && items) { |
611 | 1.51k | setItemNumber(a_node, -1); |
612 | 1.51k | break; |
613 | 1.51k | } else { |
614 | 800 | return fail( |
615 | 800 | a_node, |
616 | 800 | "name/number tree node has neither non-empty " + impl.details.itemsKey() + |
617 | 800 | " nor /Kids"); |
618 | 800 | } |
619 | 34.3k | } |
620 | 44.5k | return true; |
621 | 46.1k | } |
622 | | |
623 | | NNTreeImpl::NNTreeImpl( |
624 | | NNTreeDetails const& details, QPDF& qpdf, QPDFObjectHandle& oh, bool auto_repair) : |
625 | 1.77k | details(details), |
626 | 1.77k | qpdf(qpdf), |
627 | 1.77k | oh(oh), |
628 | 1.77k | auto_repair(auto_repair) |
629 | 1.77k | { |
630 | 1.77k | } |
631 | | |
632 | | void |
633 | | NNTreeImpl::setSplitThreshold(int threshold) |
634 | 0 | { |
635 | 0 | split_threshold = threshold; |
636 | 0 | } |
637 | | |
638 | | NNTreeImpl::iterator |
639 | | NNTreeImpl::begin() |
640 | 45.7k | { |
641 | 45.7k | iterator result(*this); |
642 | 45.7k | result.deepen(oh, true, true); |
643 | 45.7k | return result; |
644 | 45.7k | } |
645 | | |
646 | | NNTreeImpl::iterator |
647 | | NNTreeImpl::end() |
648 | 90.5k | { |
649 | 90.5k | return {*this}; |
650 | 90.5k | } |
651 | | |
652 | | NNTreeImpl::iterator |
653 | | NNTreeImpl::last() |
654 | 0 | { |
655 | 0 | iterator result(*this); |
656 | 0 | result.deepen(oh, false, true); |
657 | 0 | return result; |
658 | 0 | } |
659 | | |
660 | | int |
661 | | NNTreeImpl::withinLimits(QPDFObjectHandle const& key, QPDFObjectHandle const& node) |
662 | 56.7k | { |
663 | 56.7k | Array limits = node.getKey("/Limits"); |
664 | 56.7k | if (!(details.keyValid(limits[0]) && details.keyValid(limits[1]))) { |
665 | 308 | error(node, "node is missing /Limits"); |
666 | 308 | } |
667 | 56.7k | if (details.compareKeys(key, limits[0]) < 0) { |
668 | 22.6k | return -1; |
669 | 22.6k | } |
670 | 34.1k | if (details.compareKeys(key, limits[1]) > 0) { |
671 | 7.13k | return 1; |
672 | 7.13k | } |
673 | 27.0k | return 0; |
674 | 34.1k | } |
675 | | |
676 | | int |
677 | | NNTreeImpl::binarySearch( |
678 | | QPDFObjectHandle key, |
679 | | QPDFObjectHandle items, |
680 | | size_t num_items, |
681 | | bool return_prev_if_not_found, |
682 | | int (NNTreeImpl::*compare)(QPDFObjectHandle& key, QPDFObjectHandle& arr, int item)) |
683 | 68.2k | { |
684 | 68.2k | size_t max_idx = std::bit_ceil(num_items); |
685 | | |
686 | 68.2k | int step = static_cast<int>(max_idx / 2); |
687 | 68.2k | size_t checks = max_idx; |
688 | 68.2k | int idx = step; |
689 | 68.2k | int found_idx = -1; |
690 | | |
691 | 249k | while (checks > 0) { |
692 | 233k | int status = -1; |
693 | 233k | if (std::cmp_less(idx, num_items)) { |
694 | 222k | status = (this->*compare)(key, items, idx); |
695 | 222k | if (status == 0) { |
696 | 52.2k | return idx; |
697 | 52.2k | } |
698 | 169k | if (status > 0) { |
699 | 68.7k | found_idx = idx; |
700 | 68.7k | } |
701 | 169k | } |
702 | 181k | checks >>= 1; |
703 | 181k | if (checks > 0) { |
704 | 165k | step >>= 1; |
705 | 165k | if (step == 0) { |
706 | 22.1k | step = 1; |
707 | 22.1k | } |
708 | | |
709 | 165k | if (status < 0) { |
710 | 103k | idx -= step; |
711 | 103k | } else { |
712 | 61.3k | idx += step; |
713 | 61.3k | } |
714 | 165k | } |
715 | 181k | } |
716 | | |
717 | 16.0k | return return_prev_if_not_found ? found_idx : -1; |
718 | 68.2k | } |
719 | | |
720 | | int |
721 | | NNTreeImpl::compareKeyItem(QPDFObjectHandle& key, QPDFObjectHandle& items, int idx) |
722 | 164k | { |
723 | 164k | if (!(std::cmp_greater(items.size(), 2 * idx) && details.keyValid(items[2 * idx]))) { |
724 | 614 | error(oh, ("item at index " + std::to_string(2 * idx) + " is not the right type")); |
725 | 614 | } |
726 | 164k | return details.compareKeys(key, items[2 * idx]); |
727 | 164k | } |
728 | | |
729 | | int |
730 | | NNTreeImpl::compareKeyKid(QPDFObjectHandle& key, QPDFObjectHandle& kids, int idx) |
731 | 57.2k | { |
732 | 57.2k | if (!(std::cmp_less(idx, kids.size()) && kids[idx].isDictionary())) { |
733 | 442 | error(oh, "invalid kid at index " + std::to_string(idx)); |
734 | 442 | } |
735 | 57.2k | return withinLimits(key, kids[idx]); |
736 | 57.2k | } |
737 | | |
738 | | void |
739 | | NNTreeImpl::repair() |
740 | 726 | { |
741 | 726 | auto new_node = QPDFObjectHandle::newDictionary(); |
742 | 726 | new_node.replaceKey(details.itemsKey(), Array()); |
743 | 726 | NNTreeImpl repl(details, qpdf, new_node, false); |
744 | 39.0k | for (auto const& [key, value]: *this) { |
745 | 39.0k | if (key && value) { |
746 | 39.0k | repl.insert(key, value); |
747 | 39.0k | } |
748 | 39.0k | } |
749 | 726 | oh.replaceKey("/Kids", new_node.getKey("/Kids")); |
750 | 726 | oh.replaceKey(details.itemsKey(), new_node.getKey(details.itemsKey())); |
751 | 726 | } |
752 | | |
753 | | NNTreeImpl::iterator |
754 | | NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found) |
755 | 43.8k | { |
756 | 43.8k | try { |
757 | 43.8k | return findInternal(key, return_prev_if_not_found); |
758 | 43.8k | } catch (QPDFExc& e) { |
759 | 2.75k | if (auto_repair) { |
760 | 2.54k | warn(oh, std::string("attempting to repair after error: ") + e.what()); |
761 | 2.54k | repair(); |
762 | 2.54k | return findInternal(key, return_prev_if_not_found); |
763 | 2.54k | } else { |
764 | 208 | throw; |
765 | 208 | } |
766 | 2.75k | } |
767 | 43.8k | } |
768 | | |
769 | | NNTreeImpl::iterator |
770 | | NNTreeImpl::findInternal(QPDFObjectHandle const& key, bool return_prev_if_not_found) |
771 | 43.9k | { |
772 | 43.9k | auto first_item = begin(); |
773 | 43.9k | auto last_item = end(); |
774 | 43.9k | if (first_item == end()) { |
775 | 1.33k | return end(); |
776 | 1.33k | } |
777 | 42.6k | if (first_item.valid() && details.keyValid(first_item->first) && |
778 | 42.6k | details.compareKeys(key, first_item->first) < 0) { |
779 | | // Before the first key |
780 | 375 | return end(); |
781 | 375 | } |
782 | 42.2k | qpdf_assert_debug(!last_item.valid()); |
783 | | |
784 | 41.1k | QPDFObjGen::set seen; |
785 | 41.1k | auto node = oh; |
786 | 41.1k | iterator result(*this); |
787 | | |
788 | 69.2k | while (true) { |
789 | 68.3k | if (!seen.add(node)) { |
790 | 37 | error(node, "loop detected in find"); |
791 | 37 | } |
792 | | |
793 | 68.3k | Array items = node.getKey(details.itemsKey()); |
794 | 68.3k | size_t nitems = items.size(); |
795 | 68.3k | if (nitems > 1) { |
796 | 40.1k | int idx = binarySearch( |
797 | 40.1k | key, items, nitems / 2, return_prev_if_not_found, &NNTreeImpl::compareKeyItem); |
798 | 40.1k | if (idx >= 0) { |
799 | 39.2k | result.setItemNumber(node, 2 * idx); |
800 | 39.2k | } |
801 | 40.1k | return result; |
802 | 40.1k | } |
803 | | |
804 | 28.1k | Array kids = node.getKey("/Kids"); |
805 | 28.1k | size_t nkids = kids.size(); |
806 | 28.1k | if (nkids > 0) { |
807 | 28.1k | int idx = binarySearch(key, kids, nkids, true, &NNTreeImpl::compareKeyKid); |
808 | 28.1k | if (idx == -1) { |
809 | 44 | error(node, "unexpected -1 from binary search of kids; limits may by wrong"); |
810 | 44 | } |
811 | 28.1k | result.addPathElement(node, idx); |
812 | 28.1k | node = kids[idx]; |
813 | 28.1k | } else { |
814 | 73 | error(node, "bad node during find"); |
815 | 73 | } |
816 | 28.1k | } |
817 | 41.1k | } |
818 | | |
819 | | NNTreeImpl::iterator |
820 | | NNTreeImpl::insertFirst(QPDFObjectHandle const& key, QPDFObjectHandle const& value) |
821 | 1.00k | { |
822 | 1.00k | auto iter = begin(); |
823 | 1.00k | Array items(nullptr); |
824 | 1.00k | if (iter.node.isDictionary()) { |
825 | 1.00k | items = iter.node.getKey(details.itemsKey()); |
826 | 1.00k | } |
827 | 1.00k | if (!items) { |
828 | 0 | error(oh, "unable to find a valid items node"); |
829 | 0 | } |
830 | 1.00k | if (!(key && value)) { |
831 | 0 | error(oh, "unable to insert null key or value"); |
832 | 0 | } |
833 | 1.00k | items.insert(0, key); |
834 | 1.00k | items.insert(1, value); |
835 | 1.00k | iter.setItemNumber(iter.node, 0); |
836 | 1.00k | iter.resetLimits(iter.node, iter.lastPathElement()); |
837 | 1.00k | iter.split(iter.node, iter.lastPathElement()); |
838 | 1.00k | return iter; |
839 | 1.00k | } |
840 | | |
841 | | NNTreeImpl::iterator |
842 | | NNTreeImpl::insert(QPDFObjectHandle const& key, QPDFObjectHandle const& value) |
843 | 39.0k | { |
844 | 39.0k | auto iter = find(key, true); |
845 | 39.0k | if (!iter.valid()) { |
846 | 1.00k | return insertFirst(key, value); |
847 | 38.0k | } else if (details.compareKeys(key, iter->first) == 0) { |
848 | 25.0k | Array items = iter.node.getKey(details.itemsKey()); |
849 | 25.0k | items.set(iter.item_number + 1, value); |
850 | 25.0k | iter.updateIValue(); |
851 | 25.0k | } else { |
852 | 12.9k | iter.insertAfter(key, value); |
853 | 12.9k | } |
854 | 38.0k | return iter; |
855 | 39.0k | } |
856 | | |
857 | | bool |
858 | | NNTreeImpl::remove(QPDFObjectHandle const& key, QPDFObjectHandle* value) |
859 | 0 | { |
860 | 0 | auto iter = find(key, false); |
861 | 0 | if (!iter.valid()) { |
862 | 0 | return false; |
863 | 0 | } |
864 | 0 | if (value) { |
865 | 0 | *value = iter->second; |
866 | 0 | } |
867 | 0 | iter.remove(); |
868 | 0 | return true; |
869 | 0 | } |