Coverage Report

Created: 2025-07-18 07:00

/src/qpdf/libqpdf/NNTree.cc
Line
Count
Source (jump to first uncovered line)
1
#include <qpdf/NNTree.hh>
2
3
#include <qpdf/QTC.hh>
4
#include <qpdf/QUtil.hh>
5
6
#include <exception>
7
8
static std::string
9
get_description(QPDFObjectHandle& node)
10
23.8k
{
11
23.8k
    std::string result("Name/Number tree node");
12
23.8k
    if (node.isIndirect()) {
13
19.9k
        result += " (object " + std::to_string(node.getObjectID()) + ")";
14
19.9k
    }
15
23.8k
    return result;
16
23.8k
}
17
18
static void
19
warn(QPDF& qpdf, QPDFObjectHandle& node, std::string const& msg)
20
20.5k
{
21
20.5k
    qpdf.warn(qpdf_e_damaged_pdf, get_description(node), 0, msg);
22
20.5k
}
23
24
static void
25
error(QPDF& qpdf, QPDFObjectHandle& node, std::string const& msg)
26
3.33k
{
27
3.33k
    throw QPDFExc(qpdf_e_damaged_pdf, qpdf.getFilename(), get_description(node), 0, msg);
28
3.33k
}
29
30
NNTreeIterator::NNTreeIterator(NNTreeImpl& impl) :
31
202k
    impl(impl)
32
202k
{
33
202k
}
34
35
void
36
NNTreeIterator::updateIValue(bool allow_invalid)
37
289k
{
38
    // ivalue should never be used inside the class since we return a pointer/reference to it. Every
39
    // bit of code that ever changes what object the iterator points to should take care to call
40
    // updateIValue. Failure to do this means that any old references to *iter will point to
41
    // incorrect objects, though the next dereference of the iterator will fix it. This isn't
42
    // necessarily catastrophic, but it would be confusing. The test suite attempts to exercise
43
    // various cases to ensure we don't introduce that bug in the future, but sadly it's tricky to
44
    // verify by reasoning about the code that this constraint is always satisfied. Whenever we
45
    // update what the iterator points to, we should call setItemNumber, which calls this. If we
46
    // change what the iterator points to in some other way, such as replacing a value or removing
47
    // an item and making the iterator point at a different item in potentially the same position,
48
    // we must call updateIValue as well. These cases are handled, and for good measure, we also
49
    // call updateIValue in operator* and operator->.
50
51
289k
    bool okay = false;
52
289k
    if (item_number >= 0 && node.isDictionary()) {
53
283k
        auto items = node.getKey(impl.details.itemsKey());
54
283k
        if (item_number + 1 < items.getArrayNItems()) {
55
282k
            okay = true;
56
282k
            ivalue.first = items.getArrayItem(item_number);
57
282k
            ivalue.second = items.getArrayItem(1 + item_number);
58
282k
        } else {
59
878
            error(impl.qpdf, node, "update ivalue: items array is too short");
60
878
        }
61
283k
    }
62
289k
    if (!okay) {
63
6.11k
        if (!allow_invalid) {
64
0
            throw std::logic_error(
65
0
                "attempt made to dereference an invalid name/number tree iterator");
66
0
        }
67
6.11k
        ivalue.first = QPDFObjectHandle();
68
6.11k
        ivalue.second = QPDFObjectHandle();
69
6.11k
    }
70
289k
}
71
72
NNTreeIterator::PathElement::PathElement(QPDFObjectHandle const& node, int kid_number) :
73
47.8k
    node(node),
74
47.8k
    kid_number(kid_number)
75
47.8k
{
76
47.8k
}
77
78
QPDFObjectHandle
79
NNTreeIterator::getNextKid(PathElement& pe, bool backward)
80
4.38k
{
81
4.38k
    QPDFObjectHandle result;
82
4.38k
    bool found = false;
83
14.7k
    while (!found) {
84
10.3k
        pe.kid_number += backward ? -1 : 1;
85
10.3k
        auto kids = pe.node.getKey("/Kids");
86
10.3k
        if ((pe.kid_number >= 0) && (pe.kid_number < kids.getArrayNItems())) {
87
8.72k
            result = kids.getArrayItem(pe.kid_number);
88
8.72k
            if (result.isDictionary() &&
89
8.72k
                (result.hasKey("/Kids") || result.hasKey(impl.details.itemsKey()))) {
90
2.72k
                found = true;
91
6.00k
            } else {
92
6.00k
                QTC::TC("qpdf", "NNTree skip invalid kid");
93
6.00k
                warn(
94
6.00k
                    impl.qpdf,
95
6.00k
                    pe.node,
96
6.00k
                    ("skipping over invalid kid at index " + std::to_string(pe.kid_number)));
97
6.00k
            }
98
8.72k
        } else {
99
1.64k
            result = QPDFObjectHandle::newNull();
100
1.64k
            found = true;
101
1.64k
        }
102
10.3k
    }
103
4.38k
    return result;
104
4.38k
}
105
106
bool
107
NNTreeIterator::valid() const
108
344k
{
109
344k
    return item_number >= 0;
110
344k
}
111
112
void
113
NNTreeIterator::increment(bool backward)
114
69.8k
{
115
69.8k
    if (item_number < 0) {
116
0
        QTC::TC("qpdf", "NNTree increment end()");
117
0
        deepen(impl.oh, !backward, true);
118
0
        return;
119
0
    }
120
69.8k
    bool found_valid_key = false;
121
148k
    while (valid() && !found_valid_key) {
122
78.6k
        item_number += backward ? -2 : 2;
123
78.6k
        auto items = node.getKey(impl.details.itemsKey());
124
78.6k
        if (item_number < 0 || item_number >= items.getArrayNItems()) {
125
2.57k
            bool found = false;
126
2.57k
            setItemNumber(QPDFObjectHandle(), -1);
127
6.95k
            while (!(found || path.empty())) {
128
4.38k
                auto& element = path.back();
129
4.38k
                auto pe_node = getNextKid(element, backward);
130
4.38k
                if (pe_node.isNull()) {
131
1.64k
                    path.pop_back();
132
2.73k
                } else {
133
2.73k
                    found = deepen(pe_node, !backward, false);
134
2.73k
                }
135
4.38k
            }
136
2.57k
        }
137
78.6k
        if (item_number >= 0) {
138
77.2k
            items = node.getKey(impl.details.itemsKey());
139
77.2k
            if (item_number + 1 >= items.getArrayNItems()) {
140
919
                QTC::TC("qpdf", "NNTree skip item at end of short items");
141
919
                warn(impl.qpdf, node, "items array doesn't have enough elements");
142
76.3k
            } else if (!impl.details.keyValid(items.getArrayItem(item_number))) {
143
7.90k
                QTC::TC("qpdf", "NNTree skip invalid key");
144
7.90k
                warn(
145
7.90k
                    impl.qpdf,
146
7.90k
                    node,
147
7.90k
                    ("item " + std::to_string(item_number) + " has the wrong type"));
148
68.4k
            } else {
149
68.4k
                found_valid_key = true;
150
68.4k
            }
151
77.2k
        }
152
78.6k
    }
153
69.8k
}
154
155
void
156
NNTreeIterator::resetLimits(QPDFObjectHandle a_node, std::list<PathElement>::iterator parent)
157
33.5k
{
158
33.5k
    bool done = false;
159
49.9k
    while (!done) {
160
33.5k
        if (parent == path.end()) {
161
17.2k
            QTC::TC("qpdf", "NNTree remove limits from root");
162
17.2k
            a_node.removeKey("/Limits");
163
17.2k
            done = true;
164
17.2k
            break;
165
17.2k
        }
166
16.3k
        auto kids = a_node.getKey("/Kids");
167
16.3k
        int nkids = kids.isArray() ? kids.getArrayNItems() : 0;
168
16.3k
        auto items = a_node.getKey(impl.details.itemsKey());
169
16.3k
        int nitems = items.isArray() ? items.getArrayNItems() : 0;
170
171
16.3k
        bool changed = true;
172
16.3k
        QPDFObjectHandle first;
173
16.3k
        QPDFObjectHandle last;
174
16.3k
        if (nitems >= 2) {
175
15.7k
            first = items.getArrayItem(0);
176
15.7k
            last = items.getArrayItem((nitems - 1) & ~1);
177
15.7k
        } else if (nkids > 0) {
178
587
            auto first_kid = kids.getArrayItem(0);
179
587
            auto last_kid = kids.getArrayItem(nkids - 1);
180
587
            if (first_kid.isDictionary() && last_kid.isDictionary()) {
181
587
                auto first_limits = first_kid.getKey("/Limits");
182
587
                auto last_limits = last_kid.getKey("/Limits");
183
587
                if (first_limits.isArray() && (first_limits.getArrayNItems() >= 2) &&
184
587
                    last_limits.isArray() && (last_limits.getArrayNItems() >= 2)) {
185
587
                    first = first_limits.getArrayItem(0);
186
587
                    last = last_limits.getArrayItem(1);
187
587
                }
188
587
            }
189
587
        }
190
16.3k
        if (first && last) {
191
16.3k
            auto limits = QPDFObjectHandle::newArray();
192
16.3k
            limits.appendItem(first);
193
16.3k
            limits.appendItem(last);
194
16.3k
            auto olimits = a_node.getKey("/Limits");
195
16.3k
            if (olimits.isArray() && (olimits.getArrayNItems() == 2)) {
196
14.4k
                auto ofirst = olimits.getArrayItem(0);
197
14.4k
                auto olast = olimits.getArrayItem(1);
198
14.4k
                if (impl.details.keyValid(ofirst) && impl.details.keyValid(olast) &&
199
14.4k
                    (impl.details.compareKeys(first, ofirst) == 0) &&
200
14.4k
                    (impl.details.compareKeys(last, olast) == 0)) {
201
10.5k
                    QTC::TC("qpdf", "NNTree limits didn't change");
202
10.5k
                    changed = false;
203
10.5k
                }
204
14.4k
            }
205
16.3k
            if (changed && !a_node.isSameObjectAs(path.begin()->node)) {
206
5.28k
                a_node.replaceKey("/Limits", limits);
207
5.28k
            }
208
16.3k
        } else {
209
0
            QTC::TC("qpdf", "NNTree unable to determine limits");
210
0
            warn(impl.qpdf, a_node, "unable to determine limits");
211
0
        }
212
213
16.3k
        if (!changed || parent == path.begin()) {
214
16.3k
            done = true;
215
16.3k
        } else {
216
0
            a_node = parent->node;
217
0
            --parent;
218
0
        }
219
16.3k
    }
220
33.5k
}
221
222
void
223
NNTreeIterator::split(QPDFObjectHandle to_split, std::list<PathElement>::iterator parent)
224
31.6k
{
225
    // Split some node along the path to the item pointed to by this iterator, and adjust the
226
    // iterator so it points to the same item.
227
228
    // In examples, for simplicity, /Nums is shown to just contain numbers instead of pairs. Imagine
229
    // this tree:
230
    //
231
    // root: << /Kids [ A B C D ] >>
232
    // A: << /Nums [ 1 2 3 4 ] >>
233
    // B: << /Nums [ 5 6 7 8 ] >>
234
    // C: << /Nums [ 9 10 11 12 ] >>
235
    // D: << /Kids [ E F ]
236
    // E: << /Nums [ 13 14 15 16 ] >>
237
    // F: << /Nums [ 17 18 19 20 ] >>
238
239
    // iter1 (points to 19)
240
    //   path:
241
    //   - { node: root: kid_number: 3 }
242
    //   - { node: D, kid_number: 1 }
243
    //   node: F
244
    //   item_number: 2
245
246
    // iter2 (points to 1)
247
    //   path:
248
    //   - { node: root, kid_number: 0}
249
    //   node: A
250
    //   item_number: 0
251
252
31.6k
    if (!valid()) {
253
0
        throw std::logic_error("NNTreeIterator::split called an invalid iterator");
254
0
    }
255
256
    // Find the array we actually need to split, which is either this node's kids or items.
257
31.6k
    auto kids = to_split.getKey("/Kids");
258
31.6k
    int nkids = kids.isArray() ? kids.getArrayNItems() : 0;
259
31.6k
    auto items = to_split.getKey(impl.details.itemsKey());
260
31.6k
    int nitems = items.isArray() ? items.getArrayNItems() : 0;
261
262
31.6k
    QPDFObjectHandle first_half;
263
31.6k
    int n = 0;
264
31.6k
    std::string key;
265
31.6k
    int threshold = 0;
266
31.6k
    if (nkids > 0) {
267
587
        QTC::TC("qpdf", "NNTree split kids");
268
587
        first_half = kids;
269
587
        n = nkids;
270
587
        threshold = impl.split_threshold;
271
587
        key = "/Kids";
272
31.0k
    } else if (nitems > 0) {
273
31.0k
        QTC::TC("qpdf", "NNTree split items");
274
31.0k
        first_half = items;
275
31.0k
        n = nitems;
276
31.0k
        threshold = 2 * impl.split_threshold;
277
31.0k
        key = impl.details.itemsKey();
278
31.0k
    } else {
279
0
        throw std::logic_error("NNTreeIterator::split called on invalid node");
280
0
    }
281
282
31.6k
    if (n <= threshold) {
283
30.6k
        return;
284
30.6k
    }
285
286
979
    bool is_root = (parent == path.end());
287
979
    bool is_leaf = (nitems > 0);
288
289
    // CURRENT STATE: tree is in original state; iterator is valid and unchanged.
290
291
979
    if (is_root) {
292
        // What we want to do is to create a new node for the second half of the items and put it in
293
        // the parent's /Kids array right after the element that points to the current to_split
294
        // node, but if we're splitting root, there is no parent, so handle that first.
295
296
        // In the non-root case, parent points to the path element whose /Kids contains the first
297
        // half node, and the first half node is to_split. If we are splitting the root, we need to
298
        // push everything down a level, but we want to keep the actual root object the same so that
299
        // indirect references to it remain intact (and also in case it might be a direct object,
300
        // which it shouldn't be but that case probably exists in the wild). To achieve this, we
301
        // create a new node for the first half and then replace /Kids in the root to contain it.
302
        // Then we adjust the path so that the first element is root and the second element, if any,
303
        // is the new first half. In this way, we make the root case identical to the non-root case
304
        // so remaining logic can handle them in the same way.
305
306
392
        auto first_node = impl.qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary());
307
392
        first_node.replaceKey(key, first_half);
308
392
        QPDFObjectHandle new_kids = QPDFObjectHandle::newArray();
309
392
        new_kids.appendItem(first_node);
310
392
        to_split.removeKey("/Limits"); // already shouldn't be there for root
311
392
        to_split.removeKey(impl.details.itemsKey());
312
392
        to_split.replaceKey("/Kids", new_kids);
313
392
        if (is_leaf) {
314
391
            QTC::TC("qpdf", "NNTree split root + leaf");
315
391
            node = first_node;
316
391
        } else {
317
1
            QTC::TC("qpdf", "NNTree split root + !leaf");
318
1
            auto next = path.begin();
319
1
            next->node = first_node;
320
1
        }
321
392
        this->path.emplace_front(to_split, 0);
322
392
        parent = path.begin();
323
392
        to_split = first_node;
324
392
    }
325
326
    // CURRENT STATE: parent is guaranteed to be defined, and we have the invariants that
327
    // parent[/Kids][kid_number] == to_split and (++parent).node == to_split.
328
329
    // Create a second half array, and transfer the second half of the items into the second half
330
    // array.
331
979
    QPDFObjectHandle second_half = QPDFObjectHandle::newArray();
332
979
    int start_idx = ((n / 2) & ~1);
333
34.2k
    while (first_half.getArrayNItems() > start_idx) {
334
33.2k
        second_half.appendItem(first_half.getArrayItem(start_idx));
335
33.2k
        first_half.eraseItem(start_idx);
336
33.2k
    }
337
979
    resetLimits(to_split, parent);
338
339
    // Create a new node to contain the second half
340
979
    QPDFObjectHandle second_node = impl.qpdf.makeIndirectObject(QPDFObjectHandle::newDictionary());
341
979
    second_node.replaceKey(key, second_half);
342
979
    resetLimits(second_node, parent);
343
344
    // CURRENT STATE: half the items from the kids or items array in the node being split have been
345
    // moved into a new node. The new node is not yet attached to the tree. The iterator may have a
346
    // path element or leaf node that is out of bounds.
347
348
    // We need to adjust the parent to add the second node to /Kids and, if needed, update
349
    // kid_number to traverse through it. We need to update to_split's path element, or the node if
350
    // this is a leaf, so that the kid/item number points to the right place.
351
352
979
    auto parent_kids = parent->node.getKey("/Kids");
353
979
    parent_kids.insertItem(parent->kid_number + 1, second_node);
354
979
    auto cur_elem = parent;
355
979
    ++cur_elem; // points to end() for leaf nodes
356
979
    int old_idx = (is_leaf ? item_number : cur_elem->kid_number);
357
979
    if (old_idx >= start_idx) {
358
545
        ++parent->kid_number;
359
545
        if (is_leaf) {
360
545
            QTC::TC("qpdf", "NNTree split second half item");
361
545
            setItemNumber(second_node, item_number - start_idx);
362
545
        } else {
363
0
            QTC::TC("qpdf", "NNTree split second half kid");
364
0
            cur_elem->node = second_node;
365
0
            cur_elem->kid_number -= start_idx;
366
0
        }
367
545
    }
368
979
    if (!is_root) {
369
587
        QTC::TC("qpdf", "NNTree split parent");
370
587
        auto next = parent->node;
371
587
        resetLimits(next, parent);
372
587
        --parent;
373
587
        split(next, parent);
374
587
    }
375
979
}
376
377
std::list<NNTreeIterator::PathElement>::iterator
378
NNTreeIterator::lastPathElement()
379
62.0k
{
380
62.0k
    auto result = path.end();
381
62.0k
    if (!path.empty()) {
382
27.6k
        --result;
383
27.6k
    }
384
62.0k
    return result;
385
62.0k
}
386
387
void
388
NNTreeIterator::insertAfter(QPDFObjectHandle key, QPDFObjectHandle value)
389
29.0k
{
390
29.0k
    if (!valid()) {
391
0
        QTC::TC("qpdf", "NNTree insertAfter inserts first");
392
0
        impl.insertFirst(key, value);
393
0
        deepen(impl.oh, true, false);
394
0
        return;
395
0
    }
396
397
29.0k
    auto items = node.getKey(impl.details.itemsKey());
398
29.0k
    if (!items.isArray()) {
399
0
        error(impl.qpdf, node, "node contains no items array");
400
0
    }
401
29.0k
    if (items.getArrayNItems() < item_number + 2) {
402
0
        error(impl.qpdf, node, "insert: items array is too short");
403
0
    }
404
29.0k
    items.insertItem(item_number + 2, key);
405
29.0k
    items.insertItem(item_number + 3, value);
406
29.0k
    resetLimits(node, lastPathElement());
407
29.0k
    split(node, lastPathElement());
408
29.0k
    increment(false);
409
29.0k
}
410
411
void
412
NNTreeIterator::remove()
413
0
{
414
    // Remove this item, leaving the tree valid and this iterator pointing to the next item.
415
416
0
    if (!valid()) {
417
0
        throw std::logic_error("attempt made to remove an invalid iterator");
418
0
    }
419
0
    auto items = node.getKey(impl.details.itemsKey());
420
0
    int nitems = items.getArrayNItems();
421
0
    if (item_number + 2 > nitems) {
422
0
        error(impl.qpdf, node, "found short items array while removing an item");
423
0
    }
424
425
0
    items.eraseItem(item_number);
426
0
    items.eraseItem(item_number);
427
0
    nitems -= 2;
428
429
0
    if (nitems > 0) {
430
        // There are still items left
431
432
0
        if (item_number == 0 || item_number == nitems) {
433
            // We removed either the first or last item of an items array that remains non-empty, so
434
            // we have to adjust limits.
435
0
            QTC::TC("qpdf", "NNTree remove reset limits");
436
0
            resetLimits(node, lastPathElement());
437
0
        }
438
439
0
        if (item_number == nitems) {
440
            // We removed the last item of a non-empty items array, so advance to the successor of
441
            // the previous item.
442
0
            QTC::TC("qpdf", "NNTree erased last item");
443
0
            item_number -= 2;
444
0
            increment(false);
445
0
        } else if (item_number < nitems) {
446
            // We don't have to do anything since the removed item's successor now occupies its
447
            // former location.
448
0
            QTC::TC("qpdf", "NNTree erased non-last item");
449
0
            updateIValue();
450
0
        } else {
451
            // We already checked to ensure this condition would not happen.
452
0
            throw std::logic_error("NNTreeIterator::remove: item_number > nitems after erase");
453
0
        }
454
0
        return;
455
0
    }
456
457
0
    if (path.empty()) {
458
        // Special case: if this is the root node, we can leave it empty.
459
0
        QTC::TC("qpdf", "NNTree erased all items on leaf/root");
460
0
        setItemNumber(impl.oh, -1);
461
0
        return;
462
0
    }
463
464
0
    QTC::TC("qpdf", "NNTree items is empty after remove");
465
466
    // We removed the last item from this items array, so we need to remove this node from the
467
    // parent on up the tree. Then we need to position ourselves at the removed item's successor.
468
0
    bool done = false;
469
0
    while (!done) {
470
0
        auto element = lastPathElement();
471
0
        auto parent = element;
472
0
        --parent;
473
0
        auto kids = element->node.getKey("/Kids");
474
0
        kids.eraseItem(element->kid_number);
475
0
        auto nkids = kids.getArrayNItems();
476
0
        if (nkids > 0) {
477
            // The logic here is similar to the items case.
478
0
            if ((element->kid_number == 0) || (element->kid_number == nkids)) {
479
0
                QTC::TC("qpdf", "NNTree erased first or last kid");
480
0
                resetLimits(element->node, parent);
481
0
            }
482
0
            if (element->kid_number == nkids) {
483
                // Move to the successor of the last child of the previous kid.
484
0
                setItemNumber(QPDFObjectHandle(), -1);
485
0
                --element->kid_number;
486
0
                deepen(kids.getArrayItem(element->kid_number), false, true);
487
0
                if (valid()) {
488
0
                    increment(false);
489
0
                    if (!valid()) {
490
0
                        QTC::TC("qpdf", "NNTree erased last item in tree");
491
0
                    } else {
492
0
                        QTC::TC("qpdf", "NNTree erased last kid");
493
0
                    }
494
0
                }
495
0
            } else {
496
                // Next kid is in deleted kid's position
497
0
                QTC::TC("qpdf", "NNTree erased non-last kid");
498
0
                deepen(kids.getArrayItem(element->kid_number), true, true);
499
0
            }
500
0
            done = true;
501
0
        } else if (parent == path.end()) {
502
            // We erased the very last item. Convert the root to an empty items array.
503
0
            QTC::TC("qpdf", "NNTree non-flat tree is empty after remove");
504
0
            element->node.removeKey("/Kids");
505
0
            element->node.replaceKey(impl.details.itemsKey(), QPDFObjectHandle::newArray());
506
0
            path.clear();
507
0
            setItemNumber(impl.oh, -1);
508
0
            done = true;
509
0
        } else {
510
            // Walk up the tree and continue
511
0
            QTC::TC("qpdf", "NNTree remove walking up tree");
512
0
            path.pop_back();
513
0
        }
514
0
    }
515
0
}
516
517
NNTreeIterator&
518
NNTreeIterator::operator++()
519
40.8k
{
520
40.8k
    increment(false);
521
40.8k
    return *this;
522
40.8k
}
523
524
NNTreeIterator&
525
NNTreeIterator::operator--()
526
0
{
527
0
    increment(true);
528
0
    return *this;
529
0
}
530
531
NNTreeIterator::reference
532
NNTreeIterator::operator*()
533
41.1k
{
534
41.1k
    updateIValue(false);
535
41.1k
    return ivalue;
536
41.1k
}
537
538
NNTreeIterator::pointer
539
NNTreeIterator::operator->()
540
140k
{
541
140k
    updateIValue(false);
542
140k
    return &ivalue;
543
140k
}
544
545
bool
546
NNTreeIterator::operator==(NNTreeIterator const& other) const
547
94.2k
{
548
94.2k
    if (item_number == -1 && other.item_number == -1) {
549
6.10k
        return true;
550
6.10k
    }
551
88.1k
    if (path.size() != other.path.size()) {
552
60.6k
        return false;
553
60.6k
    }
554
27.4k
    auto tpi = path.begin();
555
27.4k
    auto opi = other.path.begin();
556
27.4k
    while (tpi != path.end()) {
557
0
        if (tpi->kid_number != opi->kid_number) {
558
0
            return false;
559
0
        }
560
0
        ++tpi;
561
0
        ++opi;
562
0
    }
563
27.4k
    if (item_number != other.item_number) {
564
27.4k
        return false;
565
27.4k
    }
566
0
    return true;
567
27.4k
}
568
569
void
570
NNTreeIterator::setItemNumber(QPDFObjectHandle const& a_node, int n)
571
98.5k
{
572
98.5k
    node = a_node;
573
98.5k
    item_number = n;
574
98.5k
    updateIValue();
575
98.5k
}
576
577
void
578
NNTreeIterator::addPathElement(QPDFObjectHandle const& a_node, int kid_number)
579
47.4k
{
580
47.4k
    path.emplace_back(a_node, kid_number);
581
47.4k
}
582
583
bool
584
NNTreeIterator::deepen(QPDFObjectHandle a_node, bool first, bool allow_empty)
585
55.1k
{
586
    // Starting at this node, descend through the first or last kid until we reach a node with
587
    // items. If we succeed, return true; otherwise return false and leave path alone.
588
589
55.1k
    auto opath = path;
590
55.1k
    bool failed = false;
591
592
55.1k
    QPDFObjGen::set seen;
593
55.1k
    for (auto const& i: path) {
594
5.13k
        seen.add(i.node);
595
5.13k
    }
596
83.1k
    while (!failed) {
597
82.9k
        if (!seen.add(a_node)) {
598
422
            QTC::TC("qpdf", "NNTree deepen: loop");
599
422
            warn(impl.qpdf, a_node, "loop detected while traversing name/number tree");
600
422
            failed = true;
601
422
            break;
602
422
        }
603
604
82.5k
        if (!a_node.isDictionary()) {
605
1.23k
            QTC::TC("qpdf", "NNTree node is not a dictionary");
606
1.23k
            warn(impl.qpdf, a_node, "non-dictionary node while traversing name/number tree");
607
1.23k
            failed = true;
608
1.23k
            break;
609
1.23k
        }
610
611
81.2k
        auto kids = a_node.getKey("/Kids");
612
81.2k
        int nkids = kids.isArray() ? kids.getArrayNItems() : 0;
613
81.2k
        auto items = a_node.getKey(impl.details.itemsKey());
614
81.2k
        int nitems = items.isArray() ? items.getArrayNItems() : 0;
615
81.2k
        if (nitems > 0) {
616
48.5k
            setItemNumber(a_node, first ? 0 : nitems - 2);
617
48.5k
            break;
618
48.5k
        } else if (nkids > 0) {
619
27.9k
            int kid_number = first ? 0 : nkids - 1;
620
27.9k
            addPathElement(a_node, kid_number);
621
27.9k
            auto next = kids.getArrayItem(kid_number);
622
27.9k
            if (!next.isIndirect()) {
623
266
                if (impl.auto_repair) {
624
266
                    QTC::TC("qpdf", "NNTree fix indirect kid");
625
266
                    warn(
626
266
                        impl.qpdf,
627
266
                        a_node,
628
266
                        ("converting kid number " + std::to_string(kid_number) +
629
266
                         " to an indirect object"));
630
266
                    next = impl.qpdf.makeIndirectObject(next);
631
266
                    kids.setArrayItem(kid_number, next);
632
266
                } else {
633
0
                    QTC::TC("qpdf", "NNTree warn indirect kid");
634
0
                    warn(
635
0
                        impl.qpdf,
636
0
                        a_node,
637
0
                        ("kid number " + std::to_string(kid_number) +
638
0
                         " is not an indirect object"));
639
0
                }
640
266
            }
641
27.9k
            a_node = next;
642
27.9k
        } else if (allow_empty && items.isArray()) {
643
3.54k
            QTC::TC("qpdf", "NNTree deepen found empty");
644
3.54k
            setItemNumber(a_node, -1);
645
3.54k
            break;
646
3.54k
        } else {
647
1.18k
            QTC::TC("qpdf", "NNTree deepen: invalid node");
648
1.18k
            warn(
649
1.18k
                impl.qpdf,
650
1.18k
                a_node,
651
1.18k
                ("name/number tree node has neither non-empty " + impl.details.itemsKey() +
652
1.18k
                 " nor /Kids"));
653
1.18k
            failed = true;
654
1.18k
            break;
655
1.18k
        }
656
81.2k
    }
657
55.1k
    if (failed) {
658
2.47k
        path = opath;
659
2.47k
        return false;
660
2.47k
    }
661
52.6k
    return true;
662
55.1k
}
663
664
NNTreeImpl::NNTreeImpl(
665
    NNTreeDetails const& details, QPDF& qpdf, QPDFObjectHandle& oh, bool auto_repair) :
666
3.42k
    details(details),
667
3.42k
    qpdf(qpdf),
668
3.42k
    oh(oh),
669
3.42k
    auto_repair(auto_repair)
670
3.42k
{
671
3.42k
}
672
673
void
674
NNTreeImpl::setSplitThreshold(int threshold)
675
0
{
676
0
    split_threshold = threshold;
677
0
}
678
679
NNTreeImpl::iterator
680
NNTreeImpl::begin()
681
52.3k
{
682
52.3k
    iterator result(*this);
683
52.3k
    result.deepen(oh, true, true);
684
52.3k
    return result;
685
52.3k
}
686
687
NNTreeImpl::iterator
688
NNTreeImpl::end()
689
105k
{
690
105k
    return {*this};
691
105k
}
692
693
NNTreeImpl::iterator
694
NNTreeImpl::last()
695
0
{
696
0
    iterator result(*this);
697
0
    result.deepen(oh, false, true);
698
0
    return result;
699
0
}
700
701
int
702
NNTreeImpl::withinLimits(QPDFObjectHandle key, QPDFObjectHandle node)
703
36.6k
{
704
36.6k
    int result = 0;
705
36.6k
    auto limits = node.getKey("/Limits");
706
36.6k
    if (limits.isArray() && (limits.getArrayNItems() >= 2) &&
707
36.6k
        details.keyValid(limits.getArrayItem(0)) && details.keyValid(limits.getArrayItem(1))) {
708
36.0k
        if (details.compareKeys(key, limits.getArrayItem(0)) < 0) {
709
13.4k
            result = -1;
710
22.5k
        } else if (details.compareKeys(key, limits.getArrayItem(1)) > 0) {
711
6.42k
            result = 1;
712
6.42k
        }
713
36.0k
    } else {
714
662
        QTC::TC("qpdf", "NNTree missing limits");
715
662
        error(qpdf, node, "node is missing /Limits");
716
662
    }
717
36.6k
    return result;
718
36.6k
}
719
720
int
721
NNTreeImpl::binarySearch(
722
    QPDFObjectHandle key,
723
    QPDFObjectHandle items,
724
    int num_items,
725
    bool return_prev_if_not_found,
726
    int (NNTreeImpl::*compare)(QPDFObjectHandle& key, QPDFObjectHandle& arr, int item))
727
63.5k
{
728
63.5k
    int max_idx = 1;
729
276k
    while (max_idx < num_items) {
730
213k
        max_idx <<= 1;
731
213k
    }
732
733
63.5k
    int step = max_idx / 2;
734
63.5k
    int checks = max_idx;
735
63.5k
    int idx = step;
736
63.5k
    int found_idx = -1;
737
63.5k
    bool found = false;
738
63.5k
    bool found_leq = false;
739
63.5k
    int status = 0;
740
741
305k
    while ((!found) && (checks > 0)) {
742
241k
        if (idx < num_items) {
743
218k
            status = (this->*compare)(key, items, idx);
744
218k
            if (status >= 0) {
745
120k
                found_leq = true;
746
120k
                found_idx = idx;
747
120k
            }
748
218k
        } else {
749
            // consider item to be below anything after the top
750
23.1k
            status = -1;
751
23.1k
        }
752
753
241k
        if (status == 0) {
754
26.6k
            found = true;
755
215k
        } else {
756
215k
            checks >>= 1;
757
215k
            if (checks > 0) {
758
178k
                step >>= 1;
759
178k
                if (step == 0) {
760
39.7k
                    step = 1;
761
39.7k
                }
762
763
178k
                if (status < 0) {
764
101k
                    idx -= step;
765
101k
                } else {
766
76.4k
                    idx += step;
767
76.4k
                }
768
178k
            }
769
215k
        }
770
241k
    }
771
772
63.5k
    if (found || (found_leq && return_prev_if_not_found)) {
773
60.7k
        return found_idx;
774
60.7k
    } else {
775
2.77k
        return -1;
776
2.77k
    }
777
63.5k
}
778
779
int
780
NNTreeImpl::compareKeyItem(QPDFObjectHandle& key, QPDFObjectHandle& items, int idx)
781
181k
{
782
181k
    if (!((items.isArray() && (items.getArrayNItems() > (2 * idx)) &&
783
181k
           details.keyValid(items.getArrayItem(2 * idx))))) {
784
1.02k
        QTC::TC("qpdf", "NNTree item is wrong type");
785
1.02k
        error(qpdf, oh, ("item at index " + std::to_string(2 * idx) + " is not the right type"));
786
1.02k
    }
787
181k
    return details.compareKeys(key, items.getArrayItem(2 * idx));
788
181k
}
789
790
int
791
NNTreeImpl::compareKeyKid(QPDFObjectHandle& key, QPDFObjectHandle& kids, int idx)
792
37.3k
{
793
37.3k
    if (!(kids.isArray() && (idx < kids.getArrayNItems()) &&
794
37.3k
          kids.getArrayItem(idx).isDictionary())) {
795
593
        QTC::TC("qpdf", "NNTree kid is invalid");
796
593
        error(qpdf, oh, "invalid kid at index " + std::to_string(idx));
797
593
    }
798
37.3k
    return withinLimits(key, kids.getArrayItem(idx));
799
37.3k
}
800
801
void
802
NNTreeImpl::repair()
803
1.84k
{
804
1.84k
    auto new_node = QPDFObjectHandle::newDictionary();
805
1.84k
    new_node.replaceKey(details.itemsKey(), QPDFObjectHandle::newArray());
806
1.84k
    NNTreeImpl repl(details, qpdf, new_node, false);
807
41.1k
    for (auto const& i: *this) {
808
41.1k
        repl.insert(i.first, i.second);
809
41.1k
    }
810
1.84k
    oh.replaceKey("/Kids", new_node.getKey("/Kids"));
811
1.84k
    oh.replaceKey(details.itemsKey(), new_node.getKey(details.itemsKey()));
812
1.84k
}
813
814
NNTreeImpl::iterator
815
NNTreeImpl::find(QPDFObjectHandle key, bool return_prev_if_not_found)
816
47.8k
{
817
47.8k
    try {
818
47.8k
        return findInternal(key, return_prev_if_not_found);
819
47.8k
    } catch (QPDFExc& e) {
820
2.98k
        if (auto_repair) {
821
2.65k
            QTC::TC("qpdf", "NNTree repair");
822
2.65k
            warn(qpdf, oh, std::string("attempting to repair after error: ") + e.what());
823
2.65k
            repair();
824
2.65k
            return findInternal(key, return_prev_if_not_found);
825
2.65k
        } else {
826
337
            throw;
827
337
        }
828
2.98k
    }
829
47.8k
}
830
831
NNTreeImpl::iterator
832
NNTreeImpl::findInternal(QPDFObjectHandle key, bool return_prev_if_not_found)
833
48.5k
{
834
48.5k
    auto first_item = begin();
835
48.5k
    auto last_item = end();
836
48.5k
    if (first_item == end()) {
837
        // Empty
838
3.42k
        return end();
839
45.0k
    } else if (
840
45.0k
        first_item.valid() && details.keyValid(first_item->first) &&
841
45.0k
        details.compareKeys(key, first_item->first) < 0) {
842
        // Before the first key
843
335
        return end();
844
44.7k
    } else if (
845
44.7k
        last_item.valid() && details.keyValid(last_item->first) &&
846
44.7k
        details.compareKeys(key, last_item->first) > 0) {
847
        // After the last key
848
0
        if (return_prev_if_not_found) {
849
0
            return last_item;
850
0
        } else {
851
0
            return end();
852
0
        }
853
0
    }
854
855
44.7k
    QPDFObjGen::set seen;
856
44.7k
    auto node = oh;
857
44.7k
    iterator result(*this);
858
859
65.7k
    while (true) {
860
63.5k
        if (!seen.add(node)) {
861
34
            QTC::TC("qpdf", "NNTree loop in find");
862
34
            error(qpdf, node, "loop detected in find");
863
34
        }
864
865
63.5k
        auto kids = node.getKey("/Kids");
866
63.5k
        int nkids = kids.isArray() ? kids.getArrayNItems() : 0;
867
63.5k
        auto items = node.getKey(details.itemsKey());
868
63.5k
        int nitems = items.isArray() ? items.getArrayNItems() : 0;
869
63.5k
        if (nitems > 0) {
870
42.6k
            int idx = binarySearch(
871
42.6k
                key, items, nitems / 2, return_prev_if_not_found, &NNTreeImpl::compareKeyItem);
872
42.6k
            if (idx >= 0) {
873
41.2k
                result.setItemNumber(node, 2 * idx);
874
41.2k
            }
875
42.6k
            break;
876
42.6k
        } else if (nkids > 0) {
877
20.8k
            int idx = binarySearch(key, kids, nkids, true, &NNTreeImpl::compareKeyKid);
878
20.8k
            if (idx == -1) {
879
92
                QTC::TC("qpdf", "NNTree -1 in binary search");
880
92
                error(
881
92
                    qpdf,
882
92
                    node,
883
92
                    "unexpected -1 from binary search of kids;"
884
92
                    " limits may by wrong");
885
92
            }
886
20.8k
            result.addPathElement(node, idx);
887
20.8k
            node = kids.getArrayItem(idx);
888
20.8k
        } else {
889
80
            QTC::TC("qpdf", "NNTree bad node during find");
890
80
            error(qpdf, node, "bad node during find");
891
80
        }
892
63.5k
    }
893
894
44.7k
    return result;
895
48.5k
}
896
897
NNTreeImpl::iterator
898
NNTreeImpl::insertFirst(QPDFObjectHandle key, QPDFObjectHandle value)
899
2.03k
{
900
2.03k
    auto iter = begin();
901
2.03k
    QPDFObjectHandle items;
902
2.03k
    if (iter.node.isDictionary()) {
903
2.03k
        items = iter.node.getKey(details.itemsKey());
904
2.03k
    }
905
2.03k
    if (!(items.isArray())) {
906
0
        QTC::TC("qpdf", "NNTree no valid items node in insertFirst");
907
0
        error(qpdf, oh, "unable to find a valid items node");
908
0
    }
909
2.03k
    items.insertItem(0, key);
910
2.03k
    items.insertItem(1, value);
911
2.03k
    iter.setItemNumber(iter.node, 0);
912
2.03k
    iter.resetLimits(iter.node, iter.lastPathElement());
913
2.03k
    iter.split(iter.node, iter.lastPathElement());
914
2.03k
    return iter;
915
2.03k
}
916
917
NNTreeImpl::iterator
918
NNTreeImpl::insert(QPDFObjectHandle key, QPDFObjectHandle value)
919
41.1k
{
920
41.1k
    auto iter = find(key, true);
921
41.1k
    if (!iter.valid()) {
922
2.03k
        QTC::TC("qpdf", "NNTree insert inserts first");
923
2.03k
        return insertFirst(key, value);
924
39.1k
    } else if (details.compareKeys(key, iter->first) == 0) {
925
9.80k
        QTC::TC("qpdf", "NNTree insert replaces");
926
9.80k
        auto items = iter.node.getKey(details.itemsKey());
927
9.80k
        items.setArrayItem(iter.item_number + 1, value);
928
9.80k
        iter.updateIValue();
929
29.3k
    } else {
930
29.3k
        QTC::TC("qpdf", "NNTree insert inserts after");
931
29.3k
        iter.insertAfter(key, value);
932
29.3k
    }
933
39.1k
    return iter;
934
41.1k
}
935
936
bool
937
NNTreeImpl::remove(QPDFObjectHandle key, QPDFObjectHandle* value)
938
0
{
939
0
    auto iter = find(key, false);
940
0
    if (!iter.valid()) {
941
0
        QTC::TC("qpdf", "NNTree remove not found");
942
0
        return false;
943
0
    }
944
0
    if (value) {
945
0
        *value = iter->second;
946
0
    }
947
0
    iter.remove();
948
0
    return true;
949
0
}