Coverage Report

Created: 2025-06-09 08:44

/src/gdal/ogr/ogrsf_frmts/flatgeobuf/packedrtree.cpp
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * Project:  FlatGeobuf
4
 * Purpose:  Packed RTree management
5
 * Author:   Björn Harrtell <bjorn at wololo dot org>
6
 *
7
 ******************************************************************************
8
 * Copyright (c) 2018-2020, Björn Harrtell <bjorn at wololo dot org>
9
 *
10
 * SPDX-License-Identifier: MIT
11
 ****************************************************************************/
12
13
// NOTE: The upstream of this file is in
14
// https://github.com/bjornharrtell/flatgeobuf/tree/master/src/cpp
15
16
#ifdef GDAL_COMPILATION
17
#include "cpl_port.h"
18
#else
19
#define CPL_IS_LSB 1
20
#endif
21
22
#include "packedrtree.h"
23
24
#include <algorithm>
25
#include <limits>
26
#include <map>
27
#include <unordered_map>
28
#include <iostream>
29
30
namespace FlatGeobuf
31
{
32
33
const NodeItem &NodeItem::expand(const NodeItem &r)
34
90
{
35
90
    if (r.minX < minX)
36
20
        minX = r.minX;
37
90
    if (r.minY < minY)
38
18
        minY = r.minY;
39
90
    if (r.maxX > maxX)
40
22
        maxX = r.maxX;
41
90
    if (r.maxY > maxY)
42
18
        maxY = r.maxY;
43
90
    return *this;
44
90
}
45
46
NodeItem NodeItem::create(uint64_t offset)
47
18
{
48
18
    return {std::numeric_limits<double>::infinity(),
49
18
            std::numeric_limits<double>::infinity(),
50
18
            -1 * std::numeric_limits<double>::infinity(),
51
18
            -1 * std::numeric_limits<double>::infinity(), offset};
52
18
}
53
54
bool NodeItem::intersects(const NodeItem &r) const
55
0
{
56
0
    if (maxX < r.minX)
57
0
        return false;
58
0
    if (maxY < r.minY)
59
0
        return false;
60
0
    if (minX > r.maxX)
61
0
        return false;
62
0
    if (minY > r.maxY)
63
0
        return false;
64
0
    return true;
65
0
}
66
67
std::vector<double> NodeItem::toVector()
68
6
{
69
6
    return std::vector<double>{minX, minY, maxX, maxY};
70
6
}
71
72
// Based on public domain code at
73
// https://github.com/rawrunprotected/hilbert_curves
74
uint32_t hilbert(uint32_t x, uint32_t y)
75
66
{
76
66
    uint32_t a = x ^ y;
77
66
    uint32_t b = 0xFFFF ^ a;
78
66
    uint32_t c = 0xFFFF ^ (x | y);
79
66
    uint32_t d = x & (y ^ 0xFFFF);
80
81
66
    uint32_t A = a | (b >> 1);
82
66
    uint32_t B = (a >> 1) ^ a;
83
66
    uint32_t C = ((c >> 1) ^ (b & (d >> 1))) ^ c;
84
66
    uint32_t D = ((a & (c >> 1)) ^ (d >> 1)) ^ d;
85
86
66
    a = A;
87
66
    b = B;
88
66
    c = C;
89
66
    d = D;
90
66
    A = ((a & (a >> 2)) ^ (b & (b >> 2)));
91
66
    B = ((a & (b >> 2)) ^ (b & ((a ^ b) >> 2)));
92
66
    C ^= ((a & (c >> 2)) ^ (b & (d >> 2)));
93
66
    D ^= ((b & (c >> 2)) ^ ((a ^ b) & (d >> 2)));
94
95
66
    a = A;
96
66
    b = B;
97
66
    c = C;
98
66
    d = D;
99
66
    A = ((a & (a >> 4)) ^ (b & (b >> 4)));
100
66
    B = ((a & (b >> 4)) ^ (b & ((a ^ b) >> 4)));
101
66
    C ^= ((a & (c >> 4)) ^ (b & (d >> 4)));
102
66
    D ^= ((b & (c >> 4)) ^ ((a ^ b) & (d >> 4)));
103
104
66
    a = A;
105
66
    b = B;
106
66
    c = C;
107
66
    d = D;
108
66
    C ^= ((a & (c >> 8)) ^ (b & (d >> 8)));
109
66
    D ^= ((b & (c >> 8)) ^ ((a ^ b) & (d >> 8)));
110
111
66
    a = C ^ (C >> 1);
112
66
    b = D ^ (D >> 1);
113
114
66
    uint32_t i0 = x ^ y;
115
66
    uint32_t i1 = b | (0xFFFF ^ (i0 | a));
116
117
66
    i0 = (i0 | (i0 << 8)) & 0x00FF00FF;
118
66
    i0 = (i0 | (i0 << 4)) & 0x0F0F0F0F;
119
66
    i0 = (i0 | (i0 << 2)) & 0x33333333;
120
66
    i0 = (i0 | (i0 << 1)) & 0x55555555;
121
122
66
    i1 = (i1 | (i1 << 8)) & 0x00FF00FF;
123
66
    i1 = (i1 | (i1 << 4)) & 0x0F0F0F0F;
124
66
    i1 = (i1 | (i1 << 2)) & 0x33333333;
125
66
    i1 = (i1 | (i1 << 1)) & 0x55555555;
126
127
66
    uint32_t value = ((i1 << 1) | i0);
128
129
66
    return value;
130
66
}
131
132
uint32_t hilbert(const NodeItem &r, uint32_t hilbertMax, const double minX,
133
                 const double minY, const double width, const double height)
134
66
{
135
66
    uint32_t x = 0;
136
66
    uint32_t y = 0;
137
66
    uint32_t v;
138
66
    if (width != 0.0)
139
52
        x = static_cast<uint32_t>(
140
52
            floor(hilbertMax * ((r.minX + r.maxX) / 2 - minX) / width));
141
66
    if (height != 0.0)
142
0
        y = static_cast<uint32_t>(
143
0
            floor(hilbertMax * ((r.minY + r.maxY) / 2 - minY) / height));
144
66
    v = hilbert(x, y);
145
66
    return v;
146
66
}
147
148
void hilbertSort(std::vector<std::shared_ptr<Item>> &items)
149
0
{
150
0
    NodeItem extent = calcExtent(items);
151
0
    const double minX = extent.minX;
152
0
    const double minY = extent.minY;
153
0
    const double width = extent.width();
154
0
    const double height = extent.height();
155
0
    std::sort(items.begin(), items.end(),
156
0
              [minX, minY, width, height](std::shared_ptr<Item> a,
157
0
                                          std::shared_ptr<Item> b)
158
0
              {
159
0
                  uint32_t ha = hilbert(a->nodeItem, HILBERT_MAX, minX, minY,
160
0
                                        width, height);
161
0
                  uint32_t hb = hilbert(b->nodeItem, HILBERT_MAX, minX, minY,
162
0
                                        width, height);
163
0
                  return ha > hb;
164
0
              });
165
0
}
166
167
void hilbertSort(std::vector<NodeItem> &items)
168
0
{
169
0
    NodeItem extent = calcExtent(items);
170
0
    const double minX = extent.minX;
171
0
    const double minY = extent.minY;
172
0
    const double width = extent.width();
173
0
    const double height = extent.height();
174
0
    std::sort(items.begin(), items.end(),
175
0
              [minX, minY, width, height](const NodeItem &a, const NodeItem &b)
176
0
              {
177
0
                  uint32_t ha =
178
0
                      hilbert(a, HILBERT_MAX, minX, minY, width, height);
179
0
                  uint32_t hb =
180
0
                      hilbert(b, HILBERT_MAX, minX, minY, width, height);
181
0
                  return ha > hb;
182
0
              });
183
0
}
184
185
NodeItem calcExtent(const std::vector<std::shared_ptr<Item>> &items)
186
0
{
187
0
    return std::accumulate(
188
0
        items.begin(), items.end(), NodeItem::create(0),
189
0
        [](NodeItem a, const std::shared_ptr<Item> &b) -> NodeItem
190
0
        { return a.expand(b->nodeItem); });
191
0
}
192
193
NodeItem calcExtent(const std::vector<NodeItem> &nodes)
194
0
{
195
0
    return std::accumulate(nodes.begin(), nodes.end(), NodeItem::create(0),
196
0
                           [](NodeItem a, const NodeItem &b) -> NodeItem
197
0
                           { return a.expand(b); });
198
0
}
199
200
void PackedRTree::init(const uint16_t nodeSize)
201
6
{
202
6
    if (nodeSize < 2)
203
0
        throw std::invalid_argument("Node size must be at least 2");
204
6
    if (_numItems == 0)
205
0
        throw std::invalid_argument("Cannot create empty tree");
206
6
    _nodeSize = std::min(std::max(nodeSize, static_cast<uint16_t>(2)),
207
6
                         static_cast<uint16_t>(65535));
208
6
    _levelBounds = generateLevelBounds(_numItems, _nodeSize);
209
6
    _numNodes = _levelBounds.front().second;
210
6
    _nodeItems = new NodeItem[static_cast<size_t>(_numNodes)];
211
6
}
212
213
template <class T, class U> inline T div_round_up(T a, U b)
214
33.9k
{
215
33.9k
    return a / b + (((a % b) == 0) ? 0 : 1);
216
33.9k
}
217
218
std::vector<std::pair<uint64_t, uint64_t>>
219
PackedRTree::generateLevelBounds(const uint64_t numItems,
220
                                 const uint16_t nodeSize)
221
6
{
222
6
    if (nodeSize < 2)
223
0
        throw std::invalid_argument("Node size must be at least 2");
224
6
    if (numItems == 0)
225
0
        throw std::invalid_argument("Number of items must be greater than 0");
226
6
    if (numItems >
227
6
        std::numeric_limits<uint64_t>::max() - ((numItems / nodeSize) * 2))
228
0
        throw std::overflow_error("Number of items too large");
229
230
    // number of nodes per level in bottom-up order
231
6
    std::vector<uint64_t> levelNumNodes;
232
6
    uint64_t n = numItems;
233
6
    uint64_t numNodes = n;
234
6
    levelNumNodes.push_back(n);
235
6
    do
236
6
    {
237
6
        n = div_round_up(n, nodeSize);
238
6
        numNodes += n;
239
6
        levelNumNodes.push_back(n);
240
6
    } while (n != 1);
241
242
    // bounds per level in reversed storage order (top-down)
243
6
    std::vector<uint64_t> levelOffsets;
244
6
    n = numNodes;
245
6
    for (auto size : levelNumNodes)
246
12
        levelOffsets.push_back(n -= size);
247
6
    std::vector<std::pair<uint64_t, uint64_t>> levelBounds;
248
18
    for (size_t i = 0; i < levelNumNodes.size(); i++)
249
12
        levelBounds.push_back(std::pair<uint64_t, uint64_t>(
250
12
            levelOffsets[i], levelOffsets[i] + levelNumNodes[i]));
251
6
    return levelBounds;
252
6
}
253
254
void PackedRTree::generateNodes()
255
6
{
256
12
    for (uint32_t i = 0; i < _levelBounds.size() - 1; i++)
257
6
    {
258
6
        auto pos = _levelBounds[i].first;
259
6
        auto end = _levelBounds[i].second;
260
6
        auto newpos = _levelBounds[i + 1].first;
261
12
        while (pos < end)
262
6
        {
263
6
            NodeItem node = NodeItem::create(pos);
264
36
            for (uint32_t j = 0; j < _nodeSize && pos < end; j++)
265
30
                node.expand(_nodeItems[pos++]);
266
6
            _nodeItems[newpos++] = node;
267
6
        }
268
6
    }
269
6
}
270
271
void PackedRTree::fromData(const void *data)
272
0
{
273
0
    auto buf = reinterpret_cast<const uint8_t *>(data);
274
0
    const NodeItem *pn = reinterpret_cast<const NodeItem *>(buf);
275
0
    for (uint64_t i = 0; i < _numNodes; i++)
276
0
    {
277
0
        NodeItem n = *pn++;
278
0
        _nodeItems[i] = n;
279
0
        _extent.expand(n);
280
0
    }
281
0
}
282
283
PackedRTree::PackedRTree(const std::vector<std::shared_ptr<Item>> &items,
284
                         const NodeItem &extent, const uint16_t nodeSize)
285
0
    : _extent(extent), _numItems(items.size())
286
0
{
287
0
    init(nodeSize);
288
0
    for (size_t i = 0; i < _numItems; i++)
289
0
        _nodeItems[_numNodes - _numItems + i] = items[i]->nodeItem;
290
0
    generateNodes();
291
0
}
292
293
PackedRTree::PackedRTree(const std::vector<NodeItem> &nodes,
294
                         const NodeItem &extent, const uint16_t nodeSize)
295
0
    : _extent(extent), _numItems(nodes.size())
296
0
{
297
0
    init(nodeSize);
298
0
    for (size_t i = 0; i < _numItems; i++)
299
0
        _nodeItems[_numNodes - _numItems + i] = nodes[i];
300
0
    generateNodes();
301
0
}
302
303
PackedRTree::PackedRTree(const void *data, const uint64_t numItems,
304
                         const uint16_t nodeSize)
305
0
    : _extent(NodeItem::create(0)), _numItems(numItems)
306
0
{
307
0
    init(nodeSize);
308
0
    fromData(data);
309
0
}
310
311
PackedRTree::PackedRTree(std::function<void(NodeItem *)> fillNodeItems,
312
                         const uint64_t numItems, const NodeItem &extent,
313
                         const uint16_t nodeSize)
314
6
    : _extent(extent), _numItems(numItems)
315
6
{
316
6
    init(nodeSize);
317
6
    fillNodeItems(_nodeItems + _numNodes - _numItems);
318
6
    generateNodes();
319
6
}
320
321
std::vector<SearchResultItem>
322
PackedRTree::search(double minX, double minY, double maxX, double maxY) const
323
0
{
324
0
    uint64_t leafNodesOffset = _levelBounds.front().first;
325
0
    NodeItem n{minX, minY, maxX, maxY, 0};
326
0
    std::vector<SearchResultItem> results;
327
0
    std::unordered_map<uint64_t, uint64_t> queue;
328
0
    queue.insert(std::pair<uint64_t, uint64_t>(0, _levelBounds.size() - 1));
329
0
    while (queue.size() != 0)
330
0
    {
331
0
        auto next = queue.begin();
332
0
        uint64_t nodeIndex = next->first;
333
0
        uint64_t level = next->second;
334
0
        queue.erase(next);
335
0
        bool isLeafNode = nodeIndex >= _numNodes - _numItems;
336
        // find the end index of the node
337
0
        uint64_t end =
338
0
            std::min(static_cast<uint64_t>(nodeIndex + _nodeSize),
339
0
                     _levelBounds[static_cast<size_t>(level)].second);
340
        // search through child nodes
341
0
        for (uint64_t pos = nodeIndex; pos < end; pos++)
342
0
        {
343
0
            const auto &nodeItem = _nodeItems[static_cast<size_t>(pos)];
344
0
            if (!n.intersects(nodeItem))
345
0
                continue;
346
0
            if (isLeafNode)
347
0
                results.push_back({nodeItem.offset, pos - leafNodesOffset});
348
0
            else
349
0
                queue.insert(
350
0
                    std::pair<uint64_t, uint64_t>(nodeItem.offset, level - 1));
351
0
        }
352
0
    }
353
0
    return results;
354
0
}
355
356
std::vector<SearchResultItem> PackedRTree::streamSearch(
357
    const uint64_t numItems, const uint16_t nodeSize, const NodeItem &item,
358
    const std::function<void(uint8_t *, size_t, size_t)> &readNode)
359
0
{
360
0
    auto levelBounds = generateLevelBounds(numItems, nodeSize);
361
0
    uint64_t leafNodesOffset = levelBounds.front().first;
362
0
    uint64_t numNodes = levelBounds.front().second;
363
0
    auto nodeItems = std::vector<NodeItem>(nodeSize);
364
0
    uint8_t *nodesBuf = reinterpret_cast<uint8_t *>(nodeItems.data());
365
    // use ordered search queue to make index traversal in sequential order
366
0
    std::map<uint64_t, uint64_t> queue;
367
0
    std::vector<SearchResultItem> results;
368
0
    queue.insert(std::pair<uint64_t, uint64_t>(0, levelBounds.size() - 1));
369
0
    while (queue.size() != 0)
370
0
    {
371
0
        auto next = queue.begin();
372
0
        uint64_t nodeIndex = next->first;
373
0
        uint64_t level = next->second;
374
0
        queue.erase(next);
375
0
        bool isLeafNode = nodeIndex >= numNodes - numItems;
376
        // find the end index of the node
377
0
        uint64_t end = std::min(static_cast<uint64_t>(nodeIndex + nodeSize),
378
0
                                levelBounds[static_cast<size_t>(level)].second);
379
0
        uint64_t length = end - nodeIndex;
380
0
        readNode(nodesBuf, static_cast<size_t>(nodeIndex * sizeof(NodeItem)),
381
0
                 static_cast<size_t>(length * sizeof(NodeItem)));
382
#if !CPL_IS_LSB
383
        for (size_t i = 0; i < static_cast<size_t>(length); i++)
384
        {
385
            CPL_LSBPTR64(&nodeItems[i].minX);
386
            CPL_LSBPTR64(&nodeItems[i].minY);
387
            CPL_LSBPTR64(&nodeItems[i].maxX);
388
            CPL_LSBPTR64(&nodeItems[i].maxY);
389
            CPL_LSBPTR64(&nodeItems[i].offset);
390
        }
391
#endif
392
        // search through child nodes
393
0
        for (uint64_t pos = nodeIndex; pos < end; pos++)
394
0
        {
395
0
            uint64_t nodePos = pos - nodeIndex;
396
0
            const auto &nodeItem = nodeItems[static_cast<size_t>(nodePos)];
397
0
            if (!item.intersects(nodeItem))
398
0
                continue;
399
0
            if (isLeafNode)
400
0
                results.push_back({nodeItem.offset, pos - leafNodesOffset});
401
0
            else
402
0
                queue.insert(
403
0
                    std::pair<uint64_t, uint64_t>(nodeItem.offset, level - 1));
404
0
        }
405
0
    }
406
0
    return results;
407
0
}
408
409
uint64_t PackedRTree::size() const
410
0
{
411
0
    return _numNodes * sizeof(NodeItem);
412
0
}
413
414
uint64_t PackedRTree::size(const uint64_t numItems, const uint16_t nodeSize)
415
5.45k
{
416
5.45k
    if (nodeSize < 2)
417
0
        throw std::invalid_argument("Node size must be at least 2");
418
5.45k
    if (numItems == 0)
419
9
        throw std::invalid_argument("Number of items must be greater than 0");
420
5.44k
    const uint16_t nodeSizeMin =
421
5.44k
        std::min(std::max(nodeSize, static_cast<uint16_t>(2)),
422
5.44k
                 static_cast<uint16_t>(65535));
423
    // limit so that resulting size in bytes can be represented by uint64_t
424
5.44k
    if (numItems > static_cast<uint64_t>(1) << 56)
425
0
        throw std::overflow_error("Number of items must be less than 2^56");
426
5.44k
    uint64_t n = numItems;
427
5.44k
    uint64_t numNodes = n;
428
5.44k
    do
429
33.9k
    {
430
33.9k
        n = div_round_up(n, nodeSizeMin);
431
33.9k
        numNodes += n;
432
33.9k
    } while (n != 1);
433
5.44k
    return numNodes * sizeof(NodeItem);
434
5.44k
}
435
436
void PackedRTree::streamWrite(
437
    const std::function<void(uint8_t *, size_t)> &writeData)
438
6
{
439
#if !CPL_IS_LSB
440
    for (size_t i = 0; i < static_cast<size_t>(_numNodes); i++)
441
    {
442
        CPL_LSBPTR64(&_nodeItems[i].minX);
443
        CPL_LSBPTR64(&_nodeItems[i].minY);
444
        CPL_LSBPTR64(&_nodeItems[i].maxX);
445
        CPL_LSBPTR64(&_nodeItems[i].maxY);
446
        CPL_LSBPTR64(&_nodeItems[i].offset);
447
    }
448
#endif
449
6
    writeData(reinterpret_cast<uint8_t *>(_nodeItems),
450
6
              static_cast<size_t>(_numNodes * sizeof(NodeItem)));
451
#if !CPL_IS_LSB
452
    for (size_t i = 0; i < static_cast<size_t>(_numNodes); i++)
453
    {
454
        CPL_LSBPTR64(&_nodeItems[i].minX);
455
        CPL_LSBPTR64(&_nodeItems[i].minY);
456
        CPL_LSBPTR64(&_nodeItems[i].maxX);
457
        CPL_LSBPTR64(&_nodeItems[i].maxY);
458
        CPL_LSBPTR64(&_nodeItems[i].offset);
459
    }
460
#endif
461
6
}
462
463
NodeItem PackedRTree::getExtent() const
464
0
{
465
0
    return _extent;
466
0
}
467
468
}  // namespace FlatGeobuf