/src/gdal/ogr/ogrsf_frmts/flatgeobuf/packedrtree.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * Project: FlatGeobuf |
4 | | * Purpose: Packed RTree management |
5 | | * Author: Björn Harrtell <bjorn at wololo dot org> |
6 | | * |
7 | | ****************************************************************************** |
8 | | * Copyright (c) 2018-2020, Björn Harrtell <bjorn at wololo dot org> |
9 | | * |
10 | | * SPDX-License-Identifier: MIT |
11 | | ****************************************************************************/ |
12 | | |
13 | | // NOTE: The upstream of this file is in |
14 | | // https://github.com/bjornharrtell/flatgeobuf/tree/master/src/cpp |
15 | | |
16 | | #ifdef GDAL_COMPILATION |
17 | | #include "cpl_port.h" |
18 | | #else |
19 | | #define CPL_IS_LSB 1 |
20 | | #endif |
21 | | |
22 | | #include "packedrtree.h" |
23 | | |
24 | | #include <algorithm> |
25 | | #include <limits> |
26 | | #include <map> |
27 | | #include <unordered_map> |
28 | | #include <iostream> |
29 | | |
30 | | namespace FlatGeobuf |
31 | | { |
32 | | |
33 | | const NodeItem &NodeItem::expand(const NodeItem &r) |
34 | 90 | { |
35 | 90 | if (r.minX < minX) |
36 | 20 | minX = r.minX; |
37 | 90 | if (r.minY < minY) |
38 | 18 | minY = r.minY; |
39 | 90 | if (r.maxX > maxX) |
40 | 22 | maxX = r.maxX; |
41 | 90 | if (r.maxY > maxY) |
42 | 18 | maxY = r.maxY; |
43 | 90 | return *this; |
44 | 90 | } |
45 | | |
46 | | NodeItem NodeItem::create(uint64_t offset) |
47 | 18 | { |
48 | 18 | return {std::numeric_limits<double>::infinity(), |
49 | 18 | std::numeric_limits<double>::infinity(), |
50 | 18 | -1 * std::numeric_limits<double>::infinity(), |
51 | 18 | -1 * std::numeric_limits<double>::infinity(), offset}; |
52 | 18 | } |
53 | | |
54 | | bool NodeItem::intersects(const NodeItem &r) const |
55 | 0 | { |
56 | 0 | if (maxX < r.minX) |
57 | 0 | return false; |
58 | 0 | if (maxY < r.minY) |
59 | 0 | return false; |
60 | 0 | if (minX > r.maxX) |
61 | 0 | return false; |
62 | 0 | if (minY > r.maxY) |
63 | 0 | return false; |
64 | 0 | return true; |
65 | 0 | } |
66 | | |
67 | | std::vector<double> NodeItem::toVector() |
68 | 6 | { |
69 | 6 | return std::vector<double>{minX, minY, maxX, maxY}; |
70 | 6 | } |
71 | | |
72 | | // Based on public domain code at |
73 | | // https://github.com/rawrunprotected/hilbert_curves |
74 | | uint32_t hilbert(uint32_t x, uint32_t y) |
75 | 66 | { |
76 | 66 | uint32_t a = x ^ y; |
77 | 66 | uint32_t b = 0xFFFF ^ a; |
78 | 66 | uint32_t c = 0xFFFF ^ (x | y); |
79 | 66 | uint32_t d = x & (y ^ 0xFFFF); |
80 | | |
81 | 66 | uint32_t A = a | (b >> 1); |
82 | 66 | uint32_t B = (a >> 1) ^ a; |
83 | 66 | uint32_t C = ((c >> 1) ^ (b & (d >> 1))) ^ c; |
84 | 66 | uint32_t D = ((a & (c >> 1)) ^ (d >> 1)) ^ d; |
85 | | |
86 | 66 | a = A; |
87 | 66 | b = B; |
88 | 66 | c = C; |
89 | 66 | d = D; |
90 | 66 | A = ((a & (a >> 2)) ^ (b & (b >> 2))); |
91 | 66 | B = ((a & (b >> 2)) ^ (b & ((a ^ b) >> 2))); |
92 | 66 | C ^= ((a & (c >> 2)) ^ (b & (d >> 2))); |
93 | 66 | D ^= ((b & (c >> 2)) ^ ((a ^ b) & (d >> 2))); |
94 | | |
95 | 66 | a = A; |
96 | 66 | b = B; |
97 | 66 | c = C; |
98 | 66 | d = D; |
99 | 66 | A = ((a & (a >> 4)) ^ (b & (b >> 4))); |
100 | 66 | B = ((a & (b >> 4)) ^ (b & ((a ^ b) >> 4))); |
101 | 66 | C ^= ((a & (c >> 4)) ^ (b & (d >> 4))); |
102 | 66 | D ^= ((b & (c >> 4)) ^ ((a ^ b) & (d >> 4))); |
103 | | |
104 | 66 | a = A; |
105 | 66 | b = B; |
106 | 66 | c = C; |
107 | 66 | d = D; |
108 | 66 | C ^= ((a & (c >> 8)) ^ (b & (d >> 8))); |
109 | 66 | D ^= ((b & (c >> 8)) ^ ((a ^ b) & (d >> 8))); |
110 | | |
111 | 66 | a = C ^ (C >> 1); |
112 | 66 | b = D ^ (D >> 1); |
113 | | |
114 | 66 | uint32_t i0 = x ^ y; |
115 | 66 | uint32_t i1 = b | (0xFFFF ^ (i0 | a)); |
116 | | |
117 | 66 | i0 = (i0 | (i0 << 8)) & 0x00FF00FF; |
118 | 66 | i0 = (i0 | (i0 << 4)) & 0x0F0F0F0F; |
119 | 66 | i0 = (i0 | (i0 << 2)) & 0x33333333; |
120 | 66 | i0 = (i0 | (i0 << 1)) & 0x55555555; |
121 | | |
122 | 66 | i1 = (i1 | (i1 << 8)) & 0x00FF00FF; |
123 | 66 | i1 = (i1 | (i1 << 4)) & 0x0F0F0F0F; |
124 | 66 | i1 = (i1 | (i1 << 2)) & 0x33333333; |
125 | 66 | i1 = (i1 | (i1 << 1)) & 0x55555555; |
126 | | |
127 | 66 | uint32_t value = ((i1 << 1) | i0); |
128 | | |
129 | 66 | return value; |
130 | 66 | } |
131 | | |
132 | | uint32_t hilbert(const NodeItem &r, uint32_t hilbertMax, const double minX, |
133 | | const double minY, const double width, const double height) |
134 | 66 | { |
135 | 66 | uint32_t x = 0; |
136 | 66 | uint32_t y = 0; |
137 | 66 | uint32_t v; |
138 | 66 | if (width != 0.0) |
139 | 52 | x = static_cast<uint32_t>( |
140 | 52 | floor(hilbertMax * ((r.minX + r.maxX) / 2 - minX) / width)); |
141 | 66 | if (height != 0.0) |
142 | 0 | y = static_cast<uint32_t>( |
143 | 0 | floor(hilbertMax * ((r.minY + r.maxY) / 2 - minY) / height)); |
144 | 66 | v = hilbert(x, y); |
145 | 66 | return v; |
146 | 66 | } |
147 | | |
148 | | void hilbertSort(std::vector<std::shared_ptr<Item>> &items) |
149 | 0 | { |
150 | 0 | NodeItem extent = calcExtent(items); |
151 | 0 | const double minX = extent.minX; |
152 | 0 | const double minY = extent.minY; |
153 | 0 | const double width = extent.width(); |
154 | 0 | const double height = extent.height(); |
155 | 0 | std::sort(items.begin(), items.end(), |
156 | 0 | [minX, minY, width, height](std::shared_ptr<Item> a, |
157 | 0 | std::shared_ptr<Item> b) |
158 | 0 | { |
159 | 0 | uint32_t ha = hilbert(a->nodeItem, HILBERT_MAX, minX, minY, |
160 | 0 | width, height); |
161 | 0 | uint32_t hb = hilbert(b->nodeItem, HILBERT_MAX, minX, minY, |
162 | 0 | width, height); |
163 | 0 | return ha > hb; |
164 | 0 | }); |
165 | 0 | } |
166 | | |
167 | | void hilbertSort(std::vector<NodeItem> &items) |
168 | 0 | { |
169 | 0 | NodeItem extent = calcExtent(items); |
170 | 0 | const double minX = extent.minX; |
171 | 0 | const double minY = extent.minY; |
172 | 0 | const double width = extent.width(); |
173 | 0 | const double height = extent.height(); |
174 | 0 | std::sort(items.begin(), items.end(), |
175 | 0 | [minX, minY, width, height](const NodeItem &a, const NodeItem &b) |
176 | 0 | { |
177 | 0 | uint32_t ha = |
178 | 0 | hilbert(a, HILBERT_MAX, minX, minY, width, height); |
179 | 0 | uint32_t hb = |
180 | 0 | hilbert(b, HILBERT_MAX, minX, minY, width, height); |
181 | 0 | return ha > hb; |
182 | 0 | }); |
183 | 0 | } |
184 | | |
185 | | NodeItem calcExtent(const std::vector<std::shared_ptr<Item>> &items) |
186 | 0 | { |
187 | 0 | return std::accumulate( |
188 | 0 | items.begin(), items.end(), NodeItem::create(0), |
189 | 0 | [](NodeItem a, const std::shared_ptr<Item> &b) -> NodeItem |
190 | 0 | { return a.expand(b->nodeItem); }); |
191 | 0 | } |
192 | | |
193 | | NodeItem calcExtent(const std::vector<NodeItem> &nodes) |
194 | 0 | { |
195 | 0 | return std::accumulate(nodes.begin(), nodes.end(), NodeItem::create(0), |
196 | 0 | [](NodeItem a, const NodeItem &b) -> NodeItem |
197 | 0 | { return a.expand(b); }); |
198 | 0 | } |
199 | | |
200 | | void PackedRTree::init(const uint16_t nodeSize) |
201 | 6 | { |
202 | 6 | if (nodeSize < 2) |
203 | 0 | throw std::invalid_argument("Node size must be at least 2"); |
204 | 6 | if (_numItems == 0) |
205 | 0 | throw std::invalid_argument("Cannot create empty tree"); |
206 | 6 | _nodeSize = std::min(std::max(nodeSize, static_cast<uint16_t>(2)), |
207 | 6 | static_cast<uint16_t>(65535)); |
208 | 6 | _levelBounds = generateLevelBounds(_numItems, _nodeSize); |
209 | 6 | _numNodes = _levelBounds.front().second; |
210 | 6 | _nodeItems = new NodeItem[static_cast<size_t>(_numNodes)]; |
211 | 6 | } |
212 | | |
213 | | template <class T, class U> inline T div_round_up(T a, U b) |
214 | 33.9k | { |
215 | 33.9k | return a / b + (((a % b) == 0) ? 0 : 1); |
216 | 33.9k | } |
217 | | |
218 | | std::vector<std::pair<uint64_t, uint64_t>> |
219 | | PackedRTree::generateLevelBounds(const uint64_t numItems, |
220 | | const uint16_t nodeSize) |
221 | 6 | { |
222 | 6 | if (nodeSize < 2) |
223 | 0 | throw std::invalid_argument("Node size must be at least 2"); |
224 | 6 | if (numItems == 0) |
225 | 0 | throw std::invalid_argument("Number of items must be greater than 0"); |
226 | 6 | if (numItems > |
227 | 6 | std::numeric_limits<uint64_t>::max() - ((numItems / nodeSize) * 2)) |
228 | 0 | throw std::overflow_error("Number of items too large"); |
229 | | |
230 | | // number of nodes per level in bottom-up order |
231 | 6 | std::vector<uint64_t> levelNumNodes; |
232 | 6 | uint64_t n = numItems; |
233 | 6 | uint64_t numNodes = n; |
234 | 6 | levelNumNodes.push_back(n); |
235 | 6 | do |
236 | 6 | { |
237 | 6 | n = div_round_up(n, nodeSize); |
238 | 6 | numNodes += n; |
239 | 6 | levelNumNodes.push_back(n); |
240 | 6 | } while (n != 1); |
241 | | |
242 | | // bounds per level in reversed storage order (top-down) |
243 | 6 | std::vector<uint64_t> levelOffsets; |
244 | 6 | n = numNodes; |
245 | 6 | for (auto size : levelNumNodes) |
246 | 12 | levelOffsets.push_back(n -= size); |
247 | 6 | std::vector<std::pair<uint64_t, uint64_t>> levelBounds; |
248 | 18 | for (size_t i = 0; i < levelNumNodes.size(); i++) |
249 | 12 | levelBounds.push_back(std::pair<uint64_t, uint64_t>( |
250 | 12 | levelOffsets[i], levelOffsets[i] + levelNumNodes[i])); |
251 | 6 | return levelBounds; |
252 | 6 | } |
253 | | |
254 | | void PackedRTree::generateNodes() |
255 | 6 | { |
256 | 12 | for (uint32_t i = 0; i < _levelBounds.size() - 1; i++) |
257 | 6 | { |
258 | 6 | auto pos = _levelBounds[i].first; |
259 | 6 | auto end = _levelBounds[i].second; |
260 | 6 | auto newpos = _levelBounds[i + 1].first; |
261 | 12 | while (pos < end) |
262 | 6 | { |
263 | 6 | NodeItem node = NodeItem::create(pos); |
264 | 36 | for (uint32_t j = 0; j < _nodeSize && pos < end; j++) |
265 | 30 | node.expand(_nodeItems[pos++]); |
266 | 6 | _nodeItems[newpos++] = node; |
267 | 6 | } |
268 | 6 | } |
269 | 6 | } |
270 | | |
271 | | void PackedRTree::fromData(const void *data) |
272 | 0 | { |
273 | 0 | auto buf = reinterpret_cast<const uint8_t *>(data); |
274 | 0 | const NodeItem *pn = reinterpret_cast<const NodeItem *>(buf); |
275 | 0 | for (uint64_t i = 0; i < _numNodes; i++) |
276 | 0 | { |
277 | 0 | NodeItem n = *pn++; |
278 | 0 | _nodeItems[i] = n; |
279 | 0 | _extent.expand(n); |
280 | 0 | } |
281 | 0 | } |
282 | | |
283 | | PackedRTree::PackedRTree(const std::vector<std::shared_ptr<Item>> &items, |
284 | | const NodeItem &extent, const uint16_t nodeSize) |
285 | 0 | : _extent(extent), _numItems(items.size()) |
286 | 0 | { |
287 | 0 | init(nodeSize); |
288 | 0 | for (size_t i = 0; i < _numItems; i++) |
289 | 0 | _nodeItems[_numNodes - _numItems + i] = items[i]->nodeItem; |
290 | 0 | generateNodes(); |
291 | 0 | } |
292 | | |
293 | | PackedRTree::PackedRTree(const std::vector<NodeItem> &nodes, |
294 | | const NodeItem &extent, const uint16_t nodeSize) |
295 | 0 | : _extent(extent), _numItems(nodes.size()) |
296 | 0 | { |
297 | 0 | init(nodeSize); |
298 | 0 | for (size_t i = 0; i < _numItems; i++) |
299 | 0 | _nodeItems[_numNodes - _numItems + i] = nodes[i]; |
300 | 0 | generateNodes(); |
301 | 0 | } |
302 | | |
303 | | PackedRTree::PackedRTree(const void *data, const uint64_t numItems, |
304 | | const uint16_t nodeSize) |
305 | 0 | : _extent(NodeItem::create(0)), _numItems(numItems) |
306 | 0 | { |
307 | 0 | init(nodeSize); |
308 | 0 | fromData(data); |
309 | 0 | } |
310 | | |
311 | | PackedRTree::PackedRTree(std::function<void(NodeItem *)> fillNodeItems, |
312 | | const uint64_t numItems, const NodeItem &extent, |
313 | | const uint16_t nodeSize) |
314 | 6 | : _extent(extent), _numItems(numItems) |
315 | 6 | { |
316 | 6 | init(nodeSize); |
317 | 6 | fillNodeItems(_nodeItems + _numNodes - _numItems); |
318 | 6 | generateNodes(); |
319 | 6 | } |
320 | | |
321 | | std::vector<SearchResultItem> |
322 | | PackedRTree::search(double minX, double minY, double maxX, double maxY) const |
323 | 0 | { |
324 | 0 | uint64_t leafNodesOffset = _levelBounds.front().first; |
325 | 0 | NodeItem n{minX, minY, maxX, maxY, 0}; |
326 | 0 | std::vector<SearchResultItem> results; |
327 | 0 | std::unordered_map<uint64_t, uint64_t> queue; |
328 | 0 | queue.insert(std::pair<uint64_t, uint64_t>(0, _levelBounds.size() - 1)); |
329 | 0 | while (queue.size() != 0) |
330 | 0 | { |
331 | 0 | auto next = queue.begin(); |
332 | 0 | uint64_t nodeIndex = next->first; |
333 | 0 | uint64_t level = next->second; |
334 | 0 | queue.erase(next); |
335 | 0 | bool isLeafNode = nodeIndex >= _numNodes - _numItems; |
336 | | // find the end index of the node |
337 | 0 | uint64_t end = |
338 | 0 | std::min(static_cast<uint64_t>(nodeIndex + _nodeSize), |
339 | 0 | _levelBounds[static_cast<size_t>(level)].second); |
340 | | // search through child nodes |
341 | 0 | for (uint64_t pos = nodeIndex; pos < end; pos++) |
342 | 0 | { |
343 | 0 | const auto &nodeItem = _nodeItems[static_cast<size_t>(pos)]; |
344 | 0 | if (!n.intersects(nodeItem)) |
345 | 0 | continue; |
346 | 0 | if (isLeafNode) |
347 | 0 | results.push_back({nodeItem.offset, pos - leafNodesOffset}); |
348 | 0 | else |
349 | 0 | queue.insert( |
350 | 0 | std::pair<uint64_t, uint64_t>(nodeItem.offset, level - 1)); |
351 | 0 | } |
352 | 0 | } |
353 | 0 | return results; |
354 | 0 | } |
355 | | |
356 | | std::vector<SearchResultItem> PackedRTree::streamSearch( |
357 | | const uint64_t numItems, const uint16_t nodeSize, const NodeItem &item, |
358 | | const std::function<void(uint8_t *, size_t, size_t)> &readNode) |
359 | 0 | { |
360 | 0 | auto levelBounds = generateLevelBounds(numItems, nodeSize); |
361 | 0 | uint64_t leafNodesOffset = levelBounds.front().first; |
362 | 0 | uint64_t numNodes = levelBounds.front().second; |
363 | 0 | auto nodeItems = std::vector<NodeItem>(nodeSize); |
364 | 0 | uint8_t *nodesBuf = reinterpret_cast<uint8_t *>(nodeItems.data()); |
365 | | // use ordered search queue to make index traversal in sequential order |
366 | 0 | std::map<uint64_t, uint64_t> queue; |
367 | 0 | std::vector<SearchResultItem> results; |
368 | 0 | queue.insert(std::pair<uint64_t, uint64_t>(0, levelBounds.size() - 1)); |
369 | 0 | while (queue.size() != 0) |
370 | 0 | { |
371 | 0 | auto next = queue.begin(); |
372 | 0 | uint64_t nodeIndex = next->first; |
373 | 0 | uint64_t level = next->second; |
374 | 0 | queue.erase(next); |
375 | 0 | bool isLeafNode = nodeIndex >= numNodes - numItems; |
376 | | // find the end index of the node |
377 | 0 | uint64_t end = std::min(static_cast<uint64_t>(nodeIndex + nodeSize), |
378 | 0 | levelBounds[static_cast<size_t>(level)].second); |
379 | 0 | uint64_t length = end - nodeIndex; |
380 | 0 | readNode(nodesBuf, static_cast<size_t>(nodeIndex * sizeof(NodeItem)), |
381 | 0 | static_cast<size_t>(length * sizeof(NodeItem))); |
382 | | #if !CPL_IS_LSB |
383 | | for (size_t i = 0; i < static_cast<size_t>(length); i++) |
384 | | { |
385 | | CPL_LSBPTR64(&nodeItems[i].minX); |
386 | | CPL_LSBPTR64(&nodeItems[i].minY); |
387 | | CPL_LSBPTR64(&nodeItems[i].maxX); |
388 | | CPL_LSBPTR64(&nodeItems[i].maxY); |
389 | | CPL_LSBPTR64(&nodeItems[i].offset); |
390 | | } |
391 | | #endif |
392 | | // search through child nodes |
393 | 0 | for (uint64_t pos = nodeIndex; pos < end; pos++) |
394 | 0 | { |
395 | 0 | uint64_t nodePos = pos - nodeIndex; |
396 | 0 | const auto &nodeItem = nodeItems[static_cast<size_t>(nodePos)]; |
397 | 0 | if (!item.intersects(nodeItem)) |
398 | 0 | continue; |
399 | 0 | if (isLeafNode) |
400 | 0 | results.push_back({nodeItem.offset, pos - leafNodesOffset}); |
401 | 0 | else |
402 | 0 | queue.insert( |
403 | 0 | std::pair<uint64_t, uint64_t>(nodeItem.offset, level - 1)); |
404 | 0 | } |
405 | 0 | } |
406 | 0 | return results; |
407 | 0 | } |
408 | | |
409 | | uint64_t PackedRTree::size() const |
410 | 0 | { |
411 | 0 | return _numNodes * sizeof(NodeItem); |
412 | 0 | } |
413 | | |
414 | | uint64_t PackedRTree::size(const uint64_t numItems, const uint16_t nodeSize) |
415 | 5.45k | { |
416 | 5.45k | if (nodeSize < 2) |
417 | 0 | throw std::invalid_argument("Node size must be at least 2"); |
418 | 5.45k | if (numItems == 0) |
419 | 9 | throw std::invalid_argument("Number of items must be greater than 0"); |
420 | 5.44k | const uint16_t nodeSizeMin = |
421 | 5.44k | std::min(std::max(nodeSize, static_cast<uint16_t>(2)), |
422 | 5.44k | static_cast<uint16_t>(65535)); |
423 | | // limit so that resulting size in bytes can be represented by uint64_t |
424 | 5.44k | if (numItems > static_cast<uint64_t>(1) << 56) |
425 | 0 | throw std::overflow_error("Number of items must be less than 2^56"); |
426 | 5.44k | uint64_t n = numItems; |
427 | 5.44k | uint64_t numNodes = n; |
428 | 5.44k | do |
429 | 33.9k | { |
430 | 33.9k | n = div_round_up(n, nodeSizeMin); |
431 | 33.9k | numNodes += n; |
432 | 33.9k | } while (n != 1); |
433 | 5.44k | return numNodes * sizeof(NodeItem); |
434 | 5.44k | } |
435 | | |
436 | | void PackedRTree::streamWrite( |
437 | | const std::function<void(uint8_t *, size_t)> &writeData) |
438 | 6 | { |
439 | | #if !CPL_IS_LSB |
440 | | for (size_t i = 0; i < static_cast<size_t>(_numNodes); i++) |
441 | | { |
442 | | CPL_LSBPTR64(&_nodeItems[i].minX); |
443 | | CPL_LSBPTR64(&_nodeItems[i].minY); |
444 | | CPL_LSBPTR64(&_nodeItems[i].maxX); |
445 | | CPL_LSBPTR64(&_nodeItems[i].maxY); |
446 | | CPL_LSBPTR64(&_nodeItems[i].offset); |
447 | | } |
448 | | #endif |
449 | 6 | writeData(reinterpret_cast<uint8_t *>(_nodeItems), |
450 | 6 | static_cast<size_t>(_numNodes * sizeof(NodeItem))); |
451 | | #if !CPL_IS_LSB |
452 | | for (size_t i = 0; i < static_cast<size_t>(_numNodes); i++) |
453 | | { |
454 | | CPL_LSBPTR64(&_nodeItems[i].minX); |
455 | | CPL_LSBPTR64(&_nodeItems[i].minY); |
456 | | CPL_LSBPTR64(&_nodeItems[i].maxX); |
457 | | CPL_LSBPTR64(&_nodeItems[i].maxY); |
458 | | CPL_LSBPTR64(&_nodeItems[i].offset); |
459 | | } |
460 | | #endif |
461 | 6 | } |
462 | | |
463 | | NodeItem PackedRTree::getExtent() const |
464 | 0 | { |
465 | 0 | return _extent; |
466 | 0 | } |
467 | | |
468 | | } // namespace FlatGeobuf |