/src/tesseract/src/classify/kdtree.cpp
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | ** Filename: kdtree.cpp |
3 | | ** Purpose: Routines for managing K-D search trees |
4 | | ** Author: Dan Johnson |
5 | | ** |
6 | | ** (c) Copyright Hewlett-Packard Company, 1988. |
7 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | ** you may not use this file except in compliance with the License. |
9 | | ** You may obtain a copy of the License at |
10 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
11 | | ** Unless required by applicable law or agreed to in writing, software |
12 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
13 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | ** See the License for the specific language governing permissions and |
15 | | ** limitations under the License. |
16 | | ******************************************************************************/ |
17 | | |
18 | | /*----------------------------------------------------------------------------- |
19 | | Include Files and Type Defines |
20 | | -----------------------------------------------------------------------------*/ |
21 | | #include "kdtree.h" |
22 | | |
23 | | #include <algorithm> |
24 | | #include <cfloat> // for FLT_MAX |
25 | | #include <cmath> |
26 | | #include <cstdio> |
27 | | |
28 | | namespace tesseract { |
29 | | |
30 | 0 | #define Magnitude(X) ((X) < 0 ? -(X) : (X)) |
31 | 0 | #define NodeFound(N, K, D) (((N)->Key == (K)) && ((N)->Data == (D))) |
32 | | |
33 | | /*----------------------------------------------------------------------------- |
34 | | Global Data Definitions and Declarations |
35 | | -----------------------------------------------------------------------------*/ |
36 | 0 | #define MINSEARCH (-FLT_MAX) |
37 | 0 | #define MAXSEARCH FLT_MAX |
38 | | |
39 | | // Helper function to find the next essential dimension in a cycle. |
40 | 0 | static int NextLevel(KDTREE *tree, int level) { |
41 | 0 | do { |
42 | 0 | ++level; |
43 | 0 | if (level >= tree->KeySize) { |
44 | 0 | level = 0; |
45 | 0 | } |
46 | 0 | } while (tree->KeyDesc[level].NonEssential); |
47 | 0 | return level; |
48 | 0 | } |
49 | | |
50 | | //----------------------------------------------------------------------------- |
51 | | /** Store the k smallest-keyed key-value pairs. */ |
52 | | template <typename Key, typename Value> |
53 | | class MinK { |
54 | | public: |
55 | | MinK(Key max_key, int k); |
56 | | ~MinK(); |
57 | | |
58 | | struct Element { |
59 | | Element() = default; |
60 | 0 | Element(const Key &k, const Value &v) : key(k), value(v) {} |
61 | | |
62 | | Key key; |
63 | | Value value; |
64 | | }; |
65 | | |
66 | | bool insert(Key k, Value v); |
67 | | const Key &max_insertable_key(); |
68 | | |
69 | 0 | int elements_count() { |
70 | 0 | return elements_count_; |
71 | 0 | } |
72 | 0 | const Element *elements() { |
73 | 0 | return elements_; |
74 | 0 | } |
75 | | |
76 | | private: |
77 | | const Key max_key_; ///< the maximum possible Key |
78 | | Element *elements_; ///< unsorted array of elements |
79 | | int elements_count_; ///< the number of results collected so far |
80 | | int k_; ///< the number of results we want from the search |
81 | | int max_index_; ///< the index of the result with the largest key |
82 | | }; |
83 | | |
84 | | template <typename Key, typename Value> |
85 | | MinK<Key, Value>::MinK(Key max_key, int k) |
86 | 0 | : max_key_(max_key), elements_count_(0), k_(k < 1 ? 1 : k), max_index_(0) { |
87 | 0 | elements_ = new Element[k_]; |
88 | 0 | } |
89 | | |
90 | | template <typename Key, typename Value> |
91 | 0 | MinK<Key, Value>::~MinK() { |
92 | 0 | delete[] elements_; |
93 | 0 | } |
94 | | |
95 | | template <typename Key, typename Value> |
96 | 0 | const Key &MinK<Key, Value>::max_insertable_key() { |
97 | 0 | if (elements_count_ < k_) { |
98 | 0 | return max_key_; |
99 | 0 | } |
100 | 0 | return elements_[max_index_].key; |
101 | 0 | } |
102 | | |
103 | | template <typename Key, typename Value> |
104 | 0 | bool MinK<Key, Value>::insert(Key key, Value value) { |
105 | 0 | if (elements_count_ < k_) { |
106 | 0 | elements_[elements_count_++] = Element(key, value); |
107 | 0 | if (key > elements_[max_index_].key) { |
108 | 0 | max_index_ = elements_count_ - 1; |
109 | 0 | } |
110 | 0 | return true; |
111 | 0 | } else if (key < elements_[max_index_].key) { |
112 | | // evict the largest element. |
113 | 0 | elements_[max_index_] = Element(key, value); |
114 | | // recompute max_index_ |
115 | 0 | for (int i = 0; i < elements_count_; i++) { |
116 | 0 | if (elements_[i].key > elements_[max_index_].key) { |
117 | 0 | max_index_ = i; |
118 | 0 | } |
119 | 0 | } |
120 | 0 | return true; |
121 | 0 | } |
122 | 0 | return false; |
123 | 0 | } |
124 | | |
125 | | //----------------------------------------------------------------------------- |
126 | | /** Helper class for searching for the k closest points to query_point in tree. |
127 | | */ |
128 | | class KDTreeSearch { |
129 | | public: |
130 | | KDTreeSearch(KDTREE *tree, float *query_point, int k_closest); |
131 | | ~KDTreeSearch(); |
132 | | |
133 | | /** Return the k nearest points' data. */ |
134 | | void Search(int *result_count, float *distances, void **results); |
135 | | |
136 | | private: |
137 | | void SearchRec(int Level, KDNODE *SubTree); |
138 | | bool BoxIntersectsSearch(float *lower, float *upper); |
139 | | |
140 | | KDTREE *tree_; |
141 | | float *query_point_; |
142 | | float *sb_min_; ///< search box minimum |
143 | | float *sb_max_; ///< search box maximum |
144 | | MinK<float, void *> results_; |
145 | | }; |
146 | | |
147 | | KDTreeSearch::KDTreeSearch(KDTREE *tree, float *query_point, int k_closest) |
148 | 0 | : tree_(tree), query_point_(query_point), results_(MAXSEARCH, k_closest) { |
149 | 0 | sb_min_ = new float[tree->KeySize]; |
150 | 0 | sb_max_ = new float[tree->KeySize]; |
151 | 0 | } |
152 | | |
153 | 0 | KDTreeSearch::~KDTreeSearch() { |
154 | 0 | delete[] sb_min_; |
155 | 0 | delete[] sb_max_; |
156 | 0 | } |
157 | | |
158 | | /// Locate the k_closest points to query_point_, and return their distances and |
159 | | /// data into the given buffers. |
160 | 0 | void KDTreeSearch::Search(int *result_count, float *distances, void **results) { |
161 | 0 | if (tree_->Root.Left == nullptr) { |
162 | 0 | *result_count = 0; |
163 | 0 | } else { |
164 | 0 | for (int i = 0; i < tree_->KeySize; i++) { |
165 | 0 | sb_min_[i] = tree_->KeyDesc[i].Min; |
166 | 0 | sb_max_[i] = tree_->KeyDesc[i].Max; |
167 | 0 | } |
168 | 0 | SearchRec(0, tree_->Root.Left); |
169 | 0 | int count = results_.elements_count(); |
170 | 0 | *result_count = count; |
171 | 0 | for (int j = 0; j < count; j++) { |
172 | | // Pre-cast to float64 as key is a template type and we have no control |
173 | | // over its actual type. |
174 | 0 | distances[j] = static_cast<float>(sqrt(static_cast<double>(results_.elements()[j].key))); |
175 | 0 | results[j] = results_.elements()[j].value; |
176 | 0 | } |
177 | 0 | } |
178 | 0 | } |
179 | | |
180 | | /*----------------------------------------------------------------------------- |
181 | | Public Code |
182 | | -----------------------------------------------------------------------------*/ |
183 | | /// @return a new KDTREE based on the specified parameters. |
184 | | /// @param KeySize # of dimensions in the K-D tree |
185 | | /// @param KeyDesc array of params to describe key dimensions |
186 | 0 | KDTREE *MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]) { |
187 | 0 | auto *KDTree = new KDTREE(KeySize); |
188 | 0 | for (int i = 0; i < KeySize; i++) { |
189 | 0 | KDTree->KeyDesc[i].NonEssential = KeyDesc[i].NonEssential; |
190 | 0 | KDTree->KeyDesc[i].Circular = KeyDesc[i].Circular; |
191 | 0 | if (KeyDesc[i].Circular) { |
192 | 0 | KDTree->KeyDesc[i].Min = KeyDesc[i].Min; |
193 | 0 | KDTree->KeyDesc[i].Max = KeyDesc[i].Max; |
194 | 0 | KDTree->KeyDesc[i].Range = KeyDesc[i].Max - KeyDesc[i].Min; |
195 | 0 | KDTree->KeyDesc[i].HalfRange = KDTree->KeyDesc[i].Range / 2; |
196 | 0 | KDTree->KeyDesc[i].MidRange = (KeyDesc[i].Max + KeyDesc[i].Min) / 2; |
197 | 0 | } else { |
198 | 0 | KDTree->KeyDesc[i].Min = MINSEARCH; |
199 | 0 | KDTree->KeyDesc[i].Max = MAXSEARCH; |
200 | 0 | } |
201 | 0 | } |
202 | 0 | KDTree->Root.Left = nullptr; |
203 | 0 | KDTree->Root.Right = nullptr; |
204 | 0 | return KDTree; |
205 | 0 | } |
206 | | |
207 | | /** |
208 | | * This routine stores Data in the K-D tree specified by Tree |
209 | | * using Key as an access key. |
210 | | * |
211 | | * @param Tree K-D tree in which data is to be stored |
212 | | * @param Key ptr to key by which data can be retrieved |
213 | | * @param Data ptr to data to be stored in the tree |
214 | | */ |
215 | 0 | void KDStore(KDTREE *Tree, float *Key, CLUSTER *Data) { |
216 | 0 | auto PtrToNode = &(Tree->Root.Left); |
217 | 0 | auto Node = *PtrToNode; |
218 | 0 | auto Level = NextLevel(Tree, -1); |
219 | 0 | while (Node != nullptr) { |
220 | 0 | if (Key[Level] < Node->BranchPoint) { |
221 | 0 | PtrToNode = &(Node->Left); |
222 | 0 | if (Key[Level] > Node->LeftBranch) { |
223 | 0 | Node->LeftBranch = Key[Level]; |
224 | 0 | } |
225 | 0 | } else { |
226 | 0 | PtrToNode = &(Node->Right); |
227 | 0 | if (Key[Level] < Node->RightBranch) { |
228 | 0 | Node->RightBranch = Key[Level]; |
229 | 0 | } |
230 | 0 | } |
231 | 0 | Level = NextLevel(Tree, Level); |
232 | 0 | Node = *PtrToNode; |
233 | 0 | } |
234 | |
|
235 | 0 | *PtrToNode = new KDNODE(Tree, Key, Data, Level); |
236 | 0 | } /* KDStore */ |
237 | | |
238 | | /** |
239 | | * This routine deletes a node from Tree. The node to be |
240 | | * deleted is specified by the Key for the node and the Data |
241 | | * contents of the node. These two pointers must be identical |
242 | | * to the pointers that were used for the node when it was |
243 | | * originally stored in the tree. A node will be deleted from |
244 | | * the tree only if its key and data pointers are identical |
245 | | * to Key and Data respectively. The tree is re-formed by removing |
246 | | * the affected subtree and inserting all elements but the root. |
247 | | * |
248 | | * @param Tree K-D tree to delete node from |
249 | | * @param Key key of node to be deleted |
250 | | * @param Data data contents of node to be deleted |
251 | | */ |
252 | 0 | void KDDelete(KDTREE *Tree, float Key[], void *Data) { |
253 | 0 | int Level; |
254 | 0 | KDNODE *Current; |
255 | 0 | KDNODE *Father; |
256 | | |
257 | | /* initialize search at root of tree */ |
258 | 0 | Father = &(Tree->Root); |
259 | 0 | Current = Father->Left; |
260 | 0 | Level = NextLevel(Tree, -1); |
261 | | |
262 | | /* search tree for node to be deleted */ |
263 | 0 | while ((Current != nullptr) && (!NodeFound(Current, Key, Data))) { |
264 | 0 | Father = Current; |
265 | 0 | if (Key[Level] < Current->BranchPoint) { |
266 | 0 | Current = Current->Left; |
267 | 0 | } else { |
268 | 0 | Current = Current->Right; |
269 | 0 | } |
270 | |
|
271 | 0 | Level = NextLevel(Tree, Level); |
272 | 0 | } |
273 | |
|
274 | 0 | if (Current != nullptr) { /* if node to be deleted was found */ |
275 | 0 | if (Current == Father->Left) { |
276 | 0 | Father->Left = nullptr; |
277 | 0 | Father->LeftBranch = Tree->KeyDesc[Level].Min; |
278 | 0 | } else { |
279 | 0 | Father->Right = nullptr; |
280 | 0 | Father->RightBranch = Tree->KeyDesc[Level].Max; |
281 | 0 | } |
282 | |
|
283 | 0 | InsertNodes(Tree, Current->Left); |
284 | 0 | InsertNodes(Tree, Current->Right); |
285 | 0 | delete Current; |
286 | 0 | } |
287 | 0 | } /* KDDelete */ |
288 | | |
289 | | /** |
290 | | * This routine searches the K-D tree specified by Tree and |
291 | | * finds the QuerySize nearest neighbors of Query. All neighbors |
292 | | * must be within MaxDistance of Query. The data contents of |
293 | | * the nearest neighbors |
294 | | * are placed in NBuffer and their distances from Query are |
295 | | * placed in DBuffer. |
296 | | * @param Tree ptr to K-D tree to be searched |
297 | | * @param Query ptr to query key (point in D-space) |
298 | | * @param QuerySize number of nearest neighbors to be found |
299 | | * @param MaxDistance all neighbors must be within this distance |
300 | | * @param NBuffer ptr to QuerySize buffer to hold nearest neighbors |
301 | | * @param DBuffer ptr to QuerySize buffer to hold distances |
302 | | * from nearest neighbor to query point |
303 | | * @param NumberOfResults [out] Number of nearest neighbors actually found |
304 | | */ |
305 | | void KDNearestNeighborSearch(KDTREE *Tree, float Query[], int QuerySize, float MaxDistance, |
306 | 0 | int *NumberOfResults, void **NBuffer, float DBuffer[]) { |
307 | 0 | KDTreeSearch search(Tree, Query, QuerySize); |
308 | 0 | search.Search(NumberOfResults, DBuffer, NBuffer); |
309 | 0 | } |
310 | | |
311 | | /*---------------------------------------------------------------------------*/ |
312 | | /** Walk a given Tree with action. */ |
313 | 0 | void KDWalk(KDTREE *Tree, kdwalk_proc action, ClusteringContext *context) { |
314 | 0 | if (Tree->Root.Left != nullptr) { |
315 | 0 | Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1)); |
316 | 0 | } |
317 | 0 | } |
318 | | |
319 | | /*----------------------------------------------------------------------------- |
320 | | Private Code |
321 | | -----------------------------------------------------------------------------*/ |
322 | | |
323 | | /*---------------------------------------------------------------------------*/ |
324 | | /** |
325 | | * Recursively accumulate the k_closest points to query_point_ into results_. |
326 | | * @param Level level in tree of sub-tree to be searched |
327 | | * @param SubTree sub-tree to be searched |
328 | | */ |
329 | 0 | void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) { |
330 | 0 | if (level >= tree_->KeySize) { |
331 | 0 | level = 0; |
332 | 0 | } |
333 | |
|
334 | 0 | if (!BoxIntersectsSearch(sb_min_, sb_max_)) { |
335 | 0 | return; |
336 | 0 | } |
337 | | |
338 | 0 | results_.insert(DistanceSquared(tree_->KeySize, &tree_->KeyDesc[0], query_point_, sub_tree->Key), |
339 | 0 | sub_tree->Data); |
340 | |
|
341 | 0 | if (query_point_[level] < sub_tree->BranchPoint) { |
342 | 0 | if (sub_tree->Left != nullptr) { |
343 | 0 | float tmp = sb_max_[level]; |
344 | 0 | sb_max_[level] = sub_tree->LeftBranch; |
345 | 0 | SearchRec(NextLevel(tree_, level), sub_tree->Left); |
346 | 0 | sb_max_[level] = tmp; |
347 | 0 | } |
348 | 0 | if (sub_tree->Right != nullptr) { |
349 | 0 | float tmp = sb_min_[level]; |
350 | 0 | sb_min_[level] = sub_tree->RightBranch; |
351 | 0 | SearchRec(NextLevel(tree_, level), sub_tree->Right); |
352 | 0 | sb_min_[level] = tmp; |
353 | 0 | } |
354 | 0 | } else { |
355 | 0 | if (sub_tree->Right != nullptr) { |
356 | 0 | float tmp = sb_min_[level]; |
357 | 0 | sb_min_[level] = sub_tree->RightBranch; |
358 | 0 | SearchRec(NextLevel(tree_, level), sub_tree->Right); |
359 | 0 | sb_min_[level] = tmp; |
360 | 0 | } |
361 | 0 | if (sub_tree->Left != nullptr) { |
362 | 0 | float tmp = sb_max_[level]; |
363 | 0 | sb_max_[level] = sub_tree->LeftBranch; |
364 | 0 | SearchRec(NextLevel(tree_, level), sub_tree->Left); |
365 | 0 | sb_max_[level] = tmp; |
366 | 0 | } |
367 | 0 | } |
368 | 0 | } |
369 | | |
370 | | /*---------------------------------------------------------------------------*/ |
371 | | /** |
372 | | *Returns the Euclidean distance squared between p1 and p2 for all essential |
373 | | * dimensions. |
374 | | * @param k keys are in k-space |
375 | | * @param dim dimension descriptions (essential, circular, etc) |
376 | | * @param p1,p2 two different points in K-D space |
377 | | */ |
378 | 0 | float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[]) { |
379 | 0 | float total_distance = 0; |
380 | |
|
381 | 0 | for (; k > 0; k--, p1++, p2++, dim++) { |
382 | 0 | if (dim->NonEssential) { |
383 | 0 | continue; |
384 | 0 | } |
385 | | |
386 | 0 | float dimension_distance = *p1 - *p2; |
387 | | |
388 | | /* if this dimension is circular - check wraparound distance */ |
389 | 0 | if (dim->Circular) { |
390 | 0 | dimension_distance = Magnitude(dimension_distance); |
391 | 0 | float wrap_distance = dim->Max - dim->Min - dimension_distance; |
392 | 0 | dimension_distance = std::min(dimension_distance, wrap_distance); |
393 | 0 | } |
394 | |
|
395 | 0 | total_distance += dimension_distance * dimension_distance; |
396 | 0 | } |
397 | 0 | return total_distance; |
398 | 0 | } |
399 | | |
400 | 0 | float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[]) { |
401 | 0 | return std::sqrt(DistanceSquared(k, dim, p1, p2)); |
402 | 0 | } |
403 | | |
404 | | /*---------------------------------------------------------------------------*/ |
405 | | /// Return whether the query region (the smallest known circle about |
406 | | /// query_point_ containing results->k_ points) intersects the box specified |
407 | | /// between lower and upper. For circular dimensions, we also check the point |
408 | | /// one wrap distance away from the query. |
409 | 0 | bool KDTreeSearch::BoxIntersectsSearch(float *lower, float *upper) { |
410 | 0 | float *query = query_point_; |
411 | | // Compute the sum in higher precision. |
412 | 0 | double total_distance = 0.0; |
413 | 0 | double radius_squared = |
414 | 0 | static_cast<double>(results_.max_insertable_key()) * results_.max_insertable_key(); |
415 | 0 | PARAM_DESC *dim = &tree_->KeyDesc[0]; |
416 | |
|
417 | 0 | for (int i = tree_->KeySize; i > 0; i--, dim++, query++, lower++, upper++) { |
418 | 0 | if (dim->NonEssential) { |
419 | 0 | continue; |
420 | 0 | } |
421 | | |
422 | 0 | float dimension_distance; |
423 | 0 | if (*query < *lower) { |
424 | 0 | dimension_distance = *lower - *query; |
425 | 0 | } else if (*query > *upper) { |
426 | 0 | dimension_distance = *query - *upper; |
427 | 0 | } else { |
428 | 0 | dimension_distance = 0; |
429 | 0 | } |
430 | | |
431 | | /* if this dimension is circular - check wraparound distance */ |
432 | 0 | if (dim->Circular) { |
433 | 0 | float wrap_distance = FLT_MAX; |
434 | 0 | if (*query < *lower) { |
435 | 0 | wrap_distance = *query + dim->Max - dim->Min - *upper; |
436 | 0 | } else if (*query > *upper) { |
437 | 0 | wrap_distance = *lower - (*query - (dim->Max - dim->Min)); |
438 | 0 | } |
439 | 0 | dimension_distance = std::min(dimension_distance, wrap_distance); |
440 | 0 | } |
441 | |
|
442 | 0 | total_distance += static_cast<double>(dimension_distance) * dimension_distance; |
443 | 0 | if (total_distance >= radius_squared) { |
444 | 0 | return false; |
445 | 0 | } |
446 | 0 | } |
447 | 0 | return true; |
448 | 0 | } |
449 | | |
450 | | /*---------------------------------------------------------------------------*/ |
451 | | /** |
452 | | * Walk a tree, calling action once on each node. |
453 | | * |
454 | | * Operation: |
455 | | * This routine walks through the specified sub_tree and invokes action |
456 | | * action at each node as follows: |
457 | | * action(context, data, level) |
458 | | * data the data contents of the node being visited, |
459 | | * level is the level of the node in the tree with the root being level 0. |
460 | | * @param tree root of the tree being walked. |
461 | | * @param action action to be performed at every node |
462 | | * @param context action's context |
463 | | * @param sub_tree ptr to root of subtree to be walked |
464 | | * @param level current level in the tree for this node |
465 | | */ |
466 | 0 | void Walk(KDTREE *tree, kdwalk_proc action, ClusteringContext *context, KDNODE *sub_tree, int32_t level) { |
467 | 0 | (*action)(context, sub_tree->Data, level); |
468 | 0 | if (sub_tree->Left != nullptr) { |
469 | 0 | Walk(tree, action, context, sub_tree->Left, NextLevel(tree, level)); |
470 | 0 | } |
471 | 0 | if (sub_tree->Right != nullptr) { |
472 | 0 | Walk(tree, action, context, sub_tree->Right, NextLevel(tree, level)); |
473 | 0 | } |
474 | 0 | } |
475 | | |
476 | | /** Given a subtree nodes, insert all of its elements into tree. */ |
477 | 0 | void InsertNodes(KDTREE *tree, KDNODE *nodes) { |
478 | 0 | if (nodes == nullptr) { |
479 | 0 | return; |
480 | 0 | } |
481 | | |
482 | 0 | KDStore(tree, nodes->Key, nodes->Data); |
483 | 0 | InsertNodes(tree, nodes->Left); |
484 | 0 | InsertNodes(tree, nodes->Right); |
485 | 0 | } |
486 | | |
487 | | } // namespace tesseract |