Coverage Report

Created: 2026-06-13 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/classify/kdtree.cpp
Line
Count
Source
1
/******************************************************************************
2
 **  Filename:  kdtree.cpp
3
 **  Purpose:   Routines for managing K-D search trees
4
 **  Author:    Dan Johnson
5
 **
6
 **  (c) Copyright Hewlett-Packard Company, 1988.
7
 ** Licensed under the Apache License, Version 2.0 (the "License");
8
 ** you may not use this file except in compliance with the License.
9
 ** You may obtain a copy of the License at
10
 ** http://www.apache.org/licenses/LICENSE-2.0
11
 ** Unless required by applicable law or agreed to in writing, software
12
 ** distributed under the License is distributed on an "AS IS" BASIS,
13
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
 ** See the License for the specific language governing permissions and
15
 ** limitations under the License.
16
 ******************************************************************************/
17
18
/*-----------------------------------------------------------------------------
19
          Include Files and Type Defines
20
-----------------------------------------------------------------------------*/
21
#include "kdtree.h"
22
23
#include <algorithm>
24
#include <cfloat> // for FLT_MAX
25
#include <cmath>
26
#include <cstdio>
27
28
namespace tesseract {
29
30
0
#define Magnitude(X) ((X) < 0 ? -(X) : (X))
31
0
#define NodeFound(N, K, D) (((N)->Key == (K)) && ((N)->Data == (D)))
32
33
/*-----------------------------------------------------------------------------
34
        Global Data Definitions and Declarations
35
-----------------------------------------------------------------------------*/
36
0
#define MINSEARCH (-FLT_MAX)
37
0
#define MAXSEARCH FLT_MAX
38
39
// Helper function to find the next essential dimension in a cycle.
40
0
static int NextLevel(KDTREE *tree, int level) {
41
0
  do {
42
0
    ++level;
43
0
    if (level >= tree->KeySize) {
44
0
      level = 0;
45
0
    }
46
0
  } while (tree->KeyDesc[level].NonEssential);
47
0
  return level;
48
0
}
49
50
//-----------------------------------------------------------------------------
51
/**  Store the k smallest-keyed key-value pairs. */
52
template <typename Key, typename Value>
53
class MinK {
54
public:
55
  MinK(Key max_key, int k);
56
  ~MinK();
57
58
  struct Element {
59
    Element() = default;
60
0
    Element(const Key &k, const Value &v) : key(k), value(v) {}
61
62
    Key key;
63
    Value value;
64
  };
65
66
  bool insert(Key k, Value v);
67
  const Key &max_insertable_key();
68
69
0
  int elements_count() {
70
0
    return elements_count_;
71
0
  }
72
0
  const Element *elements() {
73
0
    return elements_;
74
0
  }
75
76
private:
77
  const Key max_key_;  ///< the maximum possible Key
78
  Element *elements_;  ///< unsorted array of elements
79
  int elements_count_; ///< the number of results collected so far
80
  int k_;              ///< the number of results we want from the search
81
  int max_index_;      ///< the index of the result with the largest key
82
};
83
84
template <typename Key, typename Value>
85
MinK<Key, Value>::MinK(Key max_key, int k)
86
0
    : max_key_(max_key), elements_count_(0), k_(k < 1 ? 1 : k), max_index_(0) {
87
0
  elements_ = new Element[k_];
88
0
}
89
90
template <typename Key, typename Value>
91
0
MinK<Key, Value>::~MinK() {
92
0
  delete[] elements_;
93
0
}
94
95
template <typename Key, typename Value>
96
0
const Key &MinK<Key, Value>::max_insertable_key() {
97
0
  if (elements_count_ < k_) {
98
0
    return max_key_;
99
0
  }
100
0
  return elements_[max_index_].key;
101
0
}
102
103
template <typename Key, typename Value>
104
0
bool MinK<Key, Value>::insert(Key key, Value value) {
105
0
  if (elements_count_ < k_) {
106
0
    elements_[elements_count_++] = Element(key, value);
107
0
    if (key > elements_[max_index_].key) {
108
0
      max_index_ = elements_count_ - 1;
109
0
    }
110
0
    return true;
111
0
  } else if (key < elements_[max_index_].key) {
112
    // evict the largest element.
113
0
    elements_[max_index_] = Element(key, value);
114
    // recompute max_index_
115
0
    for (int i = 0; i < elements_count_; i++) {
116
0
      if (elements_[i].key > elements_[max_index_].key) {
117
0
        max_index_ = i;
118
0
      }
119
0
    }
120
0
    return true;
121
0
  }
122
0
  return false;
123
0
}
124
125
//-----------------------------------------------------------------------------
126
/** Helper class for searching for the k closest points to query_point in tree.
127
 */
128
class KDTreeSearch {
129
public:
130
  KDTreeSearch(KDTREE *tree, float *query_point, int k_closest);
131
  ~KDTreeSearch();
132
133
  /** Return the k nearest points' data. */
134
  void Search(int *result_count, float *distances, void **results);
135
136
private:
137
  void SearchRec(int Level, KDNODE *SubTree);
138
  bool BoxIntersectsSearch(float *lower, float *upper);
139
140
  KDTREE *tree_;
141
  float *query_point_;
142
  float *sb_min_; ///< search box minimum
143
  float *sb_max_; ///< search box maximum
144
  MinK<float, void *> results_;
145
};
146
147
KDTreeSearch::KDTreeSearch(KDTREE *tree, float *query_point, int k_closest)
148
0
    : tree_(tree), query_point_(query_point), results_(MAXSEARCH, k_closest) {
149
0
  sb_min_ = new float[tree->KeySize];
150
0
  sb_max_ = new float[tree->KeySize];
151
0
}
152
153
0
KDTreeSearch::~KDTreeSearch() {
154
0
  delete[] sb_min_;
155
0
  delete[] sb_max_;
156
0
}
157
158
/// Locate the k_closest points to query_point_, and return their distances and
159
/// data into the given buffers.
160
0
void KDTreeSearch::Search(int *result_count, float *distances, void **results) {
161
0
  if (tree_->Root.Left == nullptr) {
162
0
    *result_count = 0;
163
0
  } else {
164
0
    for (int i = 0; i < tree_->KeySize; i++) {
165
0
      sb_min_[i] = tree_->KeyDesc[i].Min;
166
0
      sb_max_[i] = tree_->KeyDesc[i].Max;
167
0
    }
168
0
    SearchRec(0, tree_->Root.Left);
169
0
    int count = results_.elements_count();
170
0
    *result_count = count;
171
0
    for (int j = 0; j < count; j++) {
172
      // Pre-cast to float64 as key is a template type and we have no control
173
      // over its actual type.
174
0
      distances[j] = static_cast<float>(sqrt(static_cast<double>(results_.elements()[j].key)));
175
0
      results[j] = results_.elements()[j].value;
176
0
    }
177
0
  }
178
0
}
179
180
/*-----------------------------------------------------------------------------
181
              Public Code
182
-----------------------------------------------------------------------------*/
183
/// @return a new KDTREE based on the specified parameters.
184
/// @param KeySize  # of dimensions in the K-D tree
185
/// @param KeyDesc  array of params to describe key dimensions
186
0
KDTREE *MakeKDTree(int16_t KeySize, const PARAM_DESC KeyDesc[]) {
187
0
  auto *KDTree = new KDTREE(KeySize);
188
0
  for (int i = 0; i < KeySize; i++) {
189
0
    KDTree->KeyDesc[i].NonEssential = KeyDesc[i].NonEssential;
190
0
    KDTree->KeyDesc[i].Circular = KeyDesc[i].Circular;
191
0
    if (KeyDesc[i].Circular) {
192
0
      KDTree->KeyDesc[i].Min = KeyDesc[i].Min;
193
0
      KDTree->KeyDesc[i].Max = KeyDesc[i].Max;
194
0
      KDTree->KeyDesc[i].Range = KeyDesc[i].Max - KeyDesc[i].Min;
195
0
      KDTree->KeyDesc[i].HalfRange = KDTree->KeyDesc[i].Range / 2;
196
0
      KDTree->KeyDesc[i].MidRange = (KeyDesc[i].Max + KeyDesc[i].Min) / 2;
197
0
    } else {
198
0
      KDTree->KeyDesc[i].Min = MINSEARCH;
199
0
      KDTree->KeyDesc[i].Max = MAXSEARCH;
200
0
    }
201
0
  }
202
0
  KDTree->Root.Left = nullptr;
203
0
  KDTree->Root.Right = nullptr;
204
0
  return KDTree;
205
0
}
206
207
/**
208
 * This routine stores Data in the K-D tree specified by Tree
209
 * using Key as an access key.
210
 *
211
 * @param Tree    K-D tree in which data is to be stored
212
 * @param Key    ptr to key by which data can be retrieved
213
 * @param Data    ptr to data to be stored in the tree
214
 */
215
0
void KDStore(KDTREE *Tree, float *Key, CLUSTER *Data) {
216
0
  auto PtrToNode = &(Tree->Root.Left);
217
0
  auto Node = *PtrToNode;
218
0
  auto Level = NextLevel(Tree, -1);
219
0
  while (Node != nullptr) {
220
0
    if (Key[Level] < Node->BranchPoint) {
221
0
      PtrToNode = &(Node->Left);
222
0
      if (Key[Level] > Node->LeftBranch) {
223
0
        Node->LeftBranch = Key[Level];
224
0
      }
225
0
    } else {
226
0
      PtrToNode = &(Node->Right);
227
0
      if (Key[Level] < Node->RightBranch) {
228
0
        Node->RightBranch = Key[Level];
229
0
      }
230
0
    }
231
0
    Level = NextLevel(Tree, Level);
232
0
    Node = *PtrToNode;
233
0
  }
234
235
0
  *PtrToNode = new KDNODE(Tree, Key, Data, Level);
236
0
} /* KDStore */
237
238
/**
239
 * This routine deletes a node from Tree.  The node to be
240
 * deleted is specified by the Key for the node and the Data
241
 * contents of the node.  These two pointers must be identical
242
 * to the pointers that were used for the node when it was
243
 * originally stored in the tree.  A node will be deleted from
244
 * the tree only if its key and data pointers are identical
245
 * to Key and Data respectively.  The tree is re-formed by removing
246
 * the affected subtree and inserting all elements but the root.
247
 *
248
 * @param Tree K-D tree to delete node from
249
 * @param Key key of node to be deleted
250
 * @param Data data contents of node to be deleted
251
 */
252
0
void KDDelete(KDTREE *Tree, float Key[], void *Data) {
253
0
  int Level;
254
0
  KDNODE *Current;
255
0
  KDNODE *Father;
256
257
  /* initialize search at root of tree */
258
0
  Father = &(Tree->Root);
259
0
  Current = Father->Left;
260
0
  Level = NextLevel(Tree, -1);
261
262
  /* search tree for node to be deleted */
263
0
  while ((Current != nullptr) && (!NodeFound(Current, Key, Data))) {
264
0
    Father = Current;
265
0
    if (Key[Level] < Current->BranchPoint) {
266
0
      Current = Current->Left;
267
0
    } else {
268
0
      Current = Current->Right;
269
0
    }
270
271
0
    Level = NextLevel(Tree, Level);
272
0
  }
273
274
0
  if (Current != nullptr) { /* if node to be deleted was found */
275
0
    if (Current == Father->Left) {
276
0
      Father->Left = nullptr;
277
0
      Father->LeftBranch = Tree->KeyDesc[Level].Min;
278
0
    } else {
279
0
      Father->Right = nullptr;
280
0
      Father->RightBranch = Tree->KeyDesc[Level].Max;
281
0
    }
282
283
0
    InsertNodes(Tree, Current->Left);
284
0
    InsertNodes(Tree, Current->Right);
285
0
    delete Current;
286
0
  }
287
0
} /* KDDelete */
288
289
/**
290
 * This routine searches the K-D tree specified by Tree and
291
 * finds the QuerySize nearest neighbors of Query.  All neighbors
292
 * must be within MaxDistance of Query.  The data contents of
293
 * the nearest neighbors
294
 * are placed in NBuffer and their distances from Query are
295
 * placed in DBuffer.
296
 * @param Tree    ptr to K-D tree to be searched
297
 * @param Query    ptr to query key (point in D-space)
298
 * @param QuerySize  number of nearest neighbors to be found
299
 * @param MaxDistance  all neighbors must be within this distance
300
 * @param NBuffer ptr to QuerySize buffer to hold nearest neighbors
301
 * @param DBuffer ptr to QuerySize buffer to hold distances
302
 *          from nearest neighbor to query point
303
 * @param NumberOfResults [out] Number of nearest neighbors actually found
304
 */
305
void KDNearestNeighborSearch(KDTREE *Tree, float Query[], int QuerySize, float MaxDistance,
306
0
                             int *NumberOfResults, void **NBuffer, float DBuffer[]) {
307
0
  KDTreeSearch search(Tree, Query, QuerySize);
308
0
  search.Search(NumberOfResults, DBuffer, NBuffer);
309
0
}
310
311
/*---------------------------------------------------------------------------*/
312
/** Walk a given Tree with action. */
313
0
void KDWalk(KDTREE *Tree, kdwalk_proc action, ClusteringContext *context) {
314
0
  if (Tree->Root.Left != nullptr) {
315
0
    Walk(Tree, action, context, Tree->Root.Left, NextLevel(Tree, -1));
316
0
  }
317
0
}
318
319
/*-----------------------------------------------------------------------------
320
              Private Code
321
-----------------------------------------------------------------------------*/
322
323
/*---------------------------------------------------------------------------*/
324
/**
325
 * Recursively accumulate the k_closest points to query_point_ into results_.
326
 * @param Level  level in tree of sub-tree to be searched
327
 * @param SubTree  sub-tree to be searched
328
 */
329
0
void KDTreeSearch::SearchRec(int level, KDNODE *sub_tree) {
330
0
  if (level >= tree_->KeySize) {
331
0
    level = 0;
332
0
  }
333
334
0
  if (!BoxIntersectsSearch(sb_min_, sb_max_)) {
335
0
    return;
336
0
  }
337
338
0
  results_.insert(DistanceSquared(tree_->KeySize, &tree_->KeyDesc[0], query_point_, sub_tree->Key),
339
0
                  sub_tree->Data);
340
341
0
  if (query_point_[level] < sub_tree->BranchPoint) {
342
0
    if (sub_tree->Left != nullptr) {
343
0
      float tmp = sb_max_[level];
344
0
      sb_max_[level] = sub_tree->LeftBranch;
345
0
      SearchRec(NextLevel(tree_, level), sub_tree->Left);
346
0
      sb_max_[level] = tmp;
347
0
    }
348
0
    if (sub_tree->Right != nullptr) {
349
0
      float tmp = sb_min_[level];
350
0
      sb_min_[level] = sub_tree->RightBranch;
351
0
      SearchRec(NextLevel(tree_, level), sub_tree->Right);
352
0
      sb_min_[level] = tmp;
353
0
    }
354
0
  } else {
355
0
    if (sub_tree->Right != nullptr) {
356
0
      float tmp = sb_min_[level];
357
0
      sb_min_[level] = sub_tree->RightBranch;
358
0
      SearchRec(NextLevel(tree_, level), sub_tree->Right);
359
0
      sb_min_[level] = tmp;
360
0
    }
361
0
    if (sub_tree->Left != nullptr) {
362
0
      float tmp = sb_max_[level];
363
0
      sb_max_[level] = sub_tree->LeftBranch;
364
0
      SearchRec(NextLevel(tree_, level), sub_tree->Left);
365
0
      sb_max_[level] = tmp;
366
0
    }
367
0
  }
368
0
}
369
370
/*---------------------------------------------------------------------------*/
371
/**
372
 *Returns the Euclidean distance squared between p1 and p2 for all essential
373
 * dimensions.
374
 * @param k      keys are in k-space
375
 * @param dim    dimension descriptions (essential, circular, etc)
376
 * @param p1,p2  two different points in K-D space
377
 */
378
0
float DistanceSquared(int k, PARAM_DESC *dim, float p1[], float p2[]) {
379
0
  float total_distance = 0;
380
381
0
  for (; k > 0; k--, p1++, p2++, dim++) {
382
0
    if (dim->NonEssential) {
383
0
      continue;
384
0
    }
385
386
0
    float dimension_distance = *p1 - *p2;
387
388
    /* if this dimension is circular - check wraparound distance */
389
0
    if (dim->Circular) {
390
0
      dimension_distance = Magnitude(dimension_distance);
391
0
      float wrap_distance = dim->Max - dim->Min - dimension_distance;
392
0
      dimension_distance = std::min(dimension_distance, wrap_distance);
393
0
    }
394
395
0
    total_distance += dimension_distance * dimension_distance;
396
0
  }
397
0
  return total_distance;
398
0
}
399
400
0
float ComputeDistance(int k, PARAM_DESC *dim, float p1[], float p2[]) {
401
0
  return std::sqrt(DistanceSquared(k, dim, p1, p2));
402
0
}
403
404
/*---------------------------------------------------------------------------*/
405
/// Return whether the query region (the smallest known circle about
406
/// query_point_ containing results->k_ points) intersects the box specified
407
/// between lower and upper.  For circular dimensions, we also check the point
408
/// one wrap distance away from the query.
409
0
bool KDTreeSearch::BoxIntersectsSearch(float *lower, float *upper) {
410
0
  float *query = query_point_;
411
  // Compute the sum in higher precision.
412
0
  double total_distance = 0.0;
413
0
  double radius_squared =
414
0
      static_cast<double>(results_.max_insertable_key()) * results_.max_insertable_key();
415
0
  PARAM_DESC *dim = &tree_->KeyDesc[0];
416
417
0
  for (int i = tree_->KeySize; i > 0; i--, dim++, query++, lower++, upper++) {
418
0
    if (dim->NonEssential) {
419
0
      continue;
420
0
    }
421
422
0
    float dimension_distance;
423
0
    if (*query < *lower) {
424
0
      dimension_distance = *lower - *query;
425
0
    } else if (*query > *upper) {
426
0
      dimension_distance = *query - *upper;
427
0
    } else {
428
0
      dimension_distance = 0;
429
0
    }
430
431
    /* if this dimension is circular - check wraparound distance */
432
0
    if (dim->Circular) {
433
0
      float wrap_distance = FLT_MAX;
434
0
      if (*query < *lower) {
435
0
        wrap_distance = *query + dim->Max - dim->Min - *upper;
436
0
      } else if (*query > *upper) {
437
0
        wrap_distance = *lower - (*query - (dim->Max - dim->Min));
438
0
      }
439
0
      dimension_distance = std::min(dimension_distance, wrap_distance);
440
0
    }
441
442
0
    total_distance += static_cast<double>(dimension_distance) * dimension_distance;
443
0
    if (total_distance >= radius_squared) {
444
0
      return false;
445
0
    }
446
0
  }
447
0
  return true;
448
0
}
449
450
/*---------------------------------------------------------------------------*/
451
/**
452
 * Walk a tree, calling action once on each node.
453
 *
454
 * Operation:
455
 *   This routine walks through the specified sub_tree and invokes action
456
 *   action at each node as follows:
457
 *       action(context, data, level)
458
 *   data  the data contents of the node being visited,
459
 *   level is the level of the node in the tree with the root being level 0.
460
 * @param tree  root of the tree being walked.
461
 * @param action  action to be performed at every node
462
 * @param context  action's context
463
 * @param sub_tree  ptr to root of subtree to be walked
464
 * @param level  current level in the tree for this node
465
 */
466
0
void Walk(KDTREE *tree, kdwalk_proc action, ClusteringContext *context, KDNODE *sub_tree, int32_t level) {
467
0
  (*action)(context, sub_tree->Data, level);
468
0
  if (sub_tree->Left != nullptr) {
469
0
    Walk(tree, action, context, sub_tree->Left, NextLevel(tree, level));
470
0
  }
471
0
  if (sub_tree->Right != nullptr) {
472
0
    Walk(tree, action, context, sub_tree->Right, NextLevel(tree, level));
473
0
  }
474
0
}
475
476
/** Given a subtree nodes, insert all of its elements into tree. */
477
0
void InsertNodes(KDTREE *tree, KDNODE *nodes) {
478
0
  if (nodes == nullptr) {
479
0
    return;
480
0
  }
481
482
0
  KDStore(tree, nodes->Key, nodes->Data);
483
0
  InsertNodes(tree, nodes->Left);
484
0
  InsertNodes(tree, nodes->Right);
485
0
}
486
487
} // namespace tesseract