Coverage Report

Created: 2025-09-27 07:10

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/ccutil/sorthelper.h
Line
Count
Source
1
///////////////////////////////////////////////////////////////////////
2
// File:        sorthelper.h
3
// Description: Generic sort and maxfinding class.
4
// Author:      Ray Smith
5
//
6
// (C) Copyright 2010, Google Inc.
7
// Licensed under the Apache License, Version 2.0 (the "License");
8
// you may not use this file except in compliance with the License.
9
// You may obtain a copy of the License at
10
// http://www.apache.org/licenses/LICENSE-2.0
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS,
13
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
// See the License for the specific language governing permissions and
15
// limitations under the License.
16
//
17
///////////////////////////////////////////////////////////////////////
18
19
#ifndef TESSERACT_CCUTIL_SORTHELPER_H_
20
#define TESSERACT_CCUTIL_SORTHELPER_H_
21
22
#include <cstdlib>
23
#include <vector>
24
25
namespace tesseract {
26
27
// Generic class to provide functions based on a <value,count> pair.
28
// T is the value type.
29
// The class keeps a count of each value and can return the most frequent
30
// value or a sorted array of the values with counts.
31
// Note that this class uses linear search for adding. It is better
32
// to use the STATS class to get the mode of a large number of values
33
// in a small space. SortHelper is better to get the mode of a small number
34
// of values from a large space.
35
// T must have a copy constructor.
36
template <typename T>
37
class SortHelper {
38
public:
39
  // Simple pair class to hold the values and counts.
40
  template <typename PairT>
41
  struct SortPair {
42
    PairT value;
43
    int count;
44
  };
45
  // qsort function to sort by decreasing count.
46
  static int SortPairsByCount(const void *v1, const void *v2) {
47
    const auto *p1 = static_cast<const SortPair<T> *>(v1);
48
    const auto *p2 = static_cast<const SortPair<T> *>(v2);
49
    return p2->count - p1->count;
50
  }
51
  // qsort function to sort by decreasing value.
52
  static int SortPairsByValue(const void *v1, const void *v2) {
53
    const auto *p1 = static_cast<const SortPair<T> *>(v1);
54
    const auto *p2 = static_cast<const SortPair<T> *>(v2);
55
    if (p2->value - p1->value < 0) {
56
      return -1;
57
    }
58
    if (p2->value - p1->value > 0) {
59
      return 1;
60
    }
61
    return 0;
62
  }
63
64
  // Constructor takes a hint of the array size, but it need not be accurate.
65
0
  explicit SortHelper(int sizehint) {
66
0
    counts_.reserve(sizehint);
67
0
  }
68
69
  // Add a value that may be a duplicate of an existing value.
70
  // Uses a linear search.
71
0
  void Add(T value, int count) {
72
    // Linear search for value.
73
0
    for (auto &it : counts_) {
74
0
      if (it.value == value) {
75
0
        it.count += count;
76
0
        return;
77
0
      }
78
0
    }
79
0
    SortPair<T> new_pair = {value, count};
80
0
    counts_.push_back(SortPair<T>(new_pair));
81
0
  }
82
83
  // Returns the frequency of the most frequent value.
84
  // If max_value is not nullptr, returns the most frequent value.
85
  // If the array is empty, returns -INT32_MAX and max_value is unchanged.
86
0
  int MaxCount(T *max_value) const {
87
0
    int best_count = -INT32_MAX;
88
0
    for (auto &it : counts_) {
89
0
      if (it.count > best_count) {
90
0
        best_count = it.count;
91
0
        if (max_value != nullptr) {
92
0
          *max_value = it.value;
93
0
        }
94
0
      }
95
0
    }
96
0
    return best_count;
97
0
  }
98
99
  // Returns the data array sorted by decreasing frequency.
100
  const std::vector<SortPair<T>> &SortByCount() {
101
    counts_.sort(&SortPairsByCount);
102
    return counts_;
103
  }
104
  // Returns the data array sorted by decreasing value.
105
  const std::vector<SortPair<T>> &SortByValue() {
106
    counts_.sort(&SortPairsByValue);
107
    return counts_;
108
  }
109
110
private:
111
  std::vector<SortPair<T>> counts_;
112
};
113
114
} // namespace tesseract
115
116
#endif // TESSERACT_CCUTIL_SORTHELPER_H_.