/src/tesseract/src/ccutil/sorthelper.h
Line | Count | Source |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: sorthelper.h |
3 | | // Description: Generic sort and maxfinding class. |
4 | | // Author: Ray Smith |
5 | | // |
6 | | // (C) Copyright 2010, Google Inc. |
7 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | // you may not use this file except in compliance with the License. |
9 | | // You may obtain a copy of the License at |
10 | | // http://www.apache.org/licenses/LICENSE-2.0 |
11 | | // Unless required by applicable law or agreed to in writing, software |
12 | | // distributed under the License is distributed on an "AS IS" BASIS, |
13 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | // See the License for the specific language governing permissions and |
15 | | // limitations under the License. |
16 | | // |
17 | | /////////////////////////////////////////////////////////////////////// |
18 | | |
19 | | #ifndef TESSERACT_CCUTIL_SORTHELPER_H_ |
20 | | #define TESSERACT_CCUTIL_SORTHELPER_H_ |
21 | | |
22 | | #include <cstdlib> |
23 | | #include <vector> |
24 | | |
25 | | namespace tesseract { |
26 | | |
27 | | // Generic class to provide functions based on a <value,count> pair. |
28 | | // T is the value type. |
29 | | // The class keeps a count of each value and can return the most frequent |
30 | | // value or a sorted array of the values with counts. |
31 | | // Note that this class uses linear search for adding. It is better |
32 | | // to use the STATS class to get the mode of a large number of values |
33 | | // in a small space. SortHelper is better to get the mode of a small number |
34 | | // of values from a large space. |
35 | | // T must have a copy constructor. |
36 | | template <typename T> |
37 | | class SortHelper { |
38 | | public: |
39 | | // Simple pair class to hold the values and counts. |
40 | | template <typename PairT> |
41 | | struct SortPair { |
42 | | PairT value; |
43 | | int count; |
44 | | }; |
45 | | // qsort function to sort by decreasing count. |
46 | | static int SortPairsByCount(const void *v1, const void *v2) { |
47 | | const auto *p1 = static_cast<const SortPair<T> *>(v1); |
48 | | const auto *p2 = static_cast<const SortPair<T> *>(v2); |
49 | | return p2->count - p1->count; |
50 | | } |
51 | | // qsort function to sort by decreasing value. |
52 | | static int SortPairsByValue(const void *v1, const void *v2) { |
53 | | const auto *p1 = static_cast<const SortPair<T> *>(v1); |
54 | | const auto *p2 = static_cast<const SortPair<T> *>(v2); |
55 | | if (p2->value - p1->value < 0) { |
56 | | return -1; |
57 | | } |
58 | | if (p2->value - p1->value > 0) { |
59 | | return 1; |
60 | | } |
61 | | return 0; |
62 | | } |
63 | | |
64 | | // Constructor takes a hint of the array size, but it need not be accurate. |
65 | 0 | explicit SortHelper(int sizehint) { |
66 | 0 | counts_.reserve(sizehint); |
67 | 0 | } |
68 | | |
69 | | // Add a value that may be a duplicate of an existing value. |
70 | | // Uses a linear search. |
71 | 0 | void Add(T value, int count) { |
72 | | // Linear search for value. |
73 | 0 | for (auto &it : counts_) { |
74 | 0 | if (it.value == value) { |
75 | 0 | it.count += count; |
76 | 0 | return; |
77 | 0 | } |
78 | 0 | } |
79 | 0 | SortPair<T> new_pair = {value, count}; |
80 | 0 | counts_.push_back(SortPair<T>(new_pair)); |
81 | 0 | } |
82 | | |
83 | | // Returns the frequency of the most frequent value. |
84 | | // If max_value is not nullptr, returns the most frequent value. |
85 | | // If the array is empty, returns -INT32_MAX and max_value is unchanged. |
86 | 0 | int MaxCount(T *max_value) const { |
87 | 0 | int best_count = -INT32_MAX; |
88 | 0 | for (auto &it : counts_) { |
89 | 0 | if (it.count > best_count) { |
90 | 0 | best_count = it.count; |
91 | 0 | if (max_value != nullptr) { |
92 | 0 | *max_value = it.value; |
93 | 0 | } |
94 | 0 | } |
95 | 0 | } |
96 | 0 | return best_count; |
97 | 0 | } |
98 | | |
99 | | // Returns the data array sorted by decreasing frequency. |
100 | | const std::vector<SortPair<T>> &SortByCount() { |
101 | | counts_.sort(&SortPairsByCount); |
102 | | return counts_; |
103 | | } |
104 | | // Returns the data array sorted by decreasing value. |
105 | | const std::vector<SortPair<T>> &SortByValue() { |
106 | | counts_.sort(&SortPairsByValue); |
107 | | return counts_; |
108 | | } |
109 | | |
110 | | private: |
111 | | std::vector<SortPair<T>> counts_; |
112 | | }; |
113 | | |
114 | | } // namespace tesseract |
115 | | |
116 | | #endif // TESSERACT_CCUTIL_SORTHELPER_H_. |