/src/tesseract/src/ccutil/helpers.h
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * |
3 | | * File: helpers.h |
4 | | * Description: General utility functions |
5 | | * Author: Daria Antonova |
6 | | * |
7 | | * (c) Copyright 2009, Google Inc. |
8 | | ** Licensed under the Apache License, Version 2.0 (the "License"); |
9 | | ** you may not use this file except in compliance with the License. |
10 | | ** You may obtain a copy of the License at |
11 | | ** http://www.apache.org/licenses/LICENSE-2.0 |
12 | | ** Unless required by applicable law or agreed to in writing, software |
13 | | ** distributed under the License is distributed on an "AS IS" BASIS, |
14 | | ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | | ** See the License for the specific language governing permissions and |
16 | | ** limitations under the License. |
17 | | * |
18 | | *****************************************************************************/ |
19 | | |
20 | | #ifndef TESSERACT_CCUTIL_HELPERS_H_ |
21 | | #define TESSERACT_CCUTIL_HELPERS_H_ |
22 | | |
23 | | #include <cassert> |
24 | | #include <climits> // for INT_MIN, INT_MAX |
25 | | #include <cmath> // std::isfinite |
26 | | #include <cstdio> |
27 | | #include <algorithm> // for std::find |
28 | | #include <string> |
29 | | #include <vector> |
30 | | |
31 | | #include "serialis.h" |
32 | | |
33 | | namespace tesseract { |
34 | | |
35 | | // Copy a std::string to a newly allocated char *. |
36 | | // TODO: Remove this function once the related code has been converted |
37 | | // to use std::string. |
38 | 187k | inline char *copy_string(const std::string &from) { |
39 | 187k | auto length = from.length(); |
40 | 187k | char *target_string = new char[length + 1]; |
41 | 187k | from.copy(target_string, length); |
42 | 187k | target_string[length] = '\0'; |
43 | 187k | return target_string; |
44 | 187k | } |
45 | | |
46 | | template <class T> |
47 | 3.62k | inline bool contains(const std::vector<T> &data, const T &value) { |
48 | 3.62k | return std::find(data.begin(), data.end(), value) != data.end(); |
49 | 3.62k | } bool tesseract::contains<tesseract::ParagraphModel const*>(std::__1::vector<tesseract::ParagraphModel const*, std::__1::allocator<tesseract::ParagraphModel const*> > const&, tesseract::ParagraphModel const* const&) Line | Count | Source | 47 | 2.66k | inline bool contains(const std::vector<T> &data, const T &value) { | 48 | 2.66k | return std::find(data.begin(), data.end(), value) != data.end(); | 49 | 2.66k | } |
bool tesseract::contains<tesseract::ParagraphModel*>(std::__1::vector<tesseract::ParagraphModel*, std::__1::allocator<tesseract::ParagraphModel*> > const&, tesseract::ParagraphModel* const&) Line | Count | Source | 47 | 509 | inline bool contains(const std::vector<T> &data, const T &value) { | 48 | 509 | return std::find(data.begin(), data.end(), value) != data.end(); | 49 | 509 | } |
bool tesseract::contains<int>(std::__1::vector<int, std::__1::allocator<int> > const&, int const&) Line | Count | Source | 47 | 444 | inline bool contains(const std::vector<T> &data, const T &value) { | 48 | 444 | return std::find(data.begin(), data.end(), value) != data.end(); | 49 | 444 | } |
|
50 | | |
51 | 76.0k | inline const std::vector<std::string> split(const std::string &s, char c) { |
52 | 76.0k | std::string buff; |
53 | 76.0k | std::vector<std::string> v; |
54 | 607k | for (auto n : s) { |
55 | 607k | if (n != c) { |
56 | 455k | buff += n; |
57 | 455k | } else if (n == c && !buff.empty()) { |
58 | 152k | v.push_back(buff); |
59 | 152k | buff.clear(); |
60 | 152k | } |
61 | 607k | } |
62 | 76.0k | if (!buff.empty()) { |
63 | 76.0k | v.push_back(buff); |
64 | 76.0k | } |
65 | 76.0k | return v; |
66 | 76.0k | } |
67 | | |
68 | | // A simple linear congruential random number generator, |
69 | | // using Knuth's constants from: |
70 | | // http://en.wikipedia.org/wiki/Linear_congruential_generator. |
71 | | class TRand { |
72 | | public: |
73 | 4 | TRand() = default; |
74 | | // Sets the seed to the given value. |
75 | 429k | void set_seed(uint64_t seed) { |
76 | 429k | seed_ = seed; |
77 | 429k | } |
78 | | |
79 | | // Returns an integer in the range 0 to INT32_MAX. |
80 | 122M | int32_t IntRand() { |
81 | 122M | Iterate(); |
82 | 122M | return seed_ >> 33; |
83 | 122M | } |
84 | | // Returns a floating point value in the range [-range, range]. |
85 | 121M | double SignedRand(double range) { |
86 | 121M | return range * 2.0 * IntRand() / INT32_MAX - range; |
87 | 121M | } |
88 | | // Returns a floating point value in the range [0, range]. |
89 | 0 | double UnsignedRand(double range) { |
90 | 0 | return range * IntRand() / INT32_MAX; |
91 | 0 | } |
92 | | |
93 | | private: |
94 | | // Steps the generator to the next value. |
95 | 122M | void Iterate() { |
96 | 122M | seed_ *= 6364136223846793005ULL; |
97 | 122M | seed_ += 1442695040888963407ULL; |
98 | 122M | } |
99 | | |
100 | | // The current value of the seed. |
101 | | uint64_t seed_{1}; |
102 | | }; |
103 | | |
104 | | // Remove newline (if any) at the end of the string. |
105 | 76.3k | inline void chomp_string(char *str) { |
106 | 76.3k | int last_index = static_cast<int>(strlen(str)) - 1; |
107 | 152k | while (last_index >= 0 && (str[last_index] == '\n' || str[last_index] == '\r')) { |
108 | 76.3k | str[last_index--] = '\0'; |
109 | 76.3k | } |
110 | 76.3k | } |
111 | | |
112 | | // return the smallest multiple of block_size greater than or equal to n. |
113 | 2.11M | inline int RoundUp(int n, int block_size) { |
114 | 2.11M | return block_size * ((n + block_size - 1) / block_size); |
115 | 2.11M | } |
116 | | |
117 | | // Clip a numeric value to the interval [lower_bound, upper_bound]. |
118 | | template <typename T> |
119 | 20.0G | inline T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound) { |
120 | 20.0G | if (x < lower_bound) { |
121 | 133M | return lower_bound; |
122 | 133M | } |
123 | 19.8G | if (x > upper_bound) { |
124 | 57.8M | return upper_bound; |
125 | 57.8M | } |
126 | 19.8G | return x; |
127 | 19.8G | } int tesseract::ClipToRange<int>(int const&, int const&, int const&) Line | Count | Source | 119 | 14.2G | inline T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound) { | 120 | 14.2G | if (x < lower_bound) { | 121 | 111M | return lower_bound; | 122 | 111M | } | 123 | 14.1G | if (x > upper_bound) { | 124 | 29.9M | return upper_bound; | 125 | 29.9M | } | 126 | 14.1G | return x; | 127 | 14.1G | } |
float tesseract::ClipToRange<float>(float const&, float const&, float const&) Line | Count | Source | 119 | 5.28G | inline T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound) { | 120 | 5.28G | if (x < lower_bound) { | 121 | 10.8k | return lower_bound; | 122 | 10.8k | } | 123 | 5.28G | if (x > upper_bound) { | 124 | 34.3k | return upper_bound; | 125 | 34.3k | } | 126 | 5.28G | return x; | 127 | 5.28G | } |
unsigned int tesseract::ClipToRange<unsigned int>(unsigned int const&, unsigned int const&, unsigned int const&) Line | Count | Source | 119 | 3.76k | inline T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound) { | 120 | 3.76k | if (x < lower_bound) { | 121 | 0 | return lower_bound; | 122 | 0 | } | 123 | 3.76k | if (x > upper_bound) { | 124 | 0 | return upper_bound; | 125 | 0 | } | 126 | 3.76k | return x; | 127 | 3.76k | } |
double tesseract::ClipToRange<double>(double const&, double const&, double const&) Line | Count | Source | 119 | 5.43M | inline T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound) { | 120 | 5.43M | if (x < lower_bound) { | 121 | 721k | return lower_bound; | 122 | 721k | } | 123 | 4.71M | if (x > upper_bound) { | 124 | 2.41M | return upper_bound; | 125 | 2.41M | } | 126 | 2.30M | return x; | 127 | 4.71M | } |
short tesseract::ClipToRange<short>(short const&, short const&, short const&) Line | Count | Source | 119 | 458M | inline T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound) { | 120 | 458M | if (x < lower_bound) { | 121 | 21.3M | return lower_bound; | 122 | 21.3M | } | 123 | 436M | if (x > upper_bound) { | 124 | 25.4M | return upper_bound; | 125 | 25.4M | } | 126 | 411M | return x; | 127 | 436M | } |
|
128 | | |
129 | | // Extend the range [lower_bound, upper_bound] to include x. |
130 | | template <typename T1, typename T2> |
131 | 147M | inline void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound) { |
132 | 147M | if (x < *lower_bound) { |
133 | 15.1M | *lower_bound = x; |
134 | 15.1M | } |
135 | 147M | if (x > *upper_bound) { |
136 | 28.0M | *upper_bound = x; |
137 | 28.0M | } |
138 | 147M | } void tesseract::UpdateRange<int, int>(int const&, int*, int*) Line | Count | Source | 131 | 17.6M | inline void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound) { | 132 | 17.6M | if (x < *lower_bound) { | 133 | 4.08M | *lower_bound = x; | 134 | 4.08M | } | 135 | 17.6M | if (x > *upper_bound) { | 136 | 6.76M | *upper_bound = x; | 137 | 6.76M | } | 138 | 17.6M | } |
void tesseract::UpdateRange<short, float>(short const&, float*, float*) Line | Count | Source | 131 | 127M | inline void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound) { | 132 | 127M | if (x < *lower_bound) { | 133 | 10.6M | *lower_bound = x; | 134 | 10.6M | } | 135 | 127M | if (x > *upper_bound) { | 136 | 20.6M | *upper_bound = x; | 137 | 20.6M | } | 138 | 127M | } |
Unexecuted instantiation: void tesseract::UpdateRange<short, int>(short const&, int*, int*) void tesseract::UpdateRange<unsigned char, unsigned char>(unsigned char const&, unsigned char*, unsigned char*) Line | Count | Source | 131 | 3.85k | inline void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound) { | 132 | 3.85k | if (x < *lower_bound) { | 133 | 912 | *lower_bound = x; | 134 | 912 | } | 135 | 3.85k | if (x > *upper_bound) { | 136 | 1.70k | *upper_bound = x; | 137 | 1.70k | } | 138 | 3.85k | } |
void tesseract::UpdateRange<double, double>(double const&, double*, double*) Line | Count | Source | 131 | 2.46M | inline void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound) { | 132 | 2.46M | if (x < *lower_bound) { | 133 | 367k | *lower_bound = x; | 134 | 367k | } | 135 | 2.46M | if (x > *upper_bound) { | 136 | 599k | *upper_bound = x; | 137 | 599k | } | 138 | 2.46M | } |
|
139 | | |
140 | | // Decrease lower_bound to be <= x_lo AND increase upper_bound to be >= x_hi. |
141 | | template <typename T1, typename T2> |
142 | 3.11M | inline void UpdateRange(const T1 &x_lo, const T1 &x_hi, T2 *lower_bound, T2 *upper_bound) { |
143 | 3.11M | if (x_lo < *lower_bound) { |
144 | 373k | *lower_bound = x_lo; |
145 | 373k | } |
146 | 3.11M | if (x_hi > *upper_bound) { |
147 | 379k | *upper_bound = x_hi; |
148 | 379k | } |
149 | 3.11M | } |
150 | | |
151 | | // Intersect the range [*lower2, *upper2] with the range [lower1, upper1], |
152 | | // putting the result back in [*lower2, *upper2]. |
153 | | // If non-intersecting ranges are given, we end up with *lower2 > *upper2. |
154 | | template <typename T> |
155 | 14.5M | inline void IntersectRange(const T &lower1, const T &upper1, T *lower2, T *upper2) { |
156 | 14.5M | if (lower1 > *lower2) { |
157 | 7.23M | *lower2 = lower1; |
158 | 7.23M | } |
159 | 14.5M | if (upper1 < *upper2) { |
160 | 7.61M | *upper2 = upper1; |
161 | 7.61M | } |
162 | 14.5M | } |
163 | | |
164 | | // Proper modulo arithmetic operator. Returns a mod b that works for -ve a. |
165 | | // For any integer a and positive b, returns r : 0<=r<b and a=n*b + r for |
166 | | // some integer n. |
167 | 348M | inline int Modulo(int a, int b) { |
168 | 348M | return (a % b + b) % b; |
169 | 348M | } |
170 | | |
171 | | // Integer division operator with rounding that works for negative input. |
172 | | // Returns a divided by b, rounded to the nearest integer, without double |
173 | | // counting at 0. With simple rounding 1/3 = 0, 0/3 = 0 -1/3 = 0, -2/3 = 0, |
174 | | // -3/3 = 0 and -4/3 = -1. |
175 | | // I want 1/3 = 0, 0/3 = 0, -1/3 = 0, -2/3 = -1, -3/3 = -1 and -4/3 = -1. |
176 | 70.5k | inline int DivRounded(int a, int b) { |
177 | 70.5k | if (b < 0) { |
178 | 0 | return -DivRounded(a, -b); |
179 | 0 | } |
180 | 70.5k | return a >= 0 ? (a + b / 2) / b : (a - b / 2) / b; |
181 | 70.5k | } |
182 | | |
183 | | // Return a double cast to int with rounding. |
184 | 309M | inline int IntCastRounded(double x) { |
185 | 309M | assert(std::isfinite(x)); |
186 | 309M | assert(x < INT_MAX); |
187 | 309M | assert(x > INT_MIN); |
188 | 309M | return x >= 0.0 ? static_cast<int>(x + 0.5) : -static_cast<int>(-x + 0.5); |
189 | 309M | } |
190 | | |
191 | | // Return a float cast to int with rounding. |
192 | 13.9G | inline int IntCastRounded(float x) { |
193 | 13.9G | assert(std::isfinite(x)); |
194 | 13.9G | return x >= 0.0F ? static_cast<int>(x + 0.5F) : -static_cast<int>(-x + 0.5F); |
195 | 13.9G | } |
196 | | |
197 | | // Reverse the order of bytes in a n byte quantity for big/little-endian switch. |
198 | 0 | inline void ReverseN(void *ptr, int num_bytes) { |
199 | 0 | assert(num_bytes == 1 || num_bytes == 2 || num_bytes == 4 || num_bytes == 8); |
200 | 0 | char *cptr = static_cast<char *>(ptr); |
201 | 0 | int halfsize = num_bytes / 2; |
202 | 0 | for (int i = 0; i < halfsize; ++i) { |
203 | 0 | char tmp = cptr[i]; |
204 | 0 | cptr[i] = cptr[num_bytes - 1 - i]; |
205 | 0 | cptr[num_bytes - 1 - i] = tmp; |
206 | 0 | } |
207 | 0 | } |
208 | | |
209 | | // Reverse the order of bytes in a 32 bit quantity for big/little-endian switch. |
210 | 0 | inline void Reverse32(void *ptr) { |
211 | 0 | ReverseN(ptr, 4); |
212 | 0 | } |
213 | | |
214 | | // Reads a vector of simple types from the given file. Assumes that bitwise |
215 | | // read/write will work with ReverseN according to sizeof(T). |
216 | | // Returns false in case of error. |
217 | | // If swap is true, assumes a big/little-endian swap is needed. |
218 | | template <typename T> |
219 | | bool DeSerialize(bool swap, FILE *fp, std::vector<T> &data) { |
220 | | uint32_t size; |
221 | | if (fread(&size, sizeof(size), 1, fp) != 1) { |
222 | | return false; |
223 | | } |
224 | | if (swap) { |
225 | | Reverse32(&size); |
226 | | } |
227 | | // Arbitrarily limit the number of elements to protect against bad data. |
228 | | assert(size <= UINT16_MAX); |
229 | | if (size > UINT16_MAX) { |
230 | | return false; |
231 | | } |
232 | | // TODO: optimize. |
233 | | data.resize(size); |
234 | | if (size > 0) { |
235 | | if (fread(&data[0], sizeof(T), size, fp) != size) { |
236 | | return false; |
237 | | } |
238 | | if (swap) { |
239 | | for (uint32_t i = 0; i < size; ++i) { |
240 | | ReverseN(&data[i], sizeof(T)); |
241 | | } |
242 | | } |
243 | | } |
244 | | return true; |
245 | | } |
246 | | |
247 | | // Writes a vector of simple types to the given file. Assumes that bitwise |
248 | | // read/write of T will work. Returns false in case of error. |
249 | | template <typename T> |
250 | 0 | bool Serialize(FILE *fp, const std::vector<T> &data) { |
251 | 0 | uint32_t size = data.size(); |
252 | 0 | if (fwrite(&size, sizeof(size), 1, fp) != 1) { |
253 | 0 | return false; |
254 | 0 | } else if constexpr (std::is_class<T>::value) { |
255 | | // Serialize a tesseract class. |
256 | 0 | for (auto &item : data) { |
257 | 0 | if (!item.Serialize(fp)) { |
258 | 0 | return false; |
259 | 0 | } |
260 | 0 | } |
261 | 0 | } else if constexpr (std::is_pointer<T>::value) { |
262 | | // Serialize pointers. |
263 | 0 | for (auto &item : data) { |
264 | 0 | uint8_t non_null = (item != nullptr); |
265 | 0 | if (!Serialize(fp, &non_null)) { |
266 | 0 | return false; |
267 | 0 | } |
268 | 0 | if (non_null) { |
269 | 0 | if (!item->Serialize(fp)) { |
270 | 0 | return false; |
271 | 0 | } |
272 | 0 | } |
273 | 0 | } |
274 | 0 | } else if (size > 0) { |
275 | 0 | if (fwrite(&data[0], sizeof(T), size, fp) != size) { |
276 | 0 | return false; |
277 | 0 | } |
278 | 0 | } |
279 | 0 | return true; |
280 | 0 | } Unexecuted instantiation: bool tesseract::Serialize<int>(_IO_FILE*, std::__1::vector<int, std::__1::allocator<int> > const&) Unexecuted instantiation: bool tesseract::Serialize<tesseract::UnicharAndFonts>(_IO_FILE*, std::__1::vector<tesseract::UnicharAndFonts, std::__1::allocator<tesseract::UnicharAndFonts> > const&) Unexecuted instantiation: bool tesseract::Serialize<tesseract::Shape*>(_IO_FILE*, std::__1::vector<tesseract::Shape*, std::__1::allocator<tesseract::Shape*> > const&) Unexecuted instantiation: bool tesseract::Serialize<short>(_IO_FILE*, std::__1::vector<short, std::__1::allocator<short> > const&) |
281 | | |
282 | | } // namespace tesseract |
283 | | |
284 | | #endif // TESSERACT_CCUTIL_HELPERS_H_ |