Coverage Report

Created: 2025-07-23 07:12

/src/tesseract/src/ccutil/helpers.h
Line
Count
Source (jump to first uncovered line)
1
/******************************************************************************
2
 *
3
 * File:         helpers.h
4
 * Description:  General utility functions
5
 * Author:       Daria Antonova
6
 *
7
 * (c) Copyright 2009, Google Inc.
8
 ** Licensed under the Apache License, Version 2.0 (the "License");
9
 ** you may not use this file except in compliance with the License.
10
 ** You may obtain a copy of the License at
11
 ** http://www.apache.org/licenses/LICENSE-2.0
12
 ** Unless required by applicable law or agreed to in writing, software
13
 ** distributed under the License is distributed on an "AS IS" BASIS,
14
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 ** See the License for the specific language governing permissions and
16
 ** limitations under the License.
17
 *
18
 *****************************************************************************/
19
20
#ifndef TESSERACT_CCUTIL_HELPERS_H_
21
#define TESSERACT_CCUTIL_HELPERS_H_
22
23
#include <algorithm> // for std::find, std::reverse
#include <cassert>
#include <climits> // for INT_MIN, INT_MAX
#include <cmath> // std::isfinite
#include <cstdint> // for int32_t, uint8_t, uint32_t, uint64_t, UINT16_MAX
#include <cstdio>
#include <cstring> // for std::strlen
#include <string>
#include <type_traits> // for std::is_class, std::is_pointer
#include <vector>

#include "serialis.h"
32
33
namespace tesseract {
34
35
// Copy a std::string to a newly allocated, NUL-terminated char array.
// The buffer is created with new[] and holds from.length() + 1 bytes; it is
// handed to the caller, who has to release it with delete[].
// TODO: Remove this function once the related code has been converted
// to use std::string.
inline char *copy_string(const std::string &from) {
  const auto length = from.length();
  char *target_string = new char[length + 1];
  // std::string::copy() does not write a terminator, so add one explicitly.
  from.copy(target_string, length);
  target_string[length] = '\0';
  return target_string;
}
45
46
// Returns true if at least one element of data compares equal to value.
// The search is a linear scan (std::find) using operator== of T.
template <class T>
inline bool contains(const std::vector<T> &data, const T &value) {
  return std::find(data.begin(), data.end(), value) != data.end();
}
50
51
76.0k
// Splits s at every occurrence of the separator c and returns the pieces in
// order. Empty pieces (leading, trailing or between consecutive separators)
// are dropped, so the result never contains an empty string.
// The result is returned as a non-const value so that callers can move from
// it instead of copying.
inline std::vector<std::string> split(const std::string &s, char c) {
  std::vector<std::string> tokens;
  std::string::size_type start = 0;
  while (start < s.size()) {
    const auto found = s.find(c, start);
    // No further separator: the current piece runs to the end of s.
    const auto stop = (found == std::string::npos) ? s.size() : found;
    if (stop > start) {
      // Construct the substring directly inside the vector.
      tokens.emplace_back(s, start, stop - start);
    }
    start = stop + 1;
  }
  return tokens;
}
67
68
// A simple linear congruential random number generator,
// using Knuth's constants from:
// http://en.wikipedia.org/wiki/Linear_congruential_generator.
// The state is a single 64 bit value; a default-constructed generator starts
// with a seed of 1.
class TRand {
public:
  TRand() = default;
  // Sets the seed to the given value.
  void set_seed(uint64_t seed) {
    seed_ = seed;
  }

  // Advances the generator and returns an integer in the range
  // 0 to INT32_MAX.
  int32_t IntRand() {
    Iterate();
    // Shifting the 64 bit state right by 33 leaves its top 31 bits, which
    // always fit in a non-negative int32_t.
    return static_cast<int32_t>(seed_ >> 33);
  }
  // Returns a floating point value in the range [-range, range].
  double SignedRand(double range) {
    return range * 2.0 * IntRand() / INT32_MAX - range;
  }
  // Returns a floating point value in the range [0, range].
  double UnsignedRand(double range) {
    return range * IntRand() / INT32_MAX;
  }

private:
  // Steps the generator to the next value. The arithmetic is unsigned, so it
  // wraps modulo 2^64.
  void Iterate() {
    seed_ *= 6364136223846793005ULL;
    seed_ += 1442695040888963407ULL;
  }

  // The current value of the seed.
  uint64_t seed_{1};
};
103
104
// Remove newline (if any) at the end of the string.
// Every trailing '\n' and '\r' character is overwritten with '\0' in place.
// A null pointer is accepted and ignored.
inline void chomp_string(char *str) {
  if (str == nullptr) {
    return;
  }
  // Keep the length as size_t so very long strings are not truncated to int.
  std::size_t length = std::strlen(str);
  while (length > 0 && (str[length - 1] == '\n' || str[length - 1] == '\r')) {
    str[--length] = '\0';
  }
}
111
112
// Return the smallest multiple of block_size greater than or equal to n.
// block_size must be positive. Negative n is rounded towards zero
// (e.g. RoundUp(-5, 4) == -4), and the computation does not form the
// intermediate n + block_size - 1, so it only overflows if the result itself
// does not fit in an int.
inline int RoundUp(int n, int block_size) {
  assert(block_size > 0);
  const int remainder = n % block_size;
  if (remainder == 0) {
    return n;
  }
  // For positive n step up to the next multiple; for negative n the remainder
  // is negative (truncating division), so removing it moves towards zero.
  return remainder > 0 ? n + (block_size - remainder) : n - remainder;
}
116
117
// Clip a numeric value to the interval [lower_bound, upper_bound].
// Returns lower_bound if x < lower_bound, otherwise upper_bound if
// x > upper_bound, otherwise x itself. The lower bound is tested first, so it
// wins if the bounds are passed in the wrong order. A value for which both
// comparisons are false (e.g. a floating point NaN) is returned unchanged.
template <typename T>
inline T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound) {
  if (x < lower_bound) {
    return lower_bound;
  }
  if (x > upper_bound) {
    return upper_bound;
  }
  return x;
}
128
129
// Extend the range [lower_bound, upper_bound] to include x.
// x is compared against each bound independently, so an initially inverted
// range (lower > upper) has both bounds set to x. When a bound is updated, x
// is converted to the bound type T2.
template <typename T1, typename T2>
inline void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound) {
  if (x < *lower_bound) {
    *lower_bound = static_cast<T2>(x);
  }
  if (x > *upper_bound) {
    *upper_bound = static_cast<T2>(x);
  }
}
139
140
// Decrease lower_bound to be <= x_lo AND increase upper_bound to be >= x_hi.
// Each bound is only ever widened, never narrowed. When a bound is updated,
// the new value is converted to the bound type T2.
template <typename T1, typename T2>
inline void UpdateRange(const T1 &x_lo, const T1 &x_hi, T2 *lower_bound, T2 *upper_bound) {
  if (x_lo < *lower_bound) {
    *lower_bound = static_cast<T2>(x_lo);
  }
  if (x_hi > *upper_bound) {
    *upper_bound = static_cast<T2>(x_hi);
  }
}
150
151
// Intersect the range [*lower2, *upper2] with the range [lower1, upper1],
// putting the result back in [*lower2, *upper2].
// If non-intersecting ranges are given, we end up with *lower2 > *upper2.
template <typename T>
inline void IntersectRange(const T &lower1, const T &upper1, T *lower2, T *upper2) {
  // The intersection starts at the larger of the two lower bounds...
  if (lower1 > *lower2) {
    *lower2 = lower1;
  }
  // ...and ends at the smaller of the two upper bounds.
  if (upper1 < *upper2) {
    *upper2 = upper1;
  }
}
163
164
// Proper modulo arithmetic operator. Returns a mod b that works for -ve a.
// For any integer a and positive b, returns r : 0<=r<b and a=n*b + r for
// some integer n. (For negative b the result takes the sign of b instead.)
// b must not be zero.
inline int Modulo(int a, int b) {
  int remainder = a % b;
  // The built-in % truncates towards zero, so the remainder carries the sign
  // of a. Shift it by b when its sign disagrees with b. Adjusting the
  // remainder directly avoids computing a % b + b, which can overflow for
  // large b.
  if (remainder != 0 && ((remainder < 0) != (b < 0))) {
    remainder += b;
  }
  return remainder;
}
170
171
// Integer division operator with rounding that works for negative input.
// Returns a divided by b, rounded to the nearest integer, without double
// counting at 0. The naive form (a + b / 2) / b rounds negative quotients the
// wrong way: it gives -2/3 = 0 and -3/3 = 0. This function instead gives
// 1/3 = 0, 0/3 = 0, -1/3 = 0, -2/3 = -1, -3/3 = -1 and -4/3 = -1.
// Exact halves are rounded away from zero. b must not be zero.
inline int DivRounded(int a, int b) {
  assert(b != 0);
  // Work in long long so that neither negating b nor adding the half-divisor
  // can overflow an int.
  const long long numerator = a;
  const long long divisor = b < 0 ? -static_cast<long long>(b) : b;
  const long long half = divisor / 2;
  const long long quotient =
      numerator >= 0 ? (numerator + half) / divisor : (numerator - half) / divisor;
  // A negative b flips the sign of the result obtained for |b|.
  return static_cast<int>(b < 0 ? -quotient : quotient);
}
182
183
// Return a double cast to int with rounding.
// The value is rounded to the nearest integer by adding 0.5 to its magnitude
// and truncating, so exact halves move away from zero (2.5 -> 3, -2.5 -> -3).
// x must be finite and strictly between INT_MIN and INT_MAX; this is checked
// with assert only.
inline int IntCastRounded(double x) {
  assert(std::isfinite(x));
  assert(x < INT_MAX);
  assert(x > INT_MIN);
  return x >= 0.0 ? static_cast<int>(x + 0.5) : -static_cast<int>(-x + 0.5);
}
190
191
// Return a float cast to int with rounding.
// The value is rounded to the nearest integer by adding 0.5 to its magnitude
// and truncating, so exact halves move away from zero (2.5 -> 3, -2.5 -> -3).
// x must be finite and within the range of int; this is checked with assert
// only, since converting an out-of-range float to int is undefined.
inline int IntCastRounded(float x) {
  assert(std::isfinite(x));
  assert(x < static_cast<float>(INT_MAX));
  assert(x > static_cast<float>(INT_MIN));
  return x >= 0.0F ? static_cast<int>(x + 0.5F) : -static_cast<int>(-x + 0.5F);
}
196
197
// Reverse the order of bytes in a n byte quantity for big/little-endian switch.
// ptr addresses the first of num_bytes bytes, which are reversed in place.
// num_bytes must be 1, 2, 4 or 8 (checked with assert only).
inline void ReverseN(void *ptr, int num_bytes) {
  assert(num_bytes == 1 || num_bytes == 2 || num_bytes == 4 || num_bytes == 8);
  char *bytes = static_cast<char *>(ptr);
  std::reverse(bytes, bytes + num_bytes);
}
208
209
// Reverse the order of bytes in a 32 bit quantity for big/little-endian switch.
210
0
inline void Reverse32(void *ptr) {
211
0
  ReverseN(ptr, 4);
212
0
}
213
214
// Reads a vector of simple types from the given file. Assumes that bitwise
215
// read/write will work with ReverseN according to sizeof(T).
216
// Returns false in case of error.
217
// If swap is true, assumes a big/little-endian swap is needed.
218
template <typename T>
219
bool DeSerialize(bool swap, FILE *fp, std::vector<T> &data) {
220
  uint32_t size;
221
  if (fread(&size, sizeof(size), 1, fp) != 1) {
222
    return false;
223
  }
224
  if (swap) {
225
    Reverse32(&size);
226
  }
227
  // Arbitrarily limit the number of elements to protect against bad data.
228
  assert(size <= UINT16_MAX);
229
  if (size > UINT16_MAX) {
230
    return false;
231
  }
232
  // TODO: optimize.
233
  data.resize(size);
234
  if (size > 0) {
235
    if (fread(&data[0], sizeof(T), size, fp) != size) {
236
      return false;
237
    }
238
    if (swap) {
239
      for (uint32_t i = 0; i < size; ++i) {
240
        ReverseN(&data[i], sizeof(T));
241
      }
242
    }
243
  }
244
  return true;
245
}
246
247
// Writes a vector of simple types to the given file. Assumes that bitwise
248
// read/write of T will work. Returns false in case of error.
249
template <typename T>
250
0
bool Serialize(FILE *fp, const std::vector<T> &data) {
251
0
  uint32_t size = data.size();
252
0
  if (fwrite(&size, sizeof(size), 1, fp) != 1) {
253
0
    return false;
254
0
  } else if constexpr (std::is_class<T>::value) {
255
    // Serialize a tesseract class.
256
0
    for (auto &item : data) {
257
0
      if (!item.Serialize(fp)) {
258
0
        return false;
259
0
      }
260
0
    }
261
0
  } else if constexpr (std::is_pointer<T>::value) {
262
    // Serialize pointers.
263
0
    for (auto &item : data) {
264
0
      uint8_t non_null = (item != nullptr);
265
0
      if (!Serialize(fp, &non_null)) {
266
0
        return false;
267
0
      }
268
0
      if (non_null) {
269
0
        if (!item->Serialize(fp)) {
270
0
          return false;
271
0
        }
272
0
      }
273
0
    }
274
0
  } else if (size > 0) {
275
0
    if (fwrite(&data[0], sizeof(T), size, fp) != size) {
276
0
      return false;
277
0
    }
278
0
  }
279
0
  return true;
280
0
}
Unexecuted instantiation: bool tesseract::Serialize<int>(_IO_FILE*, std::__1::vector<int, std::__1::allocator<int> > const&)
Unexecuted instantiation: bool tesseract::Serialize<tesseract::UnicharAndFonts>(_IO_FILE*, std::__1::vector<tesseract::UnicharAndFonts, std::__1::allocator<tesseract::UnicharAndFonts> > const&)
Unexecuted instantiation: bool tesseract::Serialize<tesseract::Shape*>(_IO_FILE*, std::__1::vector<tesseract::Shape*, std::__1::allocator<tesseract::Shape*> > const&)
Unexecuted instantiation: bool tesseract::Serialize<short>(_IO_FILE*, std::__1::vector<short, std::__1::allocator<short> > const&)
281
282
} // namespace tesseract
283
284
#endif // TESSERACT_CCUTIL_HELPERS_H_