Coverage Report

Created: 2026-06-13 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/ccutil/helpers.h
Line
Count
Source
1
/******************************************************************************
2
 *
3
 * File:         helpers.h
4
 * Description:  General utility functions
5
 * Author:       Daria Antonova
6
 *
7
 * (c) Copyright 2009, Google Inc.
8
 ** Licensed under the Apache License, Version 2.0 (the "License");
9
 ** you may not use this file except in compliance with the License.
10
 ** You may obtain a copy of the License at
11
 ** http://www.apache.org/licenses/LICENSE-2.0
12
 ** Unless required by applicable law or agreed to in writing, software
13
 ** distributed under the License is distributed on an "AS IS" BASIS,
14
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
 ** See the License for the specific language governing permissions and
16
 ** limitations under the License.
17
 *
18
 *****************************************************************************/
19
20
#ifndef TESSERACT_CCUTIL_HELPERS_H_
21
#define TESSERACT_CCUTIL_HELPERS_H_
22
23
#include <cassert>
24
#include <climits> // for INT_MIN, INT_MAX
25
#include <cmath> // std::isfinite
26
#include <cstdio>
27
#include <algorithm>  // for std::find
28
#include <string>
29
#include <vector>
30
31
#include "serialis.h"
32
33
namespace tesseract {
34
35
// Copy a std::string to a newly allocated char *.
36
// TODO: Remove this function once the related code has been converted
37
// to use std::string.
38
173k
inline char *copy_string(const std::string &from) {
39
173k
  auto length = from.length();
40
173k
  char *target_string = new char[length + 1];
41
173k
  from.copy(target_string, length);
42
173k
  target_string[length] = '\0';
43
173k
  return target_string;
44
173k
}
45
46
template <class T>
47
3.48k
inline bool contains(const std::vector<T> &data, const T &value) {
48
3.48k
  return std::find(data.begin(), data.end(), value) != data.end();
49
3.48k
}
bool tesseract::contains<tesseract::ParagraphModel const*>(std::__1::vector<tesseract::ParagraphModel const*, std::__1::allocator<tesseract::ParagraphModel const*> > const&, tesseract::ParagraphModel const* const&)
Line
Count
Source
47
2.55k
inline bool contains(const std::vector<T> &data, const T &value) {
48
2.55k
  return std::find(data.begin(), data.end(), value) != data.end();
49
2.55k
}
bool tesseract::contains<tesseract::ParagraphModel*>(std::__1::vector<tesseract::ParagraphModel*, std::__1::allocator<tesseract::ParagraphModel*> > const&, tesseract::ParagraphModel* const&)
Line
Count
Source
47
490
inline bool contains(const std::vector<T> &data, const T &value) {
48
490
  return std::find(data.begin(), data.end(), value) != data.end();
49
490
}
bool tesseract::contains<int>(std::__1::vector<int, std::__1::allocator<int> > const&, int const&)
Line
Count
Source
47
444
inline bool contains(const std::vector<T> &data, const T &value) {
48
444
  return std::find(data.begin(), data.end(), value) != data.end();
49
444
}
50
51
76.0k
inline const std::vector<std::string> split(const std::string &s, char c) {
52
76.0k
  std::string buff;
53
76.0k
  std::vector<std::string> v;
54
607k
  for (auto n : s) {
55
607k
    if (n != c) {
56
455k
      buff += n;
57
455k
    } else if (n == c && !buff.empty()) {
58
152k
      v.push_back(buff);
59
152k
      buff.clear();
60
152k
    }
61
607k
  }
62
76.0k
  if (!buff.empty()) {
63
76.0k
    v.push_back(buff);
64
76.0k
  }
65
76.0k
  return v;
66
76.0k
}
67
68
// A simple linear congruential random number generator,
69
// using Knuth's constants from:
70
// http://en.wikipedia.org/wiki/Linear_congruential_generator.
71
class TRand {
72
public:
73
4
  TRand() = default;
74
  // Sets the seed to the given value.
75
396k
  void set_seed(uint64_t seed) {
76
396k
    seed_ = seed;
77
396k
  }
78
79
  // Returns an integer in the range 0 to INT32_MAX.
80
111M
  int32_t IntRand() {
81
111M
    Iterate();
82
111M
    return seed_ >> 33;
83
111M
  }
84
  // Returns a floating point value in the range [-range, range].
85
111M
  double SignedRand(double range) {
86
111M
    return range * 2.0 * IntRand() / INT32_MAX - range;
87
111M
  }
88
  // Returns a floating point value in the range [0, range].
89
0
  double UnsignedRand(double range) {
90
0
    return range * IntRand() / INT32_MAX;
91
0
  }
92
93
private:
94
  // Steps the generator to the next value.
95
111M
  void Iterate() {
96
111M
    seed_ *= 6364136223846793005ULL;
97
111M
    seed_ += 1442695040888963407ULL;
98
111M
  }
99
100
  // The current value of the seed.
101
  uint64_t seed_{1};
102
};
103
104
// Remove newline (if any) at the end of the string.
105
76.3k
inline void chomp_string(char *str) {
106
76.3k
  int last_index = static_cast<int>(strlen(str)) - 1;
107
152k
  while (last_index >= 0 && (str[last_index] == '\n' || str[last_index] == '\r')) {
108
76.3k
    str[last_index--] = '\0';
109
76.3k
  }
110
76.3k
}
111
112
// return the smallest multiple of block_size greater than or equal to n.
113
2.87M
inline int RoundUp(int n, int block_size) {
114
2.87M
  return block_size * ((n + block_size - 1) / block_size);
115
2.87M
}
116
117
// Clip a numeric value to the interval [lower_bound, upper_bound].
118
template <typename T>
119
18.1G
inline T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound) {
120
18.1G
  if (x < lower_bound) {
121
121M
    return lower_bound;
122
121M
  }
123
18.0G
  if (x > upper_bound) {
124
53.4M
    return upper_bound;
125
53.4M
  }
126
17.9G
  return x;
127
18.0G
}
int tesseract::ClipToRange<int>(int const&, int const&, int const&)
Line
Count
Source
119
12.9G
inline T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound) {
120
12.9G
  if (x < lower_bound) {
121
100M
    return lower_bound;
122
100M
  }
123
12.8G
  if (x > upper_bound) {
124
26.8M
    return upper_bound;
125
26.8M
  }
126
12.7G
  return x;
127
12.8G
}
float tesseract::ClipToRange<float>(float const&, float const&, float const&)
Line
Count
Source
119
4.78G
inline T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound) {
120
4.78G
  if (x < lower_bound) {
121
11.5k
    return lower_bound;
122
11.5k
  }
123
4.78G
  if (x > upper_bound) {
124
25.8k
    return upper_bound;
125
25.8k
  }
126
4.78G
  return x;
127
4.78G
}
unsigned int tesseract::ClipToRange<unsigned int>(unsigned int const&, unsigned int const&, unsigned int const&)
Line
Count
Source
119
2.11k
inline T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound) {
120
2.11k
  if (x < lower_bound) {
121
0
    return lower_bound;
122
0
  }
123
2.11k
  if (x > upper_bound) {
124
0
    return upper_bound;
125
0
  }
126
2.11k
  return x;
127
2.11k
}
double tesseract::ClipToRange<double>(double const&, double const&, double const&)
Line
Count
Source
119
4.46M
inline T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound) {
120
4.46M
  if (x < lower_bound) {
121
671k
    return lower_bound;
122
671k
  }
123
3.78M
  if (x > upper_bound) {
124
1.63M
    return upper_bound;
125
1.63M
  }
126
2.15M
  return x;
127
3.78M
}
short tesseract::ClipToRange<short>(short const&, short const&, short const&)
Line
Count
Source
119
426M
inline T ClipToRange(const T &x, const T &lower_bound, const T &upper_bound) {
120
426M
  if (x < lower_bound) {
121
20.5M
    return lower_bound;
122
20.5M
  }
123
406M
  if (x > upper_bound) {
124
24.9M
    return upper_bound;
125
24.9M
  }
126
381M
  return x;
127
406M
}
128
129
// Extend the range [lower_bound, upper_bound] to include x.
130
template <typename T1, typename T2>
131
138M
inline void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound) {
132
138M
  if (x < *lower_bound) {
133
14.1M
    *lower_bound = x;
134
14.1M
  }
135
138M
  if (x > *upper_bound) {
136
26.0M
    *upper_bound = x;
137
26.0M
  }
138
138M
}
void tesseract::UpdateRange<int, int>(int const&, int*, int*)
Line
Count
Source
131
15.1M
inline void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound) {
132
15.1M
  if (x < *lower_bound) {
133
3.52M
    *lower_bound = x;
134
3.52M
  }
135
15.1M
  if (x > *upper_bound) {
136
5.84M
    *upper_bound = x;
137
5.84M
  }
138
15.1M
}
void tesseract::UpdateRange<short, float>(short const&, float*, float*)
Line
Count
Source
131
120M
inline void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound) {
132
120M
  if (x < *lower_bound) {
133
10.2M
    *lower_bound = x;
134
10.2M
  }
135
120M
  if (x > *upper_bound) {
136
19.6M
    *upper_bound = x;
137
19.6M
  }
138
120M
}
Unexecuted instantiation: void tesseract::UpdateRange<short, int>(short const&, int*, int*)
void tesseract::UpdateRange<unsigned char, unsigned char>(unsigned char const&, unsigned char*, unsigned char*)
Line
Count
Source
131
3.85k
inline void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound) {
132
3.85k
  if (x < *lower_bound) {
133
912
    *lower_bound = x;
134
912
  }
135
3.85k
  if (x > *upper_bound) {
136
1.70k
    *upper_bound = x;
137
1.70k
  }
138
3.85k
}
void tesseract::UpdateRange<double, double>(double const&, double*, double*)
Line
Count
Source
131
2.36M
inline void UpdateRange(const T1 &x, T2 *lower_bound, T2 *upper_bound) {
132
2.36M
  if (x < *lower_bound) {
133
344k
    *lower_bound = x;
134
344k
  }
135
2.36M
  if (x > *upper_bound) {
136
561k
    *upper_bound = x;
137
561k
  }
138
2.36M
}
139
140
// Decrease lower_bound to be <= x_lo AND increase upper_bound to be >= x_hi.
141
template <typename T1, typename T2>
142
2.63M
inline void UpdateRange(const T1 &x_lo, const T1 &x_hi, T2 *lower_bound, T2 *upper_bound) {
143
2.63M
  if (x_lo < *lower_bound) {
144
343k
    *lower_bound = x_lo;
145
343k
  }
146
2.63M
  if (x_hi > *upper_bound) {
147
353k
    *upper_bound = x_hi;
148
353k
  }
149
2.63M
}
150
151
// Intersect the range [*lower2, *upper2] with the range [lower1, upper1],
152
// putting the result back in [*lower2, *upper2].
153
// If non-intersecting ranges are given, we end up with *lower2 > *upper2.
154
template <typename T>
155
12.2M
inline void IntersectRange(const T &lower1, const T &upper1, T *lower2, T *upper2) {
156
12.2M
  if (lower1 > *lower2) {
157
6.03M
    *lower2 = lower1;
158
6.03M
  }
159
12.2M
  if (upper1 < *upper2) {
160
6.40M
    *upper2 = upper1;
161
6.40M
  }
162
12.2M
}
163
164
// Proper modulo arithmetic operator. Returns a mod b that works for -ve a.
165
// For any integer a and positive b, returns r : 0<=r<b and a=n*b + r for
166
// some integer n.
167
327M
inline int Modulo(int a, int b) {
168
327M
  return (a % b + b) % b;
169
327M
}
170
171
// Integer division operator with rounding that works for negative input.
172
// Returns a divided by b, rounded to the nearest integer, without double
173
// counting at 0. With simple rounding 1/3 = 0, 0/3 = 0 -1/3 = 0, -2/3 = 0,
174
// -3/3 = 0 and -4/3 = -1.
175
// I want 1/3 = 0, 0/3 = 0, -1/3 = 0, -2/3 = -1, -3/3 = -1 and -4/3 = -1.
176
76.9k
inline int DivRounded(int a, int b) {
177
76.9k
  if (b < 0) {
178
0
    return -DivRounded(a, -b);
179
0
  }
180
76.9k
  return a >= 0 ? (a + b / 2) / b : (a - b / 2) / b;
181
76.9k
}
182
183
// Return a double cast to int with rounding.
184
286M
inline int IntCastRounded(double x) {
185
286M
  assert(std::isfinite(x));
186
286M
  assert(x < INT_MAX);
187
286M
  assert(x > INT_MIN);
188
286M
  return x >= 0.0 ? static_cast<int>(x + 0.5) : -static_cast<int>(-x + 0.5);
189
286M
}
190
191
// Return a float cast to int with rounding.
192
12.6G
inline int IntCastRounded(float x) {
193
12.6G
  assert(std::isfinite(x));
194
12.6G
  return x >= 0.0F ? static_cast<int>(x + 0.5F) : -static_cast<int>(-x + 0.5F);
195
12.6G
}
196
197
// Reverse the order of bytes in a n byte quantity for big/little-endian switch.
198
0
inline void ReverseN(void *ptr, int num_bytes) {
199
0
  assert(num_bytes == 1 || num_bytes == 2 || num_bytes == 4 || num_bytes == 8);
200
0
  char *cptr = static_cast<char *>(ptr);
201
0
  int halfsize = num_bytes / 2;
202
0
  for (int i = 0; i < halfsize; ++i) {
203
0
    char tmp = cptr[i];
204
0
    cptr[i] = cptr[num_bytes - 1 - i];
205
0
    cptr[num_bytes - 1 - i] = tmp;
206
0
  }
207
0
}
208
209
// Reverse the order of bytes in a 32 bit quantity for big/little-endian switch.
210
0
inline void Reverse32(void *ptr) {
211
0
  ReverseN(ptr, 4);
212
0
}
213
214
// Reads a vector of simple types from the given file. Assumes that bitwise
215
// read/write will work with ReverseN according to sizeof(T).
216
// Returns false in case of error.
217
// If swap is true, assumes a big/little-endian swap is needed.
218
template <typename T>
219
bool DeSerialize(bool swap, FILE *fp, std::vector<T> &data) {
220
  uint32_t size;
221
  if (fread(&size, sizeof(size), 1, fp) != 1) {
222
    return false;
223
  }
224
  if (swap) {
225
    Reverse32(&size);
226
  }
227
  // Arbitrarily limit the number of elements to protect against bad data.
228
  assert(size <= UINT16_MAX);
229
  if (size > UINT16_MAX) {
230
    return false;
231
  }
232
  // TODO: optimize.
233
  data.resize(size);
234
  if (size > 0) {
235
    if (fread(&data[0], sizeof(T), size, fp) != size) {
236
      return false;
237
    }
238
    if (swap) {
239
      for (uint32_t i = 0; i < size; ++i) {
240
        ReverseN(&data[i], sizeof(T));
241
      }
242
    }
243
  }
244
  return true;
245
}
246
247
// Writes a vector of simple types to the given file. Assumes that bitwise
248
// read/write of T will work. Returns false in case of error.
249
template <typename T>
250
0
bool Serialize(FILE *fp, const std::vector<T> &data) {
251
0
  uint32_t size = data.size();
252
0
  if (fwrite(&size, sizeof(size), 1, fp) != 1) {
253
0
    return false;
254
0
  } else if constexpr (std::is_class<T>::value) {
255
    // Serialize a tesseract class.
256
0
    for (auto &item : data) {
257
0
      if (!item.Serialize(fp)) {
258
0
        return false;
259
0
      }
260
0
    }
261
0
  } else if constexpr (std::is_pointer<T>::value) {
262
    // Serialize pointers.
263
0
    for (auto &item : data) {
264
0
      uint8_t non_null = (item != nullptr);
265
0
      if (!Serialize(fp, &non_null)) {
266
0
        return false;
267
0
      }
268
0
      if (non_null) {
269
0
        if (!item->Serialize(fp)) {
270
0
          return false;
271
0
        }
272
0
      }
273
0
    }
274
0
  } else if (size > 0) {
275
0
    if (fwrite(&data[0], sizeof(T), size, fp) != size) {
276
0
      return false;
277
0
    }
278
0
  }
279
0
  return true;
280
0
}
Unexecuted instantiation: bool tesseract::Serialize<int>(_IO_FILE*, std::__1::vector<int, std::__1::allocator<int> > const&)
Unexecuted instantiation: bool tesseract::Serialize<tesseract::UnicharAndFonts>(_IO_FILE*, std::__1::vector<tesseract::UnicharAndFonts, std::__1::allocator<tesseract::UnicharAndFonts> > const&)
Unexecuted instantiation: bool tesseract::Serialize<tesseract::Shape*>(_IO_FILE*, std::__1::vector<tesseract::Shape*, std::__1::allocator<tesseract::Shape*> > const&)
Unexecuted instantiation: bool tesseract::Serialize<short>(_IO_FILE*, std::__1::vector<short, std::__1::allocator<short> > const&)
281
282
} // namespace tesseract
283
284
#endif // TESSERACT_CCUTIL_HELPERS_H_