Coverage Report

Created: 2026-01-13 07:11

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/ccmain/thresholder.cpp
Line
Count
Source
1
///////////////////////////////////////////////////////////////////////
2
// File:        thresholder.cpp
3
// Description: Base API for thresholding images in tesseract.
4
// Author:      Ray Smith
5
//
6
// (C) Copyright 2008, Google Inc.
7
// Licensed under the Apache License, Version 2.0 (the "License");
8
// you may not use this file except in compliance with the License.
9
// You may obtain a copy of the License at
10
// http://www.apache.org/licenses/LICENSE-2.0
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS,
13
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
// See the License for the specific language governing permissions and
15
// limitations under the License.
16
//
17
///////////////////////////////////////////////////////////////////////
18
19
// Include automatically generated configuration file
20
#ifdef HAVE_CONFIG_H
21
#  include "config_auto.h"
22
#endif
23
24
#include "otsuthr.h"
25
#include "thresholder.h"
26
#include "tprintf.h" // for tprintf
27
28
#include <allheaders.h>
29
#include <tesseract/baseapi.h> // for api->GetIntVariable()
30
31
#include <algorithm> // for std::max, std::min
32
#include <cstdint>   // for uint32_t
33
#include <cstring>
34
#include <tuple>
35
36
namespace tesseract {
37
38
// Constructs a thresholder that holds no image: zero image dimensions,
// zero channels and words-per-line, a scale of 1 and both resolution
// values set to 300.
ImageThresholder::ImageThresholder()
    : pix_(nullptr),
      image_width_(0),
      image_height_(0),
      pix_channels_(0),
      pix_wpl_(0),
      scale_(1),
      yres_(300),
      estimated_res_(300) {
  // With no image attached the processing rectangle starts out empty.
  SetRectangle(0, 0, 0, 0);
}
49
50
0
// Releases the held image (if any) through Clear().
ImageThresholder::~ImageThresholder() {
  this->Clear();
}
53
54
// Destroy the Pix if there is one, freeing memory.
55
0
void ImageThresholder::Clear() {
56
0
  pix_.destroy();
57
0
}
58
59
// Return true if no image has been set.
60
16.2k
bool ImageThresholder::IsEmpty() const {
61
16.2k
  return pix_ == nullptr;
62
16.2k
}
63
64
// SetImage makes a copy of all the image data, so it may be deleted
65
// immediately after this call.
66
// Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
67
// Palette color images will not work properly and must be converted to
68
// 24 bit.
69
// Binary images of 1 bit per pixel may also be given but they must be
70
// byte packed with the MSB of the first byte being the first pixel, and a
71
// one pixel is WHITE. For binary images set bytes_per_pixel=0.
72
void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int height,
73
0
                                int bytes_per_pixel, int bytes_per_line) {
74
0
  int bpp = bytes_per_pixel * 8;
75
0
  if (bpp == 0) {
76
0
    bpp = 1;
77
0
  }
78
0
  Image pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
79
0
  l_uint32 *data = pixGetData(pix);
80
0
  int wpl = pixGetWpl(pix);
81
0
  switch (bpp) {
82
0
    case 1:
83
0
      for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
84
0
        for (int x = 0; x < width; ++x) {
85
0
          if (imagedata[x / 8] & (0x80 >> (x % 8))) {
86
0
            CLEAR_DATA_BIT(data, x);
87
0
          } else {
88
0
            SET_DATA_BIT(data, x);
89
0
          }
90
0
        }
91
0
      }
92
0
      break;
93
94
0
    case 8:
95
      // Greyscale just copies the bytes in the right order.
96
0
      for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
97
0
        for (int x = 0; x < width; ++x) {
98
0
          SET_DATA_BYTE(data, x, imagedata[x]);
99
0
        }
100
0
      }
101
0
      break;
102
103
0
    case 24:
104
      // Put the colors in the correct places in the line buffer.
105
0
      for (int y = 0; y < height; ++y, imagedata += bytes_per_line) {
106
0
        for (int x = 0; x < width; ++x, ++data) {
107
0
          SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]);
108
0
          SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]);
109
0
          SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]);
110
0
        }
111
0
      }
112
0
      break;
113
114
0
    case 32:
115
      // Maintain byte order consistency across different endianness.
116
0
      for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) {
117
0
        for (int x = 0; x < width; ++x) {
118
0
          data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) |
119
0
                    (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3];
120
0
        }
121
0
      }
122
0
      break;
123
124
0
    default:
125
0
      tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp);
126
0
  }
127
0
  SetImage(pix);
128
0
  pix.destroy();
129
0
}
130
131
// Store the coordinates of the rectangle to process for later use.
132
// Doesn't actually do any thresholding.
133
16.2k
void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
134
16.2k
  rect_left_ = left;
135
16.2k
  rect_top_ = top;
136
16.2k
  rect_width_ = width;
137
16.2k
  rect_height_ = height;
138
16.2k
}
139
140
// Get enough parameters to be able to rebuild bounding boxes in the
141
// original image (not just within the rectangle).
142
// Left and top are enough with top-down coordinates, but
143
// the height of the rectangle and the image are needed for bottom-up.
144
void ImageThresholder::GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth,
145
16.2k
                                     int *imageheight) {
146
16.2k
  *left = rect_left_;
147
16.2k
  *top = rect_top_;
148
16.2k
  *width = rect_width_;
149
16.2k
  *height = rect_height_;
150
16.2k
  *imagewidth = image_width_;
151
16.2k
  *imageheight = image_height_;
152
16.2k
}
153
154
// Pix vs raw, which to use? Pix is the preferred input for efficiency,
155
// since raw buffers are copied.
156
// SetImage for Pix clones its input, so the source pix may be pixDestroyed
157
// immediately after, but may not go away until after the Thresholder has
158
// finished with it.
159
16.2k
void ImageThresholder::SetImage(const Image pix) {
160
16.2k
  if (pix_ != nullptr) {
161
16.2k
    pix_.destroy();
162
16.2k
  }
163
16.2k
  Image src = pix;
164
16.2k
  int depth;
165
16.2k
  pixGetDimensions(src, &image_width_, &image_height_, &depth);
166
  // Convert the image as necessary so it is one of binary, plain RGB, or
167
  // 8 bit with no colormap. Guarantee that we always end up with our own copy,
168
  // not just a clone of the input.
169
16.2k
  if (depth > 1 && depth < 8) {
170
0
    pix_ = pixConvertTo8(src, false);
171
16.2k
  } else {
172
16.2k
    pix_ = src.copy();
173
16.2k
  }
174
16.2k
  depth = pixGetDepth(pix_);
175
16.2k
  pix_channels_ = depth / 8;
176
16.2k
  pix_wpl_ = pixGetWpl(pix_);
177
16.2k
  scale_ = 1;
178
16.2k
  estimated_res_ = yres_ = pixGetYRes(pix_);
179
16.2k
  Init();
180
16.2k
}
181
182
std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
183
                                                      TessBaseAPI *api,
184
0
                                                      ThresholdMethod method) {
185
0
  Image pix_binary = nullptr;
186
0
  Image pix_thresholds = nullptr;
187
188
0
  if (pix_channels_ == 0) {
189
    // We have a binary image, but it still has to be copied, as this API
190
    // allows the caller to modify the output.
191
0
    Image original = GetPixRect();
192
0
    pix_binary = original.copy();
193
0
    original.destroy();
194
0
    return std::make_tuple(true, nullptr, pix_binary, nullptr);
195
0
  }
196
197
0
  auto pix_grey = GetPixRectGrey();
198
199
0
  int r;
200
201
0
  l_int32 pix_w, pix_h;
202
0
  pixGetDimensions(pix_grey, &pix_w, &pix_h, nullptr);
203
204
0
  bool thresholding_debug;
205
0
  api->GetBoolVariable("thresholding_debug", &thresholding_debug);
206
0
  if (thresholding_debug) {
207
0
    tprintf("\nimage width: %d  height: %d  ppi: %d\n", pix_w, pix_h, yres_);
208
0
  }
209
210
0
  if (method == ThresholdMethod::Sauvola && pix_w > 6 && pix_h > 6) {
211
    // pixSauvolaBinarizeTiled requires half_window_size >= 2.
212
    // Therefore window_size must be at least 4 which requires
213
    // pix_w and pix_h to be at least 7.
214
0
    int window_size;
215
0
    double window_size_factor;
216
0
    api->GetDoubleVariable("thresholding_window_size", &window_size_factor);
217
0
    window_size = window_size_factor * yres_;
218
0
    window_size = std::max(7, window_size);
219
0
    window_size = std::min(pix_w < pix_h ? pix_w - 3 : pix_h - 3, window_size);
220
0
    int half_window_size = window_size / 2;
221
222
    // factor for image division into tiles; >= 1
223
0
    l_int32 nx, ny;
224
    // tiles size will be approx. 250 x 250 pixels
225
0
    nx = std::max(1, (pix_w + 125) / 250);
226
0
    ny = std::max(1, (pix_h + 125) / 250);
227
0
    auto xrat = pix_w / nx;
228
0
    auto yrat = pix_h / ny;
229
0
    if (xrat < half_window_size + 2) {
230
0
      nx = pix_w / (half_window_size + 2);
231
0
    }
232
0
    if (yrat < half_window_size + 2) {
233
0
      ny = pix_h / (half_window_size + 2);
234
0
    }
235
236
0
    double kfactor;
237
0
    api->GetDoubleVariable("thresholding_kfactor", &kfactor);
238
0
    kfactor = std::max(0.0, kfactor);
239
240
0
    if (thresholding_debug) {
241
0
      tprintf("window size: %d  kfactor: %.3f  nx:%d  ny: %d\n", window_size, kfactor, nx, ny);
242
0
    }
243
244
0
    r = pixSauvolaBinarizeTiled(pix_grey, half_window_size, kfactor, nx, ny,
245
0
                               (PIX**)pix_thresholds,
246
0
                                (PIX**)pix_binary);
247
0
  } else { // if (method == ThresholdMethod::LeptonicaOtsu)
248
0
    int tile_size;
249
0
    double tile_size_factor;
250
0
    api->GetDoubleVariable("thresholding_tile_size", &tile_size_factor);
251
0
    tile_size = tile_size_factor * yres_;
252
0
    tile_size = std::max(16, tile_size);
253
254
0
    int smooth_size;
255
0
    double smooth_size_factor;
256
0
    api->GetDoubleVariable("thresholding_smooth_kernel_size",
257
0
                         &smooth_size_factor);
258
0
    smooth_size_factor = std::max(0.0, smooth_size_factor);
259
0
    smooth_size = smooth_size_factor * yres_;
260
0
    int half_smooth_size = smooth_size / 2;
261
262
0
    double score_fraction;
263
0
    api->GetDoubleVariable("thresholding_score_fraction", &score_fraction);
264
265
0
    if (thresholding_debug) {
266
0
      tprintf("tile size: %d  smooth_size: %d  score_fraction: %.2f\n", tile_size, smooth_size, score_fraction);
267
0
    }
268
269
0
    r = pixOtsuAdaptiveThreshold(pix_grey, tile_size, tile_size,
270
0
                                 half_smooth_size, half_smooth_size,
271
0
                                 score_fraction,
272
0
                                 (PIX**)pix_thresholds,
273
0
                                 (PIX**)pix_binary);
274
0
  }
275
276
0
  bool ok = (r == 0);
277
0
  return std::make_tuple(ok, pix_grey, pix_binary, pix_thresholds);
278
0
}
279
280
// Threshold the source image as efficiently as possible to the output Pix.
281
// Creates a Pix and sets pix to point to the resulting pointer.
282
// Caller must use pixDestroy to free the created Pix.
283
/// Returns false on error.
284
16.2k
bool ImageThresholder::ThresholdToPix(Image *pix) {
285
16.2k
  if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) {
286
0
    tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
287
0
    return false;
288
0
  }
289
  // Handle binary image
290
16.2k
  if (pix_channels_ == 0) {
291
    // We have a binary image, but it still has to be copied, as this API
292
    // allows the caller to modify the output.
293
16.2k
    Image original = GetPixRect();
294
16.2k
    *pix = original.copy();
295
16.2k
    original.destroy();
296
16.2k
    return true;
297
16.2k
  }
298
  // Handle colormaps
299
0
  Image src = pix_;
300
0
  if (pixGetColormap(src)) {
301
0
    src = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
302
0
  }
303
0
  OtsuThresholdRectToPix(src, pix);
304
0
  if (src != pix_) {
305
0
    src.destroy();
306
0
  }
307
0
  return true;
308
16.2k
}
309
310
// Gets a pix that contains an 8 bit threshold value at each pixel. The
311
// returned pix may be an integer reduction of the binary image such that
312
// the scale factor may be inferred from the ratio of the sizes, even down
313
// to the extreme of a 1x1 pixel thresholds image.
314
// Ideally the 8 bit threshold should be the exact threshold used to generate
315
// the binary image in ThresholdToPix, but this is not a hard constraint.
316
// Returns nullptr if the input is binary. PixDestroy after use.
317
0
Image ImageThresholder::GetPixRectThresholds() {
318
0
  if (IsBinary()) {
319
0
    return nullptr;
320
0
  }
321
0
  Image pix_grey = GetPixRectGrey();
322
0
  int width = pixGetWidth(pix_grey);
323
0
  int height = pixGetHeight(pix_grey);
324
0
  std::vector<int> thresholds;
325
0
  std::vector<int> hi_values;
326
0
  OtsuThreshold(pix_grey, 0, 0, width, height, thresholds, hi_values);
327
0
  pix_grey.destroy();
328
0
  Image pix_thresholds = pixCreate(width, height, 8);
329
0
  int threshold = thresholds[0] > 0 ? thresholds[0] : 128;
330
0
  pixSetAllArbitrary(pix_thresholds, threshold);
331
0
  return pix_thresholds;
332
0
}
333
334
// Common initialization shared between SetImage methods.
335
16.2k
void ImageThresholder::Init() {
336
16.2k
  SetRectangle(0, 0, image_width_, image_height_);
337
16.2k
}
338
339
// Get a clone/copy of the source image rectangle.
340
// The returned Pix must be pixDestroyed.
341
// This function will be used in the future by the page layout analysis, and
342
// the layout analysis that uses it will only be available with Leptonica,
343
// so there is no raw equivalent.
344
32.4k
Image ImageThresholder::GetPixRect() {
345
32.4k
  if (IsFullImage()) {
346
    // Just clone the whole thing.
347
32.4k
    return pix_.clone();
348
32.4k
  } else {
349
    // Crop to the given rectangle.
350
0
    Box *box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
351
0
    Image cropped = pixClipRectangle(pix_, box, nullptr);
352
0
    boxDestroy(&box);
353
0
    return cropped;
354
0
  }
355
32.4k
}
356
357
// Get a clone/copy of the source image rectangle, reduced to greyscale,
358
// and at the same resolution as the output binary.
359
// The returned Pix must be pixDestroyed.
360
// Provided to the classifier to extract features from the greyscale image.
361
0
Image ImageThresholder::GetPixRectGrey() {
362
0
  auto pix = GetPixRect(); // May have to be reduced to grey.
363
0
  int depth = pixGetDepth(pix);
364
0
  if (depth != 8 || pixGetColormap(pix)) {
365
0
    if (depth == 24) {
366
0
      auto tmp = pixConvert24To32(pix);
367
0
      pix.destroy();
368
0
      pix = tmp;
369
0
    }
370
0
    auto result = pixConvertTo8(pix, false);
371
0
    pix.destroy();
372
0
    return result;
373
0
  }
374
0
  return pix;
375
0
}
376
377
// Otsu thresholds the rectangle, taking the rectangle from *this.
378
0
void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const {
379
0
  std::vector<int> thresholds;
380
0
  std::vector<int> hi_values;
381
382
0
  int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_, rect_height_,
383
0
                                   thresholds, hi_values);
384
0
  ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
385
0
}
386
387
/// Threshold the rectangle, taking everything except the src_pix
388
/// from the class, using thresholds/hi_values to the output pix.
389
/// NOTE that num_channels is the size of the thresholds and hi_values
390
// arrays and also the bytes per pixel in src_pix.
391
void ImageThresholder::ThresholdRectToPix(Image src_pix, int num_channels, const std::vector<int> &thresholds,
392
0
                                          const std::vector<int> &hi_values, Image *pix) const {
393
0
  *pix = pixCreate(rect_width_, rect_height_, 1);
394
0
  uint32_t *pixdata = pixGetData(*pix);
395
0
  int wpl = pixGetWpl(*pix);
396
0
  int src_wpl = pixGetWpl(src_pix);
397
0
  uint32_t *srcdata = pixGetData(src_pix);
398
0
  pixSetXRes(*pix, pixGetXRes(src_pix));
399
0
  pixSetYRes(*pix, pixGetYRes(src_pix));
400
0
  for (int y = 0; y < rect_height_; ++y) {
401
0
    const uint32_t *linedata = srcdata + (y + rect_top_) * src_wpl;
402
0
    uint32_t *pixline = pixdata + y * wpl;
403
0
    for (int x = 0; x < rect_width_; ++x) {
404
0
      bool white_result = true;
405
0
      for (int ch = 0; ch < num_channels; ++ch) {
406
0
        int pixel = GET_DATA_BYTE(linedata, (x + rect_left_) * num_channels + ch);
407
0
        if (hi_values[ch] >= 0 && (pixel > thresholds[ch]) == (hi_values[ch] == 0)) {
408
0
          white_result = false;
409
0
          break;
410
0
        }
411
0
      }
412
0
      if (white_result) {
413
0
        CLEAR_DATA_BIT(pixline, x);
414
0
      } else {
415
0
        SET_DATA_BIT(pixline, x);
416
0
      }
417
0
    }
418
0
  }
419
0
}
420
421
} // namespace tesseract.