/src/tesseract/src/ccmain/thresholder.cpp

Source
///////////////////////////////////////////////////////////////////////
// File:        thresholder.cpp
// Description: Base API for thresholding images in tesseract.
// Author:      Ray Smith
//
// (C) Copyright 2008, Google Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
///////////////////////////////////////////////////////////////////////

// Include automatically generated configuration file
#ifdef HAVE_CONFIG_H
#  include "config_auto.h"
#endif

#include "otsuthr.h"
#include "thresholder.h"
#include "tprintf.h" // for tprintf

#include <allheaders.h>
#include <tesseract/baseapi.h> // for api->GetIntVariable()

#include <algorithm> // for std::max, std::min
#include <cstdint>   // for uint32_t
#include <cstring>
#include <tuple>

namespace tesseract {

// Builds a thresholder with no image attached: zero image dimensions,
// zero channels and words per line, unit scale, both resolution fields
// set to 300, and an all-zero processing rectangle.
ImageThresholder::ImageThresholder()
    : pix_(nullptr),
      image_width_(0),
      image_height_(0),
      pix_channels_(0),
      pix_wpl_(0),
      scale_(1),
      yres_(300),
      estimated_res_(300) {
  // Start with an empty rectangle; same effect as SetRectangle(0, 0, 0, 0).
  rect_left_ = 0;
  rect_top_ = 0;
  rect_width_ = 0;
  rect_height_ = 0;
}

// Releases the stored image, if any, when the thresholder is destroyed.
ImageThresholder::~ImageThresholder() {
  // Calls made from a destructor always resolve to this class's own
  // version, so the qualified name only makes that explicit.
  ImageThresholder::Clear();
}

// Destroy the Pix if there is one, freeing memory.
void ImageThresholder::Clear() {
  pix_.destroy();
}

// Reports whether the thresholder currently holds no image.
bool ImageThresholder::IsEmpty() const {
  const bool no_image = (pix_ == nullptr);
  return no_image;
}

// SetImage makes a copy of all the image data, so it may be deleted
// immediately after this call.
// Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
// Palette color images will not work properly and must be converted to
// 24 bit.
// Binary images of 1 bit per pixel may also be given but they must be
// byte packed with the MSB of the first byte being the first pixel, and a
// one pixel is WHITE. For binary images set bytes_per_pixel=0.
void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int height,
                                int bytes_per_pixel, int bytes_per_line) {
  int bpp = bytes_per_pixel * 8;
  if (bpp == 0) {
    bpp = 1;
  }
  Image pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
  l_uint32 *data = pixGetData(pix);
  int wpl = pixGetWpl(pix);
  switch (bpp) {
    case 1:
      for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
        for (int x = 0; x < width; ++x) {
          if (imagedata[x / 8] & (0x80 >> (x % 8))) {
            CLEAR_DATA_BIT(data, x);
          } else {
            SET_DATA_BIT(data, x);
          }
        }
      }
      break;

    case 8:
      // Greyscale just copies the bytes in the right order.
      for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
        for (int x = 0; x < width; ++x) {
          SET_DATA_BYTE(data, x, imagedata[x]);
        }
      }
      break;

    case 24:
      // Put the colors in the correct places in the line buffer.
      for (int y = 0; y < height; ++y, imagedata += bytes_per_line) {
        for (int x = 0; x < width; ++x, ++data) {
          SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]);
          SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]);
          SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]);
        }
      }
      break;

    case 32:
      // Maintain byte order consistency across different endianness.
      for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) {
        for (int x = 0; x < width; ++x) {
          data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) |
                    (imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3];
        }
      }
      break;

    default:
      tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp);
  }
  SetImage(pix);
  pix.destroy();
}

// Remembers the rectangle that later calls will operate on.
// Nothing is thresholded here; only the four rectangle fields are updated.
void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
  // Origin of the rectangle.
  rect_left_ = left;
  rect_top_ = top;
  // Extent of the rectangle.
  rect_width_ = width;
  rect_height_ = height;
}

// Reports the stored rectangle together with the full image size, which is
// what is needed to rebuild bounding boxes in the original image rather
// than just inside the rectangle: left and top suffice for top-down
// coordinates, while bottom-up coordinates also need the rectangle height
// and the image height.
void ImageThresholder::GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth,
                                     int *imageheight) {
  // The processing rectangle.
  *left = rect_left_;
  *top = rect_top_;
  *width = rect_width_;
  *height = rect_height_;

  // The dimensions of the whole image.
  *imagewidth = image_width_;
  *imageheight = image_height_;
}

// Pix vs raw, which to use? Pix is the preferred input for efficiency,
// since raw buffers are copied.
// SetImage for Pix clones its input, so the source pix may be pixDestroyed
// immediately after, but may not go away until after the Thresholder has
// finished with it.
void ImageThresholder::SetImage(const Image pix) {
  if (pix_ != nullptr) {
    pix_.destroy();
  }
  Image src = pix;
  int depth;
  pixGetDimensions(src, &image_width_, &image_height_, &depth);
  // Convert the image as necessary so it is one of binary, plain RGB, or
  // 8 bit with no colormap. Guarantee that we always end up with our own copy,
  // not just a clone of the input.
  if (depth > 1 && depth < 8) {
    pix_ = pixConvertTo8(src, false);
  } else {
    pix_ = src.copy();
  }
  depth = pixGetDepth(pix_);
  pix_channels_ = depth / 8;
  pix_wpl_ = pixGetWpl(pix_);
  scale_ = 1;
  estimated_res_ = yres_ = pixGetYRes(pix_);
  Init();
}

// Thresholds the current rectangle with one of the Leptonica based methods.
//
// The tuning values are looked up by name through api:
//   thresholding_debug - when true the parameters in use are printed.
//   Sauvola:  thresholding_window_size, thresholding_kfactor.
//   Otherwise (adaptive Otsu): thresholding_tile_size,
//             thresholding_smooth_kernel_size, thresholding_score_fraction.
// The size factors are multiplied by yres_ to obtain sizes in pixels.
// Sauvola is only used when both image dimensions exceed 6 pixels; smaller
// images fall through to adaptive Otsu.
//
// Returns (ok, grey image, binary image, thresholds image), where ok is
// true when the Leptonica call returned 0. For binary input nothing is
// thresholded and the result is (true, nullptr, copy of the rectangle,
// nullptr). None of the returned images is destroyed here.
std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
                                                      TessBaseAPI *api,
                                                      ThresholdMethod method) {
  Image pix_binary = nullptr;
  Image pix_thresholds = nullptr;

  if (pix_channels_ == 0) {
    // We have a binary image, but it still has to be copied, as this API
    // allows the caller to modify the output.
    Image original = GetPixRect();
    pix_binary = original.copy();
    original.destroy();
    return std::make_tuple(true, nullptr, pix_binary, nullptr);
  }

  auto pix_grey = GetPixRectGrey();

  int r;

  l_int32 pix_w, pix_h;
  pixGetDimensions(pix_grey, &pix_w, &pix_h, nullptr);

  // Every value fetched through api below starts from a defined default, so
  // that a lookup which does not write its output cannot feed an
  // indeterminate value into the calculations. With a 0.0 factor the
  // std::max clamps below select the minimum sizes.
  bool thresholding_debug = false;
  api->GetBoolVariable("thresholding_debug", &thresholding_debug);
  if (thresholding_debug) {
    tprintf("\nimage width: %d  height: %d  ppi: %d\n", pix_w, pix_h, yres_);
  }

  if (method == ThresholdMethod::Sauvola && pix_w > 6 && pix_h > 6) {
    // pixSauvolaBinarizeTiled requires half_window_size >= 2.
    // Therefore window_size must be at least 4 which requires
    // pix_w and pix_h to be at least 7.
    double window_size_factor = 0.0;
    api->GetDoubleVariable("thresholding_window_size", &window_size_factor);
    int window_size = static_cast<int>(window_size_factor * yres_);
    window_size = std::max(7, window_size);
    // Keep the window inside the smaller image dimension.
    window_size = std::min(pix_w < pix_h ? pix_w - 3 : pix_h - 3, window_size);
    int half_window_size = window_size / 2;

    // factor for image division into tiles; >= 1
    l_int32 nx, ny;
    // tiles size will be approx. 250 x 250 pixels
    nx = std::max(1, (pix_w + 125) / 250);
    ny = std::max(1, (pix_h + 125) / 250);
    // Use fewer tiles when a tile would be smaller than half_window_size + 2.
    auto xrat = pix_w / nx;
    auto yrat = pix_h / ny;
    if (xrat < half_window_size + 2) {
      nx = pix_w / (half_window_size + 2);
    }
    if (yrat < half_window_size + 2) {
      ny = pix_h / (half_window_size + 2);
    }

    double kfactor = 0.0;
    api->GetDoubleVariable("thresholding_kfactor", &kfactor);
    kfactor = std::max(0.0, kfactor);

    if (thresholding_debug) {
      tprintf("window size: %d  kfactor: %.3f  nx:%d  ny: %d\n", window_size, kfactor, nx, ny);
    }

    r = pixSauvolaBinarizeTiled(pix_grey, half_window_size, kfactor, nx, ny,
                               (PIX**)pix_thresholds,
                                (PIX**)pix_binary);
  } else { // if (method == ThresholdMethod::LeptonicaOtsu)
    double tile_size_factor = 0.0;
    api->GetDoubleVariable("thresholding_tile_size", &tile_size_factor);
    int tile_size = static_cast<int>(tile_size_factor * yres_);
    tile_size = std::max(16, tile_size);

    double smooth_size_factor = 0.0;
    api->GetDoubleVariable("thresholding_smooth_kernel_size",
                         &smooth_size_factor);
    smooth_size_factor = std::max(0.0, smooth_size_factor);
    int smooth_size = static_cast<int>(smooth_size_factor * yres_);
    int half_smooth_size = smooth_size / 2;

    double score_fraction = 0.0;
    api->GetDoubleVariable("thresholding_score_fraction", &score_fraction);

    if (thresholding_debug) {
      tprintf("tile size: %d  smooth_size: %d  score_fraction: %.2f\n", tile_size, smooth_size, score_fraction);
    }

    r = pixOtsuAdaptiveThreshold(pix_grey, tile_size, tile_size,
                                 half_smooth_size, half_smooth_size,
                                 score_fraction,
                                 (PIX**)pix_thresholds,
                                 (PIX**)pix_binary);
  }

  // Both Leptonica calls report success with a zero return value.
  bool ok = (r == 0);
  return std::make_tuple(ok, pix_grey, pix_binary, pix_thresholds);
}

// Produces the binary version of the current rectangle in *pix.
// A new Pix is created and stored through pix; the caller must use
// pixDestroy to free it.
/// Returns false on error.
bool ImageThresholder::ThresholdToPix(Image *pix) {
  // Refuse images whose width or height exceeds INT16_MAX.
  const bool too_wide = image_width_ > INT16_MAX;
  const bool too_tall = image_height_ > INT16_MAX;
  if (too_wide || too_tall) {
    tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
    return false;
  }

  if (pix_channels_ == 0) {
    // Binary input needs no thresholding, but the caller is allowed to
    // modify the output, so hand back a copy rather than the original.
    Image rect_pix = GetPixRect();
    *pix = rect_pix.copy();
    rect_pix.destroy();
    return true;
  }

  // A colormapped image is replaced by a colormap-free temporary first.
  Image otsu_input = pix_;
  const bool has_colormap = pixGetColormap(otsu_input) != nullptr;
  if (has_colormap) {
    otsu_input = pixRemoveColormap(otsu_input, REMOVE_CMAP_BASED_ON_SRC);
  }

  OtsuThresholdRectToPix(otsu_input, pix);

  // Only the temporary is released; the stored image is left alone.
  if (otsu_input != pix_) {
    otsu_input.destroy();
  }
  return true;
}

// Gets a pix that contains an 8 bit threshold value at each pixel. The
// returned pix may be an integer reduction of the binary image such that
// the scale factor may be inferred from the ratio of the sizes, even down
// to the extreme of a 1x1 pixel thresholds image.
// Ideally the 8 bit threshold should be the exact threshold used to generate
// the binary image in ThresholdToPix, but this is not a hard constraint.
// This implementation fills a pix of the same size as the grey rectangle
// with one value: the first Otsu threshold, or 128 when no positive
// threshold is available.
// Returns nullptr if the input is binary or the grey rectangle could not be
// obtained. PixDestroy after use.
Image ImageThresholder::GetPixRectThresholds() {
  if (IsBinary()) {
    return nullptr;
  }
  Image pix_grey = GetPixRectGrey();
  if (pix_grey == nullptr) {
    // Without a grey image there is nothing to measure.
    return nullptr;
  }
  int width = pixGetWidth(pix_grey);
  int height = pixGetHeight(pix_grey);
  std::vector<int> thresholds;
  std::vector<int> hi_values;
  OtsuThreshold(pix_grey, 0, 0, width, height, thresholds, hi_values);
  pix_grey.destroy();
  Image pix_thresholds = pixCreate(width, height, 8);
  // Guard the element access in case no threshold was produced.
  const bool usable = !thresholds.empty() && thresholds[0] > 0;
  int threshold = usable ? thresholds[0] : 128;
  pixSetAllArbitrary(pix_thresholds, threshold);
  return pix_thresholds;
}

// Shared tail of the SetImage methods: makes the processing rectangle
// cover the whole of the newly stored image.
void ImageThresholder::Init() {
  const int full_width = image_width_;
  const int full_height = image_height_;
  SetRectangle(0, 0, full_width, full_height);
}

// Returns the part of the source image selected by the current rectangle:
// a clone of the stored image when the rectangle is the full image,
// otherwise the result of clipping to the rectangle.
// The returned Pix must be pixDestroyed.
// This function will be used in the future by the page layout analysis, and
// the layout analysis that uses it will only be available with Leptonica,
// so there is no raw equivalent.
Image ImageThresholder::GetPixRect() {
  if (!IsFullImage()) {
    // Clip out just the requested rectangle.
    Box *clip_box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
    Image clipped = pixClipRectangle(pix_, clip_box, nullptr);
    boxDestroy(&clip_box);
    return clipped;
  }
  // The rectangle is the whole image, so a clone is enough.
  return pix_.clone();
}

// Returns the current rectangle as an 8 bit image without a colormap, at
// the same resolution as the output binary.
// The returned Pix must be pixDestroyed.
// Provided to the classifier to extract features from the greyscale image.
Image ImageThresholder::GetPixRectGrey() {
  Image rect_pix = GetPixRect();
  const int depth = pixGetDepth(rect_pix);

  // Already 8 bit with no colormap: nothing to convert.
  if (depth == 8 && !pixGetColormap(rect_pix)) {
    return rect_pix;
  }

  // 24 bit data is widened to 32 bit before the conversion to 8 bit.
  if (depth == 24) {
    Image widened = pixConvert24To32(rect_pix);
    rect_pix.destroy();
    rect_pix = widened;
  }

  Image grey = pixConvertTo8(rect_pix, false);
  rect_pix.destroy();
  return grey;
}

// Runs OtsuThreshold over the rectangle stored in *this and then applies
// the resulting per-channel values to src_pix, writing the binary image
// to *out_pix.
void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const {
  std::vector<int> channel_thresholds;
  std::vector<int> channel_hi_values;

  // The return value is passed on as the number of channels to threshold.
  const int channel_count =
      OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_, rect_height_,
                    channel_thresholds, channel_hi_values);

  ThresholdRectToPix(src_pix, channel_count, channel_thresholds, channel_hi_values, out_pix);
}

/// Threshold the rectangle, taking everything except the src_pix
/// from the class, using thresholds/hi_values to the output pix.
/// NOTE that num_channels is the size of the thresholds and hi_values
/// arrays and also the bytes per pixel in src_pix.
///
/// A new 1 bit Pix of the rectangle size is stored in *pix and inherits the
/// x and y resolution of src_pix. A pixel stays white (bit cleared) unless
/// some channel with hi_values[ch] >= 0 rejects it: with hi_values[ch] == 0
/// a value above thresholds[ch] rejects it, otherwise a value at or below
/// thresholds[ch] does. Rejected pixels have their bit set.
/// If the output Pix cannot be created, *pix is left null and nothing is
/// written.
void ImageThresholder::ThresholdRectToPix(Image src_pix, int num_channels, const std::vector<int> &thresholds,
                                          const std::vector<int> &hi_values, Image *pix) const {
  *pix = pixCreate(rect_width_, rect_height_, 1);
  if (*pix == nullptr) {
    // Without this check the row loop below would write through a null
    // line pointer.
    tprintf("Cannot create %d x %d binary Pix\n", rect_width_, rect_height_);
    return;
  }
  uint32_t *pixdata = pixGetData(*pix);
  int wpl = pixGetWpl(*pix);
  int src_wpl = pixGetWpl(src_pix);
  uint32_t *srcdata = pixGetData(src_pix);
  pixSetXRes(*pix, pixGetXRes(src_pix));
  pixSetYRes(*pix, pixGetYRes(src_pix));
  for (int y = 0; y < rect_height_; ++y) {
    // Source rows are offset by the rectangle origin; output rows are not.
    const uint32_t *linedata = srcdata + (y + rect_top_) * src_wpl;
    uint32_t *pixline = pixdata + y * wpl;
    for (int x = 0; x < rect_width_; ++x) {
      bool white_result = true;
      for (int ch = 0; ch < num_channels; ++ch) {
        int pixel = GET_DATA_BYTE(linedata, (x + rect_left_) * num_channels + ch);
        // Channels with a negative hi_value take no part in the decision.
        if (hi_values[ch] >= 0 && (pixel > thresholds[ch]) == (hi_values[ch] == 0)) {
          white_result = false;
          break;
        }
      }
      if (white_result) {
        CLEAR_DATA_BIT(pixline, x);
      } else {
        SET_DATA_BIT(pixline, x);
      }
    }
  }
}

} // namespace tesseract.

Coverage Report

Created: 2026-01-13 07:11

Line	Count	Source
1		///////////////////////////////////////////////////////////////////////
2		// File: thresholder.cpp
3		// Description: Base API for thresholding images in tesseract.
4		// Author: Ray Smith
5		//
6		// (C) Copyright 2008, Google Inc.
7		// Licensed under the Apache License, Version 2.0 (the "License");
8		// you may not use this file except in compliance with the License.
9		// You may obtain a copy of the License at
10		// http://www.apache.org/licenses/LICENSE-2.0
11		// Unless required by applicable law or agreed to in writing, software
12		// distributed under the License is distributed on an "AS IS" BASIS,
13		// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14		// See the License for the specific language governing permissions and
15		// limitations under the License.
16		//
17		///////////////////////////////////////////////////////////////////////
18
19		// Include automatically generated configuration file
20		#ifdef HAVE_CONFIG_H
21		# include "config_auto.h"
22		#endif
23
24		#include "otsuthr.h"
25		#include "thresholder.h"
26		#include "tprintf.h" // for tprintf
27
28		#include <allheaders.h>
29		#include <tesseract/baseapi.h> // for api->GetIntVariable()
30
31		#include <algorithm> // for std::max, std::min
32		#include <cstdint> // for uint32_t
33		#include <cstring>
34		#include <tuple>
35
36		namespace tesseract {
37
38		ImageThresholder::ImageThresholder()
39	2	: pix_(nullptr)
40	2	, image_width_(0)
41	2	, image_height_(0)
42	2	, pix_channels_(0)
43	2	, pix_wpl_(0)
44	2	, scale_(1)
45	2	, yres_(300)
46	2	, estimated_res_(300) {
47	2	SetRectangle(0, 0, 0, 0);
48	2	}
49
50	0	ImageThresholder::~ImageThresholder() {
51	0	Clear();
52	0	}
53
54		// Destroy the Pix if there is one, freeing memory.
55	0	void ImageThresholder::Clear() {
56	0	pix_.destroy();
57	0	}
58
59		// Return true if no image has been set.
60	16.2k	bool ImageThresholder::IsEmpty() const {
61	16.2k	return pix_ == nullptr;
62	16.2k	}
63
64		// SetImage makes a copy of all the image data, so it may be deleted
65		// immediately after this call.
66		// Greyscale of 8 and color of 24 or 32 bits per pixel may be given.
67		// Palette color images will not work properly and must be converted to
68		// 24 bit.
69		// Binary images of 1 bit per pixel may also be given but they must be
70		// byte packed with the MSB of the first byte being the first pixel, and a
71		// one pixel is WHITE. For binary images set bytes_per_pixel=0.
72		void ImageThresholder::SetImage(const unsigned char *imagedata, int width, int height,
73	0	int bytes_per_pixel, int bytes_per_line) {
74	0	int bpp = bytes_per_pixel * 8;
75	0	if (bpp == 0) {
76	0	bpp = 1;
77	0	}
78	0	Image pix = pixCreate(width, height, bpp == 24 ? 32 : bpp);
79	0	l_uint32 *data = pixGetData(pix);
80	0	int wpl = pixGetWpl(pix);
81	0	switch (bpp) {
82	0	case 1:
83	0	for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
84	0	for (int x = 0; x < width; ++x) {
85	0	if (imagedata[x / 8] & (0x80 >> (x % 8))) {
86	0	CLEAR_DATA_BIT(data, x);
87	0	} else {
88	0	SET_DATA_BIT(data, x);
89	0	}
90	0	}
91	0	}
92	0	break;
93
94	0	case 8:
95		// Greyscale just copies the bytes in the right order.
96	0	for (int y = 0; y < height; ++y, data += wpl, imagedata += bytes_per_line) {
97	0	for (int x = 0; x < width; ++x) {
98	0	SET_DATA_BYTE(data, x, imagedata[x]);
99	0	}
100	0	}
101	0	break;
102
103	0	case 24:
104		// Put the colors in the correct places in the line buffer.
105	0	for (int y = 0; y < height; ++y, imagedata += bytes_per_line) {
106	0	for (int x = 0; x < width; ++x, ++data) {
107	0	SET_DATA_BYTE(data, COLOR_RED, imagedata[3 * x]);
108	0	SET_DATA_BYTE(data, COLOR_GREEN, imagedata[3 * x + 1]);
109	0	SET_DATA_BYTE(data, COLOR_BLUE, imagedata[3 * x + 2]);
110	0	}
111	0	}
112	0	break;
113
114	0	case 32:
115		// Maintain byte order consistency across different endianness.
116	0	for (int y = 0; y < height; ++y, imagedata += bytes_per_line, data += wpl) {
117	0	for (int x = 0; x < width; ++x) {
118	0	data[x] = (imagedata[x * 4] << 24) | (imagedata[x * 4 + 1] << 16) |
119	0	(imagedata[x * 4 + 2] << 8) | imagedata[x * 4 + 3];
120	0	}
121	0	}
122	0	break;
123
124	0	default:
125	0	tprintf("Cannot convert RAW image to Pix with bpp = %d\n", bpp);
126	0	}
127	0	SetImage(pix);
128	0	pix.destroy();
129	0	}
130
131		// Store the coordinates of the rectangle to process for later use.
132		// Doesn't actually do any thresholding.
133	16.2k	void ImageThresholder::SetRectangle(int left, int top, int width, int height) {
134	16.2k	rect_left_ = left;
135	16.2k	rect_top_ = top;
136	16.2k	rect_width_ = width;
137	16.2k	rect_height_ = height;
138	16.2k	}
139
140		// Get enough parameters to be able to rebuild bounding boxes in the
141		// original image (not just within the rectangle).
142		// Left and top are enough with top-down coordinates, but
143		// the height of the rectangle and the image are needed for bottom-up.
144		void ImageThresholder::GetImageSizes(int *left, int *top, int *width, int *height, int *imagewidth,
145	16.2k	int *imageheight) {
146	16.2k	*left = rect_left_;
147	16.2k	*top = rect_top_;
148	16.2k	*width = rect_width_;
149	16.2k	*height = rect_height_;
150	16.2k	*imagewidth = image_width_;
151	16.2k	*imageheight = image_height_;
152	16.2k	}
153
154		// Pix vs raw, which to use? Pix is the preferred input for efficiency,
155		// since raw buffers are copied.
156		// SetImage for Pix clones its input, so the source pix may be pixDestroyed
157		// immediately after, but may not go away until after the Thresholder has
158		// finished with it.
159	16.2k	void ImageThresholder::SetImage(const Image pix) {
160	16.2k	if (pix_ != nullptr) {
161	16.2k	pix_.destroy();
162	16.2k	}
163	16.2k	Image src = pix;
164	16.2k	int depth;
165	16.2k	pixGetDimensions(src, &image_width_, &image_height_, &depth);
166		// Convert the image as necessary so it is one of binary, plain RGB, or
167		// 8 bit with no colormap. Guarantee that we always end up with our own copy,
168		// not just a clone of the input.
169	16.2k	if (depth > 1 && depth < 8) {
170	0	pix_ = pixConvertTo8(src, false);
171	16.2k	} else {
172	16.2k	pix_ = src.copy();
173	16.2k	}
174	16.2k	depth = pixGetDepth(pix_);
175	16.2k	pix_channels_ = depth / 8;
176	16.2k	pix_wpl_ = pixGetWpl(pix_);
177	16.2k	scale_ = 1;
178	16.2k	estimated_res_ = yres_ = pixGetYRes(pix_);
179	16.2k	Init();
180	16.2k	}
181
182		std::tuple<bool, Image, Image, Image> ImageThresholder::Threshold(
183		TessBaseAPI *api,
184	0	ThresholdMethod method) {
185	0	Image pix_binary = nullptr;
186	0	Image pix_thresholds = nullptr;
187
188	0	if (pix_channels_ == 0) {
189		// We have a binary image, but it still has to be copied, as this API
190		// allows the caller to modify the output.
191	0	Image original = GetPixRect();
192	0	pix_binary = original.copy();
193	0	original.destroy();
194	0	return std::make_tuple(true, nullptr, pix_binary, nullptr);
195	0	}
196
197	0	auto pix_grey = GetPixRectGrey();
198
199	0	int r;
200
201	0	l_int32 pix_w, pix_h;
202	0	pixGetDimensions(pix_grey, &pix_w, &pix_h, nullptr);
203
204	0	bool thresholding_debug;
205	0	api->GetBoolVariable("thresholding_debug", &thresholding_debug);
206	0	if (thresholding_debug) {
207	0	tprintf("\nimage width: %d height: %d ppi: %d\n", pix_w, pix_h, yres_);
208	0	}
209
210	0	if (method == ThresholdMethod::Sauvola && pix_w > 6 && pix_h > 6) {
211		// pixSauvolaBinarizeTiled requires half_window_size >= 2.
212		// Therefore window_size must be at least 4 which requires
213		// pix_w and pix_h to be at least 7.
214	0	int window_size;
215	0	double window_size_factor;
216	0	api->GetDoubleVariable("thresholding_window_size", &window_size_factor);
217	0	window_size = window_size_factor * yres_;
218	0	window_size = std::max(7, window_size);
219	0	window_size = std::min(pix_w < pix_h ? pix_w - 3 : pix_h - 3, window_size);
220	0	int half_window_size = window_size / 2;
221
222		// factor for image division into tiles; >= 1
223	0	l_int32 nx, ny;
224		// tiles size will be approx. 250 x 250 pixels
225	0	nx = std::max(1, (pix_w + 125) / 250);
226	0	ny = std::max(1, (pix_h + 125) / 250);
227	0	auto xrat = pix_w / nx;
228	0	auto yrat = pix_h / ny;
229	0	if (xrat < half_window_size + 2) {
230	0	nx = pix_w / (half_window_size + 2);
231	0	}
232	0	if (yrat < half_window_size + 2) {
233	0	ny = pix_h / (half_window_size + 2);
234	0	}
235
236	0	double kfactor;
237	0	api->GetDoubleVariable("thresholding_kfactor", &kfactor);
238	0	kfactor = std::max(0.0, kfactor);
239
240	0	if (thresholding_debug) {
241	0	tprintf("window size: %d kfactor: %.3f nx:%d ny: %d\n", window_size, kfactor, nx, ny);
242	0	}
243
244	0	r = pixSauvolaBinarizeTiled(pix_grey, half_window_size, kfactor, nx, ny,
245	0	(PIX**)pix_thresholds,
246	0	(PIX**)pix_binary);
247	0	} else { // if (method == ThresholdMethod::LeptonicaOtsu)
248	0	int tile_size;
249	0	double tile_size_factor;
250	0	api->GetDoubleVariable("thresholding_tile_size", &tile_size_factor);
251	0	tile_size = tile_size_factor * yres_;
252	0	tile_size = std::max(16, tile_size);
253
254	0	int smooth_size;
255	0	double smooth_size_factor;
256	0	api->GetDoubleVariable("thresholding_smooth_kernel_size",
257	0	&smooth_size_factor);
258	0	smooth_size_factor = std::max(0.0, smooth_size_factor);
259	0	smooth_size = smooth_size_factor * yres_;
260	0	int half_smooth_size = smooth_size / 2;
261
262	0	double score_fraction;
263	0	api->GetDoubleVariable("thresholding_score_fraction", &score_fraction);
264
265	0	if (thresholding_debug) {
266	0	tprintf("tile size: %d smooth_size: %d score_fraction: %.2f\n", tile_size, smooth_size, score_fraction);
267	0	}
268
269	0	r = pixOtsuAdaptiveThreshold(pix_grey, tile_size, tile_size,
270	0	half_smooth_size, half_smooth_size,
271	0	score_fraction,
272	0	(PIX**)pix_thresholds,
273	0	(PIX**)pix_binary);
274	0	}
275
276	0	bool ok = (r == 0);
277	0	return std::make_tuple(ok, pix_grey, pix_binary, pix_thresholds);
278	0	}
279
280		// Threshold the source image as efficiently as possible to the output Pix.
281		// Creates a Pix and sets pix to point to the resulting pointer.
282		// Caller must use pixDestroy to free the created Pix.
283		/// Returns false on error.
284	16.2k	bool ImageThresholder::ThresholdToPix(Image *pix) {
285	16.2k	if (image_width_ > INT16_MAX || image_height_ > INT16_MAX) {
286	0	tprintf("Image too large: (%d, %d)\n", image_width_, image_height_);
287	0	return false;
288	0	}
289		// Handle binary image
290	16.2k	if (pix_channels_ == 0) {
291		// We have a binary image, but it still has to be copied, as this API
292		// allows the caller to modify the output.
293	16.2k	Image original = GetPixRect();
294	16.2k	*pix = original.copy();
295	16.2k	original.destroy();
296	16.2k	return true;
297	16.2k	}
298		// Handle colormaps
299	0	Image src = pix_;
300	0	if (pixGetColormap(src)) {
301	0	src = pixRemoveColormap(src, REMOVE_CMAP_BASED_ON_SRC);
302	0	}
303	0	OtsuThresholdRectToPix(src, pix);
304	0	if (src != pix_) {
305	0	src.destroy();
306	0	}
307	0	return true;
308	16.2k	}
309
310		// Gets a pix that contains an 8 bit threshold value at each pixel. The
311		// returned pix may be an integer reduction of the binary image such that
312		// the scale factor may be inferred from the ratio of the sizes, even down
313		// to the extreme of a 1x1 pixel thresholds image.
314		// Ideally the 8 bit threshold should be the exact threshold used to generate
315		// the binary image in ThresholdToPix, but this is not a hard constraint.
316		// Returns nullptr if the input is binary. PixDestroy after use.
317	0	Image ImageThresholder::GetPixRectThresholds() {
318	0	if (IsBinary()) {
319	0	return nullptr;
320	0	}
321	0	Image pix_grey = GetPixRectGrey();
322	0	int width = pixGetWidth(pix_grey);
323	0	int height = pixGetHeight(pix_grey);
324	0	std::vector<int> thresholds;
325	0	std::vector<int> hi_values;
326	0	OtsuThreshold(pix_grey, 0, 0, width, height, thresholds, hi_values);
327	0	pix_grey.destroy();
328	0	Image pix_thresholds = pixCreate(width, height, 8);
329	0	int threshold = thresholds[0] > 0 ? thresholds[0] : 128;
330	0	pixSetAllArbitrary(pix_thresholds, threshold);
331	0	return pix_thresholds;
332	0	}
333
334		// Common initialization shared between SetImage methods.
335	16.2k	void ImageThresholder::Init() {
336	16.2k	SetRectangle(0, 0, image_width_, image_height_);
337	16.2k	}
338
339		// Get a clone/copy of the source image rectangle.
340		// The returned Pix must be pixDestroyed.
341		// This function will be used in the future by the page layout analysis, and
342		// the layout analysis that uses it will only be available with Leptonica,
343		// so there is no raw equivalent.
344	32.4k	Image ImageThresholder::GetPixRect() {
345	32.4k	if (IsFullImage()) {
346		// Just clone the whole thing.
347	32.4k	return pix_.clone();
348	32.4k	} else {
349		// Crop to the given rectangle.
350	0	Box *box = boxCreate(rect_left_, rect_top_, rect_width_, rect_height_);
351	0	Image cropped = pixClipRectangle(pix_, box, nullptr);
352	0	boxDestroy(&box);
353	0	return cropped;
354	0	}
355	32.4k	}
356
357		// Get a clone/copy of the source image rectangle, reduced to greyscale,
358		// and at the same resolution as the output binary.
359		// The returned Pix must be pixDestroyed.
360		// Provided to the classifier to extract features from the greyscale image.
361	0	Image ImageThresholder::GetPixRectGrey() {
362	0	auto pix = GetPixRect(); // May have to be reduced to grey.
363	0	int depth = pixGetDepth(pix);
364	0	if (depth != 8 || pixGetColormap(pix)) {
365	0	if (depth == 24) {
366	0	auto tmp = pixConvert24To32(pix);
367	0	pix.destroy();
368	0	pix = tmp;
369	0	}
370	0	auto result = pixConvertTo8(pix, false);
371	0	pix.destroy();
372	0	return result;
373	0	}
374	0	return pix;
375	0	}
376
377		// Otsu thresholds the rectangle, taking the rectangle from *this.
378	0	void ImageThresholder::OtsuThresholdRectToPix(Image src_pix, Image *out_pix) const {
379	0	std::vector<int> thresholds;
380	0	std::vector<int> hi_values;
381
382	0	int num_channels = OtsuThreshold(src_pix, rect_left_, rect_top_, rect_width_, rect_height_,
383	0	thresholds, hi_values);
384	0	ThresholdRectToPix(src_pix, num_channels, thresholds, hi_values, out_pix);
385	0	}
386
387		/// Threshold the rectangle, taking everything except the src_pix
388		/// from the class, using thresholds/hi_values to the output pix.
389		/// NOTE that num_channels is the size of the thresholds and hi_values
390		// arrays and also the bytes per pixel in src_pix.
391		void ImageThresholder::ThresholdRectToPix(Image src_pix, int num_channels, const std::vector<int> &thresholds,
392	0	const std::vector<int> &hi_values, Image *pix) const {
393	0	*pix = pixCreate(rect_width_, rect_height_, 1);
394	0	uint32_t *pixdata = pixGetData(*pix);
395	0	int wpl = pixGetWpl(*pix);
396	0	int src_wpl = pixGetWpl(src_pix);
397	0	uint32_t *srcdata = pixGetData(src_pix);
398	0	pixSetXRes(*pix, pixGetXRes(src_pix));
399	0	pixSetYRes(*pix, pixGetYRes(src_pix));
400	0	for (int y = 0; y < rect_height_; ++y) {
401	0	const uint32_t *linedata = srcdata + (y + rect_top_) * src_wpl;
402	0	uint32_t *pixline = pixdata + y * wpl;
403	0	for (int x = 0; x < rect_width_; ++x) {
404	0	bool white_result = true;
405	0	for (int ch = 0; ch < num_channels; ++ch) {
406	0	int pixel = GET_DATA_BYTE(linedata, (x + rect_left_) * num_channels + ch);
407	0	if (hi_values[ch] >= 0 && (pixel > thresholds[ch]) == (hi_values[ch] == 0)) {
408	0	white_result = false;
409	0	break;
410	0	}
411	0	}
412	0	if (white_result) {
413	0	CLEAR_DATA_BIT(pixline, x);
414	0	} else {
415	0	SET_DATA_BIT(pixline, x);
416	0	}
417	0	}
418	0	}
419	0	}
420
421		} // namespace tesseract.