/src/tesseract/src/textord/devanagari_processing.cpp

Source
/**********************************************************************
 * File:        devanagari_processing.cpp
 * Description: Methods to process images containing devanagari symbols,
 *              prior to classification.
 * Author:      Shobhit Saxena
 *
 * (C) Copyright 2008, Google Inc.
 ** Licensed under the Apache License, Version 2.0 (the "License");
 ** you may not use this file except in compliance with the License.
 ** You may obtain a copy of the License at
 ** http://www.apache.org/licenses/LICENSE-2.0
 ** Unless required by applicable law or agreed to in writing, software
 ** distributed under the License is distributed on an "AS IS" BASIS,
 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 ** See the License for the specific language governing permissions and
 ** limitations under the License.
 *
 **********************************************************************/

#ifdef HAVE_CONFIG_H
#  include "config_auto.h"
#endif

#include "devanagari_processing.h"

#include "debugpixa.h"
#include "statistc.h"
#include "tordmain.h"

#include <allheaders.h>

namespace tesseract {

// Flags controlling the debugging information for shiro-rekha splitting
// strategies.
// When devanagari_split_debuglevel is greater than zero, the splitter prints
// progress and per-component diagnostics through tprintf.
INT_VAR(devanagari_split_debuglevel, 0, "Debug level for split shiro-rekha process.");

// When devanagari_split_debugimage is set, Split() builds a debug image from
// the original image (via pixConvertTo32) and the boxes chosen for splitting,
// along with other diagnostics, are rendered onto it.
BOOL_VAR(devanagari_split_debugimage, 0,
         "Whether to create a debug image for split shiro-rekha process.");

// Constructs a splitter that holds no images and no segmentation block list,
// has both split strategies set to NO_SPLIT, an unspecified global xheight
// and the close operation disabled. These are the same values that Clear()
// resets the object to.
ShiroRekhaSplitter::ShiroRekhaSplitter() :
  orig_pix_(nullptr),
  splitted_image_(nullptr),
  pageseg_split_strategy_(NO_SPLIT),
  ocr_split_strategy_(NO_SPLIT),
  debug_image_(nullptr),
  segmentation_block_list_(nullptr),
  global_xheight_(kUnspecifiedXheight),
  perform_close_(false)
{
}

// Releases the images held by this object by delegating to Clear().
ShiroRekhaSplitter::~ShiroRekhaSplitter() {
  Clear();
}

// Returns the splitter to its initial state: every held image is released,
// the segmentation block list is forgotten and all settings go back to their
// defaults.
void ShiroRekhaSplitter::Clear() {
  // Release every image held by this object.
  debug_image_.destroy();
  splitted_image_.destroy();
  orig_pix_.destroy();

  // The block list pointer is only reset here; nothing is deleted through it.
  segmentation_block_list_ = nullptr;

  // Restore the default settings.
  perform_close_ = false;
  global_xheight_ = kUnspecifiedXheight;
  ocr_split_strategy_ = NO_SPLIT;
  pageseg_split_strategy_ = NO_SPLIT;
}

// Sets the input image. A clone of pix is taken and owned by this class,
// replacing (and releasing) any image that was held before.
// The clone is taken BEFORE the previous image is released, so that passing
// an Image which refers to the image already held by this object cannot
// release it before it has been cloned.
// NOTE(review): assumes Image::clone() yields another reference to the same
// underlying image and that destroy() is safe on an empty Image (Clear()
// already calls it unconditionally) - confirm against the Image class.
void ShiroRekhaSplitter::set_orig_pix(Image pix) {
  Image new_orig = pix.clone();
  orig_pix_.destroy();
  orig_pix_ = new_orig;
}

// Top-level method to perform splitting based on current settings.
// split_for_pageseg should be true if the splitting is being done prior to
// page segmentation; in that case pageseg_split_strategy_ selects the
// splitting strategy, otherwise ocr_split_strategy_ does.
// pixa_debug may be nullptr. When it is non-null and
// devanagari_split_debugimage is set, the debug image is added to it.
// Returns false, without touching anything, if the selected strategy is
// NO_SPLIT. Otherwise splitted_image_ is rebuilt as a copy of orig_pix_ with
// the chosen split regions cleared and true is returned, even when no region
// ended up being cleared.
bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa *pixa_debug) {
  SplitStrategy split_strategy = split_for_pageseg ? pageseg_split_strategy_ : ocr_split_strategy_;
  if (split_strategy == NO_SPLIT) {
    return false; // Nothing to do.
  }
  ASSERT_HOST(split_strategy == MINIMAL_SPLIT || split_strategy == MAXIMAL_SPLIT);
  ASSERT_HOST(orig_pix_);
  if (devanagari_split_debuglevel > 0) {
    tprintf("Splitting shiro-rekha ...\n");
    tprintf("Split strategy = %s\n", split_strategy == MINIMAL_SPLIT ? "Minimal" : "Maximal");
    tprintf("Initial pageseg available = %s\n", segmentation_block_list_ ? "yes" : "no");
  }
  // Create a copy of original image to store the splitting output.
  splitted_image_.destroy();
  splitted_image_ = orig_pix_.copy();

  // Initialize debug image if required.
  if (devanagari_split_debugimage) {
    debug_image_.destroy();
    debug_image_ = pixConvertTo32(orig_pix_);
  }

  // Determine all connected components in the input image. A close operation
  // may be required prior to this, depending on the current settings.
  Image pix_for_ccs = orig_pix_.clone();
  if (perform_close_ && global_xheight_ != kUnspecifiedXheight && !segmentation_block_list_) {
    if (devanagari_split_debuglevel > 0) {
      tprintf("Performing a global close operation..\n");
    }
    // A global measure is available for xheight, but no local information
    // exists. The close is applied to a private copy so that orig_pix_ itself
    // stays untouched.
    pix_for_ccs.destroy();
    pix_for_ccs = orig_pix_.copy();
    PerformClose(pix_for_ccs, global_xheight_);
  }
  // Start from nullptr so that the checks and the pixaDestroy() below never
  // operate on an indeterminate pointer, whatever pixConnComp does on failure.
  Pixa *ccs = nullptr;
  Boxa *tmp_boxa = pixConnComp(pix_for_ccs, &ccs, 8);
  boxaDestroy(&tmp_boxa);
  pix_for_ccs.destroy();

  // Iterate over all connected components. Get their bounding boxes and clip
  // out the image regions corresponding to these boxes from the original image.
  // Conditionally run splitting on each of them.
  Boxa *regions_to_clear = boxaCreate(0);
  const int num_ccs = (ccs != nullptr) ? pixaGetCount(ccs) : 0;
  for (int i = 0; i < num_ccs; ++i) {
    Box *box = pixaGetBox(ccs, i, L_CLONE);
    Image word_pix = pixClipRectangle(orig_pix_, box, nullptr);
    ASSERT_HOST(word_pix);
    int xheight = GetXheightForCC(box);
    // A block list exists but no row matched this component: flag it on the
    // debug image.
    if (xheight == kUnspecifiedXheight && segmentation_block_list_ && devanagari_split_debugimage) {
      pixRenderBoxArb(debug_image_, box, 1, 255, 0, 0);
    }
    // If some xheight measure is available, attempt to pre-eliminate small
    // blobs from the shiro-rekha process. This is primarily to save the CCs
    // corresponding to punctuation marks/small dots etc which are part of
    // larger graphemes.
    l_int32 x, y, w, h;
    boxGetGeometry(box, &x, &y, &w, &h);
    if (xheight == kUnspecifiedXheight || (w > xheight / 3 && h > xheight / 2)) {
      SplitWordShiroRekha(split_strategy, word_pix, xheight, x, y, regions_to_clear);
    } else if (devanagari_split_debuglevel > 0) {
      tprintf("CC dropped from splitting: %d,%d (%d, %d)\n", x, y, w, h);
    }
    word_pix.destroy();
    boxDestroy(&box);
  }
  // Actually clear the boxes now. The list is not modified inside the loop, so
  // its size is read once.
  const int num_regions = boxaGetCount(regions_to_clear);
  for (int i = 0; i < num_regions; ++i) {
    Box *box = boxaGetBox(regions_to_clear, i, L_CLONE);
    pixClearInRect(splitted_image_, box);
    boxDestroy(&box);
  }
  boxaDestroy(&regions_to_clear);
  pixaDestroy(&ccs);
  if (devanagari_split_debugimage && pixa_debug != nullptr) {
    pixa_debug->AddPix(debug_image_, split_for_pageseg ? "pageseg_split" : "ocr_split");
  }
  return true;
}

// Runs a brick close on pix, writing the result back into pix itself. The
// structuring element is sized from the xheight estimate: one eighth of it
// horizontally and one third of it vertically (integer division).
void ShiroRekhaSplitter::PerformClose(Image pix, int xheight_estimate) {
  const int sel_width = xheight_estimate / 8;
  const int sel_height = xheight_estimate / 3;
  pixCloseBrick(pix, pix, sel_width, sel_height);
}

// Finds the row of the segmentation block list that the given connected
// component bounding box belongs to and returns that row's xheight.
// Without a segmentation block list the global xheight is returned instead.
// Returns kUnspecifiedXheight when no row matches the box.
int ShiroRekhaSplitter::GetXheightForCC(Box *cc_bbox) {
  if (segmentation_block_list_ == nullptr) {
    return global_xheight_;
  }

  // Convert the box into Tesseract's coordinate system.
  l_int32 cc_x, cc_y, cc_w, cc_h;
  boxGetGeometry(cc_bbox, &cc_x, &cc_y, &cc_w, &cc_h);
  const int image_height = pixGetHeight(orig_pix_);
  TBOX cc_tbox(cc_x, image_height - cc_y - cc_h - 1,
               cc_x + cc_w, image_height - cc_y - 1);

  // Visit every row of every block until one claims the box.
  BLOCK_IT blocks(segmentation_block_list_);
  for (blocks.mark_cycle_pt(); !blocks.cycled_list(); blocks.forward()) {
    ROW_IT rows(blocks.data()->row_list());
    for (rows.mark_cycle_pt(); !rows.cycled_list(); rows.forward()) {
      ROW *candidate = rows.data();
      if (candidate->bounding_box().major_overlap(cc_tbox)) {
        // The row may be skewed or warped, so evaluate its baseline at the
        // horizontal centre of the box. A square with side equal to the row's
        // xheight is then placed on the baseline around that centre; the box
        // belongs to this row if it has a major overlap with that square.
        const auto row_xheight = candidate->x_height();
        float center_x = 0.5 * (cc_tbox.left() + cc_tbox.right());
        int baseline_y = static_cast<int>(candidate->base_line(center_x) + 0.5);
        TBOX xheight_square(center_x - row_xheight / 2, baseline_y, center_x + row_xheight / 2,
                            static_cast<int>(baseline_y + row_xheight));
        if (cc_tbox.major_overlap(xheight_square)) {
          return row_xheight;
        }
      }
    }
  }
  // None of the rows matched.
  return kUnspecifiedXheight;
}

// Appends to regions_to_clear the boxes which should be cleared in the
// original image so as to split the shiro-rekha of one word. pix is assumed
// to carry one (or less) word only; word_left and word_top give its offset in
// the original image. xheight may be the global estimate, the row estimate,
// or kUnspecifiedXheight. When unspecified, a multiple of the estimated
// stroke width is used in its place, which may lead to over-splitting, so a
// specified xheight is advisable when splitting for classification/training.
//
// Outline:
//  1. The y-extents of the shiro-rekha are obtained from
//     GetShiroRekhaYExtents and give the stroke width estimate.
//  2. Components whose peak row is in the lower half, or whose stroke width
//     exceeds a third of the height, are left alone.
//  3. In a copy of the word, the band around the shiro-rekha and everything
//     below (shiro-rekha bottom + leeway) are cleared. A per-column pixel
//     count of what remains is reduced to a 0/1 vector.
//  4. Runs of empty columns that are wide enough, and preceded by a wide
//     enough run of non-empty columns, become split boxes.
void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight,
                                             int word_left, int word_top, Boxa *regions_to_clear) {
  if (split_strategy == NO_SPLIT) {
    return;
  }
  const int word_width = pixGetWidth(pix);
  const int word_height = pixGetHeight(pix);

  // The shiro-rekha is itself a stroke, so its thickness serves as the stroke
  // width estimate.
  int rekha_top, rekha_bottom, rekha_peak;
  GetShiroRekhaYExtents(pix, &rekha_top, &rekha_bottom, &rekha_peak);
  const int stroke_width = rekha_bottom - rekha_top + 1;

  // Safeguards for components that should not be split. These matter most
  // when the component could not be filtered out earlier for lack of an
  // xheight.
  if (rekha_peak > word_height / 2) {
    // A shiro-rekha is not expected in the bottom half of the word.
    if (devanagari_split_debuglevel > 0) {
      tprintf("Skipping splitting CC at (%d, %d): shirorekha in lower half..\n", word_left,
              word_top);
    }
    return;
  }
  if (stroke_width > word_height / 3) {
    // Not even a very bold font should have a stroke this thick.
    if (devanagari_split_debuglevel > 0) {
      tprintf("Skipping splitting CC at (%d, %d): stroke width too huge..\n", word_left, word_top);
    }
    return;
  }

  // Vertical placement of the band around the shiro-rekha; the same band is
  // used for the mask here and for every split box further down.
  const int band_top = rekha_top - stroke_width / 3;
  const int band_height = 5 * stroke_width / 3;

  // Clear the shiro-rekha band in a working copy of the word.
  Box *mask_box = boxCreate(0, band_top, word_width, band_height);
  Image body_only = pix.copy();
  pixClearInRect(body_only, mask_box);

  // Also clear everything below the shiro-rekha bottom plus some leeway. With
  // a known xheight the leeway is the xheight minus the stroke width, because
  // the xheight region typically starts at the top of the shiro-rekha;
  // otherwise a multiple of the stroke width is used.
  const int leeway_to_keep =
      (xheight != kUnspecifiedXheight) ? xheight - stroke_width : stroke_width * 3;
  const int cutoff_y = rekha_bottom + leeway_to_keep;
  boxSetGeometry(mask_box, -1, cutoff_y, -1, word_height - cutoff_y);
  pixClearInRect(body_only, mask_box);
  boxDestroy(&mask_box);

  // Count the remaining on-pixels in every column.
  PixelHistogram column_hist;
  column_hist.ConstructVerticalCountHist(body_only);
  body_only.destroy();
  int *columns = column_hist.hist();

  // Columns holding no more than a fraction of the stroke width in pixels are
  // treated as noise. The counts are overwritten with 0/1 in place, because
  // the scan below uses them as a bit vector.
  const int noise_limit = stroke_width / 4;
  for (int col = 0; col < word_width; ++col) {
    columns[col] = (columns[col] > noise_limit) ? 1 : 0;
  }

  // Scan the columns. A gap qualifies for a split only if it is at least half
  // a stroke width wide and the ink run before it is at least that wide too.
  const int min_run = stroke_width / 2;
  const bool minimal_split = (split_strategy == MINIMAL_SPLIT);
  int ink_run = 0;
  int col = 0;
  while (col < word_width) {
    if (columns[col]) {
      ++col;
      ++ink_run;
      continue;
    }
    // Measure the run of empty columns starting at col.
    int gap = 0;
    while (col + gap < word_width && !columns[col + gap]) {
      ++gap;
    }
    const bool wide_enough = gap >= min_run && ink_run >= min_run;
    const bool touches_edge = (col == 0 || col + gap == word_width);
    // A minimal split is never made for a gap touching either edge of the
    // word; a maximal split has no such restriction.
    if (wide_enough && (!minimal_split || !touches_edge)) {
      // The gap spans columns col .. col+gap-1. A minimal split clears a
      // single column in the middle of the gap, which keeps intra- and
      // inter-word spacing easy to estimate during layout analysis; a maximal
      // split clears the whole gap, which may be needed for OCR depending on
      // how the engine was trained.
      const int split_width = minimal_split ? 1 : gap;
      const int split_left = minimal_split ? col + (gap / 2) - (split_width / 2) : col;
      Box *split_box =
          boxCreate(word_left + split_left, word_top + band_top, split_width, band_height);
      if (split_box) {
        boxaAddBox(regions_to_clear, split_box, L_CLONE);
        // Show the split on the debug image if one is being produced.
        if (devanagari_split_debugimage) {
          pixRenderBoxArb(debug_image_, split_box, 1, 128, 255, 128);
        }
        boxDestroy(&split_box);
        ink_run = 0;
      }
    }
    col += gap;
  }
}

// Rebuilds the words of the segmentation block list from the blobs in
// new_blobs, by handing both to RefreshWordBlobsFromNewBlobs.
// The segmentation block list must have been set beforehand (asserted).
// When a debug image is being produced, the blobs collected in the
// "not found" list and the blobs still present in new_blobs afterwards are
// drawn onto it.
void ShiroRekhaSplitter::RefreshSegmentationWithNewBlobs(C_BLOB_LIST *new_blobs) {
  ASSERT_HOST(segmentation_block_list_);
  if (devanagari_split_debuglevel > 0) {
    tprintf("Before refreshing blobs:\n");
    PrintSegmentationStats(segmentation_block_list_);
    tprintf("New Blobs found: %d\n", new_blobs->length());
  }

  // The unmatched blobs are only collected when they are going to be plotted.
  const bool plot_debug = devanagari_split_debugimage && debug_image_;
  C_BLOB_LIST not_found_blobs;
  C_BLOB_LIST *not_found_sink = plot_debug ? &not_found_blobs : nullptr;
  RefreshWordBlobsFromNewBlobs(segmentation_block_list_, new_blobs, not_found_sink);

  if (devanagari_split_debuglevel > 0) {
    tprintf("After refreshing blobs:\n");
    PrintSegmentationStats(segmentation_block_list_);
  }
  if (!plot_debug) {
    return;
  }

  // Draw the original blobs for which no match was found among the new blobs.
  C_BLOB_IT missing_it(&not_found_blobs);
  for (missing_it.mark_cycle_pt(); !missing_it.cycled_list(); missing_it.forward()) {
    TBOX missing_box = missing_it.data()->bounding_box();
    Box *plot_box = GetBoxForTBOX(missing_box);
    pixRenderBoxArb(debug_image_, plot_box, 1, 255, 0, 255);
    boxDestroy(&plot_box);
  }

  // Draw the blobs that are left over in the new blob list.
  C_BLOB_IT leftover_it(new_blobs);
  for (leftover_it.mark_cycle_pt(); !leftover_it.cycled_list(); leftover_it.forward()) {
    Box *plot_box = GetBoxForTBOX(leftover_it.data()->bounding_box());
    pixRenderBoxArb(debug_image_, plot_box, 3, 0, 127, 0);
    boxDestroy(&plot_box);
  }
}

// Creates a new Box for the given TBOX, expressed in the coordinate system of
// the original image: the y coordinate is flipped using the height of
// orig_pix_. The returned box comes from boxCreate.
Box *ShiroRekhaSplitter::GetBoxForTBOX(const TBOX &tbox) const {
  const int image_height = pixGetHeight(orig_pix_);
  const int box_top = image_height - tbox.top() - 1;
  return boxCreate(tbox.left(), box_top, tbox.width(), tbox.height());
}

// Returns the mode of the heights of the connected components (8-connected)
// found in pix. Components that are smaller than 3 pixels in both width and
// height are left out of the statistics.
int ShiroRekhaSplitter::GetModeHeight(Image pix) {
  Boxa *cc_boxes = pixConnComp(pix, nullptr, 8);
  STATS heights(0, pixGetHeight(pix) - 1);
  heights.clear();
  const int num_boxes = boxaGetCount(cc_boxes);
  for (int idx = 0; idx < num_boxes; ++idx) {
    Box *cc_box = boxaGetBox(cc_boxes, idx, L_CLONE);
    l_int32 left, top, box_w, box_h;
    boxGetGeometry(cc_box, &left, &top, &box_w, &box_h);
    const bool tiny = (box_h < 3 && box_w < 3);
    if (!tiny) {
      heights.add(box_h, 1);
    }
    boxDestroy(&cc_box);
  }
  boxaDestroy(&cc_boxes);
  return heights.mode();
}

// Estimates the y-extents of the shiro-rekha in the given word image.
// The row with the highest on-pixel count is taken as the shiro-rekha level.
// From there the search walks up and down for as long as the rows hold at
// least 70% of that count.
// Outputs (each is optional and skipped when its pointer is null):
//   shirorekha_top    - row at which the upward walk stopped (never below 0).
//   shirorekha_bottom - row at which the downward walk stopped (at most the
//                       image height).
//   shirorekha_ylevel - the row with the highest on-pixel count.
void ShiroRekhaSplitter::GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top,
                                               int *shirorekha_bottom, int *shirorekha_ylevel) {
  // Per-row on-pixel counts of the word.
  PixelHistogram row_hist;
  row_hist.ConstructHorizontalCountHist(word_pix);

  // The global maximum of the row counts marks the top-line.
  int peak_count = 0;
  const int peak_row = row_hist.GetHistogramMaximum(&peak_count);
  const int thresh = (peak_count * 70) / 100;

  const int *rows = row_hist.hist();
  const int pix_height = pixGetHeight(word_pix);

  // Walk upwards from the peak.
  int upper = peak_row;
  for (; upper > 0 && rows[upper] >= thresh; --upper) {
  }
  // Walk downwards from the peak.
  int lower = peak_row;
  for (; lower < pix_height && rows[lower] >= thresh; ++lower) {
  }

  if (shirorekha_top != nullptr) {
    *shirorekha_top = upper;
  }
  if (shirorekha_bottom != nullptr) {
    *shirorekha_bottom = lower;
  }
  if (shirorekha_ylevel != nullptr) {
    *shirorekha_ylevel = peak_row;
  }
}

// Returns the index of the global maximum of the histogram. On ties the
// lowest index wins. If count is non-null it receives the histogram value at
// that index.
// For an empty histogram (no buffer, or zero length) there is no element to
// read, so 0 is returned and *count is set to 0 instead of reading hist_[0].
int PixelHistogram::GetHistogramMaximum(int *count) const {
  if (hist_ == nullptr || length_ <= 0) {
    if (count) {
      *count = 0;
    }
    return 0;
  }
  int best_value = 0;
  for (int i = 1; i < length_; ++i) {
    // Strict comparison keeps the first (lowest) index among equal maxima.
    if (hist_[i] > hist_[best_value]) {
      best_value = i;
    }
  }
  if (count) {
    *count = hist_[best_value];
  }
  return best_value;
}

// Methods to construct histograms from images.

// Builds a histogram with one entry per image column, holding the number of
// set bits found in that column. Any previous histogram is cleared first.
// NOTE(review): pixels are read with GET_DATA_BIT, so pix is presumably
// expected to be a 1 bpp image - confirm against callers.
void PixelHistogram::ConstructVerticalCountHist(Image pix) {
  Clear();
  const int num_cols = pixGetWidth(pix);
  const int num_rows = pixGetHeight(pix);
  // Value-initialization zeroes every counter.
  hist_ = new int[num_cols]();
  length_ = num_cols;
  const int words_per_line = pixGetWpl(pix);
  l_uint32 *row_words = pixGetData(pix);
  for (int row = 0; row < num_rows; ++row, row_words += words_per_line) {
    for (int col = 0; col < num_cols; ++col) {
      if (GET_DATA_BIT(row_words, col)) {
        hist_[col] += 1;
      }
    }
  }
}

// Builds a histogram with one entry per image row, taken from the per-row
// pixel counts returned by pixCountPixelsByRow. Any previous histogram is
// cleared first.
void PixelHistogram::ConstructHorizontalCountHist(Image pix) {
  Clear();
  Numa *row_counts = pixCountPixelsByRow(pix, nullptr);
  length_ = numaGetCount(row_counts);
  hist_ = new int[length_];
  int row = 0;
  while (row < length_) {
    // Stays 0 if the value cannot be fetched.
    l_int32 on_pixels = 0;
    numaGetIValue(row_counts, row, &on_pixels);
    hist_[row] = on_pixels;
    ++row;
  }
  numaDestroy(&row_counts);
}

} // namespace tesseract.

Coverage Report

Created: 2025-11-16 06:50

Line	Count	Source
1		/**********************************************************************
2		* File: devanagari_processing.cpp
3		* Description: Methods to process images containing devanagari symbols,
4		* prior to classification.
5		* Author: Shobhit Saxena
6		*
7		* (C) Copyright 2008, Google Inc.
8		** Licensed under the Apache License, Version 2.0 (the "License");
9		** you may not use this file except in compliance with the License.
10		** You may obtain a copy of the License at
11		** http://www.apache.org/licenses/LICENSE-2.0
12		** Unless required by applicable law or agreed to in writing, software
13		** distributed under the License is distributed on an "AS IS" BASIS,
14		** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15		** See the License for the specific language governing permissions and
16		** limitations under the License.
17		*
18		**********************************************************************/
19
20		#ifdef HAVE_CONFIG_H
21		# include "config_auto.h"
22		#endif
23
24		#include "devanagari_processing.h"
25
26		#include "debugpixa.h"
27		#include "statistc.h"
28		#include "tordmain.h"
29
30		#include <allheaders.h>
31
32		namespace tesseract {
33
34		// Flags controlling the debugging information for shiro-rekha splitting
35		// strategies.
36		INT_VAR(devanagari_split_debuglevel, 0, "Debug level for split shiro-rekha process.");
37
38		BOOL_VAR(devanagari_split_debugimage, 0,
39		"Whether to create a debug image for split shiro-rekha process.");
40
41		ShiroRekhaSplitter::ShiroRekhaSplitter() :
42	4	orig_pix_(nullptr),
43	4	splitted_image_(nullptr),
44	4	pageseg_split_strategy_(NO_SPLIT),
45	4	ocr_split_strategy_(NO_SPLIT),
46	4	debug_image_(nullptr),
47	4	segmentation_block_list_(nullptr),
48	4	global_xheight_(kUnspecifiedXheight),
49	4	perform_close_(false)
50	4	{
51	4	}
52
53	0	ShiroRekhaSplitter::~ShiroRekhaSplitter() {
54	0	Clear();
55	0	}
56
57	32.5k	void ShiroRekhaSplitter::Clear() {
58	32.5k	orig_pix_.destroy();
59	32.5k	splitted_image_.destroy();
60	32.5k	pageseg_split_strategy_ = NO_SPLIT;
61	32.5k	ocr_split_strategy_ = NO_SPLIT;
62	32.5k	debug_image_.destroy();
63	32.5k	segmentation_block_list_ = nullptr;
64	32.5k	global_xheight_ = kUnspecifiedXheight;
65	32.5k	perform_close_ = false;
66	32.5k	}
67
68		// On setting the input image, a clone of it is owned by this class.
69	16.2k	void ShiroRekhaSplitter::set_orig_pix(Image pix) {
70	16.2k	if (orig_pix_) {
71	0	orig_pix_.destroy();
72	0	}
73	16.2k	orig_pix_ = pix.clone();
74	16.2k	}
75
76		// Top-level method to perform splitting based on current settings.
77		// Returns true if a split was actually performed.
78		// split_for_pageseg should be true if the splitting is being done prior to
79		// page segmentation. This mode uses the flag
80		// pageseg_devanagari_split_strategy to determine the splitting strategy.
81	32.5k	bool ShiroRekhaSplitter::Split(bool split_for_pageseg, DebugPixa *pixa_debug) {
82	32.5k	SplitStrategy split_strategy = split_for_pageseg ? pageseg_split_strategy_ : ocr_split_strategy_;
83	32.5k	if (split_strategy == NO_SPLIT) {
84	32.5k	return false; // Nothing to do.
85	32.5k	}
86	0	ASSERT_HOST(split_strategy == MINIMAL_SPLIT || split_strategy == MAXIMAL_SPLIT);
87	0	ASSERT_HOST(orig_pix_);
88	0	if (devanagari_split_debuglevel > 0) {
89	0	tprintf("Splitting shiro-rekha ...\n");
90	0	tprintf("Split strategy = %s\n", split_strategy == MINIMAL_SPLIT ? "Minimal" : "Maximal");
91	0	tprintf("Initial pageseg available = %s\n", segmentation_block_list_ ? "yes" : "no");
92	0	}
93		// Create a copy of original image to store the splitting output.
94	0	splitted_image_.destroy();
95	0	splitted_image_ = orig_pix_.copy();
96
97		// Initialize debug image if required.
98	0	if (devanagari_split_debugimage) {
99	0	debug_image_.destroy();
100	0	debug_image_ = pixConvertTo32(orig_pix_);
101	0	}
102
103		// Determine all connected components in the input image. A close operation
104		// may be required prior to this, depending on the current settings.
105	0	Image pix_for_ccs = orig_pix_.clone();
106	0	if (perform_close_ && global_xheight_ != kUnspecifiedXheight && !segmentation_block_list_) {
107	0	if (devanagari_split_debuglevel > 0) {
108	0	tprintf("Performing a global close operation..\n");
109	0	}
110		// A global measure is available for xheight, but no local information
111		// exists.
112	0	pix_for_ccs.destroy();
113	0	pix_for_ccs = orig_pix_.copy();
114	0	PerformClose(pix_for_ccs, global_xheight_);
115	0	}
116	0	Pixa *ccs;
117	0	Boxa *tmp_boxa = pixConnComp(pix_for_ccs, &ccs, 8);
118	0	boxaDestroy(&tmp_boxa);
119	0	pix_for_ccs.destroy();
120
121		// Iterate over all connected components. Get their bounding boxes and clip
122		// out the image regions corresponding to these boxes from the original image.
123		// Conditionally run splitting on each of them.
124	0	Boxa *regions_to_clear = boxaCreate(0);
125	0	int num_ccs = 0;
126	0	if (ccs != nullptr) {
127	0	num_ccs = pixaGetCount(ccs);
128	0	}
129	0	for (int i = 0; i < num_ccs; ++i) {
130	0	Box *box = pixaGetBox(ccs, i, L_CLONE);
131	0	Image word_pix = pixClipRectangle(orig_pix_, box, nullptr);
132	0	ASSERT_HOST(word_pix);
133	0	int xheight = GetXheightForCC(box);
134	0	if (xheight == kUnspecifiedXheight && segmentation_block_list_ && devanagari_split_debugimage) {
135	0	pixRenderBoxArb(debug_image_, box, 1, 255, 0, 0);
136	0	}
137		// If some xheight measure is available, attempt to pre-eliminate small
138		// blobs from the shiro-rekha process. This is primarily to save the CCs
139		// corresponding to punctuation marks/small dots etc which are part of
140		// larger graphemes.
141	0	l_int32 x, y, w, h;
142	0	boxGetGeometry(box, &x, &y, &w, &h);
143	0	if (xheight == kUnspecifiedXheight || (w > xheight / 3 && h > xheight / 2)) {
144	0	SplitWordShiroRekha(split_strategy, word_pix, xheight, x, y, regions_to_clear);
145	0	} else if (devanagari_split_debuglevel > 0) {
146	0	tprintf("CC dropped from splitting: %d,%d (%d, %d)\n", x, y, w, h);
147	0	}
148	0	word_pix.destroy();
149	0	boxDestroy(&box);
150	0	}
151		// Actually clear the boxes now.
152	0	for (int i = 0; i < boxaGetCount(regions_to_clear); ++i) {
153	0	Box *box = boxaGetBox(regions_to_clear, i, L_CLONE);
154	0	pixClearInRect(splitted_image_, box);
155	0	boxDestroy(&box);
156	0	}
157	0	boxaDestroy(&regions_to_clear);
158	0	pixaDestroy(&ccs);
159	0	if (devanagari_split_debugimage && pixa_debug != nullptr) {
160	0	pixa_debug->AddPix(debug_image_, split_for_pageseg ? "pageseg_split" : "ocr_split");
161	0	}
162	0	return true;
163	32.5k	}
164
165		// Method to perform a close operation on the input image. The xheight
166		// estimate decides the size of sel used.
167	0	void ShiroRekhaSplitter::PerformClose(Image pix, int xheight_estimate) {
168	0	pixCloseBrick(pix, pix, xheight_estimate / 8, xheight_estimate / 3);
169	0	}
170
171		// This method resolves the cc bbox to a particular row and returns the row's
172		// xheight.
173	0	int ShiroRekhaSplitter::GetXheightForCC(Box *cc_bbox) {
174	0	if (!segmentation_block_list_) {
175	0	return global_xheight_;
176	0	}
177		// Compute the box coordinates in Tesseract's coordinate system.
178	0	l_int32 x, y, w, h;
179	0	boxGetGeometry(cc_bbox, &x, &y, &w, &h);
180	0	TBOX bbox(x, pixGetHeight(orig_pix_) - y - h - 1,
181	0	x + w, pixGetHeight(orig_pix_) - y - 1);
182		// Iterate over all blocks.
183	0	BLOCK_IT block_it(segmentation_block_list_);
184	0	for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) {
185	0	BLOCK *block = block_it.data();
186		// Iterate over all rows in the block.
187	0	ROW_IT row_it(block->row_list());
188	0	for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) {
189	0	ROW *row = row_it.data();
190	0	if (!row->bounding_box().major_overlap(bbox)) {
191	0	continue;
192	0	}
193		// Row could be skewed, warped, etc. Use the position of the box to
194		// determine the baseline position of the row for that x-coordinate.
195		// Create a square TBOX whose baseline's mid-point lies at this point
196		// and side is row's xheight. Take the overlap of this box with the input
197		// box and check if it is a 'major overlap'. If so, this box lies in this
198		// row. In that case, return the xheight for this row.
199	0	float box_middle = 0.5 * (bbox.left() + bbox.right());
200	0	int baseline = static_cast<int>(row->base_line(box_middle) + 0.5);
201	0	TBOX test_box(box_middle - row->x_height() / 2, baseline, box_middle + row->x_height() / 2,
202	0	static_cast<int>(baseline + row->x_height()));
203		// Compute overlap. If it is a major overlap, this is the right row.
204	0	if (bbox.major_overlap(test_box)) {
205	0	return row->x_height();
206	0	}
207	0	}
208	0	}
209		// No row found for this bbox.
210	0	return kUnspecifiedXheight;
211	0	}
212
213		// Returns a list of regions (boxes) which should be cleared in the original
214		// image so as to perform shiro-rekha splitting. Pix is assumed to carry one
215		// (or less) word only. Xheight measure could be the global estimate, the row
216		// estimate, or unspecified. If unspecified, over splitting may occur, since a
217		// conservative estimate of stroke width along with an associated multiplier
218		// is used in its place. It is advisable to have a specified xheight when
219		// splitting for classification/training.
220		// A vertical projection histogram of all the on-pixels in the input pix is
221		// computed. The maxima of this histogram is regarded as an approximate location
222		// of the shiro-rekha. By descending on the maxima's peak on both sides,
223		// stroke width of shiro-rekha is estimated.
224		// A horizontal projection histogram is computed for a sub-image of the input
225		// image, which extends from just below the shiro-rekha down to a certain
226		// leeway. The leeway depends on the input xheight, if provided, else a
227		// conservative multiplier on approximate stroke width is used (which may lead
228		// to over-splitting).
229		void ShiroRekhaSplitter::SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight,
230	0	int word_left, int word_top, Boxa *regions_to_clear) {
231	0	if (split_strategy == NO_SPLIT) {
232	0	return;
233	0	}
234	0	int width = pixGetWidth(pix);
235	0	int height = pixGetHeight(pix);
236		// Statistically determine the yextents of the shiro-rekha.
237	0	int shirorekha_top, shirorekha_bottom, shirorekha_ylevel;
238	0	GetShiroRekhaYExtents(pix, &shirorekha_top, &shirorekha_bottom, &shirorekha_ylevel);
239		// Since the shiro rekha is also a stroke, its width is equal to the stroke
240		// width.
241	0	int stroke_width = shirorekha_bottom - shirorekha_top + 1;
242
243		// Some safeguards to protect CCs we do not want to be split.
244		// These are particularly useful when the word wasn't eliminated earlier
245		// because xheight information was unavailable.
246	0	if (shirorekha_ylevel > height / 2) {
247		// Shirorekha shouldn't be in the bottom half of the word.
248	0	if (devanagari_split_debuglevel > 0) {
249	0	tprintf("Skipping splitting CC at (%d, %d): shirorekha in lower half..\n", word_left,
250	0	word_top);
251	0	}
252	0	return;
253	0	}
254	0	if (stroke_width > height / 3) {
255		// Even the boldest of fonts shouldn't do this.
256	0	if (devanagari_split_debuglevel > 0) {
257	0	tprintf("Skipping splitting CC at (%d, %d): stroke width too huge..\n", word_left, word_top);
258	0	}
259	0	return;
260	0	}
261
262		// Clear the ascender and descender regions of the word.
263		// Obtain a vertical projection histogram for the resulting image.
264	0	Box *box_to_clear = boxCreate(0, shirorekha_top - stroke_width / 3, width, 5 * stroke_width / 3);
265	0	Image word_in_xheight = pix.copy();
266	0	pixClearInRect(word_in_xheight, box_to_clear);
267		// Also clear any pixels which are below shirorekha_bottom + some leeway.
268		// The leeway is set to xheight if the information is available, else it is a
269		// multiplier applied to the stroke width.
270	0	int leeway_to_keep = stroke_width * 3;
271	0	if (xheight != kUnspecifiedXheight) {
272		// This is because the xheight-region typically includes the shiro-rekha
273		// inside it, i.e., the top of the xheight range corresponds to the top of
274		// shiro-rekha.
275	0	leeway_to_keep = xheight - stroke_width;
276	0	}
277	0	auto y = shirorekha_bottom + leeway_to_keep;
278	0	boxSetGeometry(box_to_clear, -1, y, -1, height - y);
279	0	pixClearInRect(word_in_xheight, box_to_clear);
280	0	boxDestroy(&box_to_clear);
281
282	0	PixelHistogram vert_hist;
283	0	vert_hist.ConstructVerticalCountHist(word_in_xheight);
284	0	word_in_xheight.destroy();
285
286		// If the number of black pixel in any column of the image is less than a
287		// fraction of the stroke width, treat it as noise / a stray mark. Perform
288		// these changes inside the vert_hist data itself, as that is used later on as
289		// a bit vector for the final split decision at every column.
290	0	for (int i = 0; i < width; ++i) {
291	0	if (vert_hist.hist()[i] <= stroke_width / 4) {
292	0	vert_hist.hist()[i] = 0;
293	0	} else {
294	0	vert_hist.hist()[i] = 1;
295	0	}
296	0	}
297		// In order to split the line at any point, we make sure that the width of the
298		// gap is at least half the stroke width.
299	0	int i = 0;
300	0	int cur_component_width = 0;
301	0	while (i < width) {
302	0	if (!vert_hist.hist()[i]) {
303	0	int j = 0;
304	0	while (i + j < width && !vert_hist.hist()[i + j]) {
305	0	++j;
306	0	}
307	0	if (j >= stroke_width / 2 && cur_component_width >= stroke_width / 2) {
308		// Perform a shiro-rekha split. The intervening region lies from i to
309		// i+j-1.
310		// A minimal single-pixel split makes the estimation of intra- and
311		// inter-word spacing easier during page layout analysis,
312		// whereas a maximal split may be needed for OCR, depending on
313		// how the engine was trained.
314	0	bool minimal_split = (split_strategy == MINIMAL_SPLIT);
315	0	int split_width = minimal_split ? 1 : j;
316	0	int split_left = minimal_split ? i + (j / 2) - (split_width / 2) : i;
317	0	if (!minimal_split \|\| (i != 0 && i + j != width)) {
318	0	Box *box_to_clear =
319	0	boxCreate(word_left + split_left, word_top + shirorekha_top - stroke_width / 3,
320	0	split_width, 5 * stroke_width / 3);
321	0	if (box_to_clear) {
322	0	boxaAddBox(regions_to_clear, box_to_clear, L_CLONE);
323		// Mark this in the debug image if needed.
324	0	if (devanagari_split_debugimage) {
325	0	pixRenderBoxArb(debug_image_, box_to_clear, 1, 128, 255, 128);
326	0	}
327	0	boxDestroy(&box_to_clear);
328	0	cur_component_width = 0;
329	0	}
330	0	}
331	0	}
332	0	i += j;
333	0	} else {
334	0	++i;
335	0	++cur_component_width;
336	0	}
337	0	}
338	0	}
339
340		// Refreshes the words in the segmentation block list by using blobs in the
341		// input block list.
342		// The segmentation block list must be set.
343	0	void ShiroRekhaSplitter::RefreshSegmentationWithNewBlobs(C_BLOB_LIST *new_blobs) {
344		// The segmentation block list must have been specified.
345	0	ASSERT_HOST(segmentation_block_list_);
346	0	if (devanagari_split_debuglevel > 0) {
347	0	tprintf("Before refreshing blobs:\n");
348	0	PrintSegmentationStats(segmentation_block_list_);
349	0	tprintf("New Blobs found: %d\n", new_blobs->length());
350	0	}
351
352	0	C_BLOB_LIST not_found_blobs;
353	0	RefreshWordBlobsFromNewBlobs(
354	0	segmentation_block_list_, new_blobs,
355	0	((devanagari_split_debugimage && debug_image_) ? &not_found_blobs : nullptr));
356
357	0	if (devanagari_split_debuglevel > 0) {
358	0	tprintf("After refreshing blobs:\n");
359	0	PrintSegmentationStats(segmentation_block_list_);
360	0	}
361	0	if (devanagari_split_debugimage && debug_image_) {
362		// Plot out the original blobs for which no match was found in the new
363		// all_blobs list.
364	0	C_BLOB_IT not_found_it(&not_found_blobs);
365	0	for (not_found_it.mark_cycle_pt(); !not_found_it.cycled_list(); not_found_it.forward()) {
366	0	C_BLOB *not_found = not_found_it.data();
367	0	TBOX not_found_box = not_found->bounding_box();
368	0	Box *box_to_plot = GetBoxForTBOX(not_found_box);
369	0	pixRenderBoxArb(debug_image_, box_to_plot, 1, 255, 0, 255);
370	0	boxDestroy(&box_to_plot);
371	0	}
372
373		// Plot out the blobs unused from all blobs.
374	0	C_BLOB_IT all_blobs_it(new_blobs);
375	0	for (all_blobs_it.mark_cycle_pt(); !all_blobs_it.cycled_list(); all_blobs_it.forward()) {
376	0	C_BLOB *a_blob = all_blobs_it.data();
377	0	Box *box_to_plot = GetBoxForTBOX(a_blob->bounding_box());
378	0	pixRenderBoxArb(debug_image_, box_to_plot, 3, 0, 127, 0);
379	0	boxDestroy(&box_to_plot);
380	0	}
381	0	}
382	0	}
383
384		// Returns a new box object for the corresponding TBOX, based on the original
385		// image's coordinate system.
386	0	Box *ShiroRekhaSplitter::GetBoxForTBOX(const TBOX &tbox) const {
387	0	return boxCreate(tbox.left(), pixGetHeight(orig_pix_) - tbox.top() - 1, tbox.width(),
388	0	tbox.height());
389	0	}
390
391		// This method returns the computed mode-height of blobs in the pix.
392		// It also prunes very small blobs from calculation.
393	0	int ShiroRekhaSplitter::GetModeHeight(Image pix) {
394	0	Boxa *boxa = pixConnComp(pix, nullptr, 8);
395	0	STATS heights(0, pixGetHeight(pix) - 1);
396	0	heights.clear();
397	0	for (int i = 0; i < boxaGetCount(boxa); ++i) {
398	0	Box *box = boxaGetBox(boxa, i, L_CLONE);
399	0	l_int32 x, y, w, h;
400	0	boxGetGeometry(box, &x, &y, &w, &h);
401	0	if (h >= 3 \|\| w >= 3) {
402	0	heights.add(h, 1);
403	0	}
404	0	boxDestroy(&box);
405	0	}
406	0	boxaDestroy(&boxa);
407	0	return heights.mode();
408	0	}
409
410		// This method returns y-extents of the shiro-rekha computed from the input
411		// word image.
412		void ShiroRekhaSplitter::GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top,
413	0	int shirorekha_bottom, int shirorekha_ylevel) {
414		// Compute a histogram from projecting the word on a vertical line.
415	0	PixelHistogram hist_horiz;
416	0	hist_horiz.ConstructHorizontalCountHist(word_pix);
417		// Get the ylevel where the top-line exists. This is basically the global
418		// maxima in the horizontal histogram.
419	0	int topline_onpixel_count = 0;
420	0	int topline_ylevel = hist_horiz.GetHistogramMaximum(&topline_onpixel_count);
421
422		// Get the upper and lower extents of the shiro rekha.
423	0	int thresh = (topline_onpixel_count * 70) / 100;
424	0	int ulimit = topline_ylevel;
425	0	int llimit = topline_ylevel;
426	0	while (ulimit > 0 && hist_horiz.hist()[ulimit] >= thresh) {
427	0	--ulimit;
428	0	}
429	0	while (llimit < pixGetHeight(word_pix) && hist_horiz.hist()[llimit] >= thresh) {
430	0	++llimit;
431	0	}
432
433	0	if (shirorekha_top) {
434	0	*shirorekha_top = ulimit;
435	0	}
436	0	if (shirorekha_bottom) {
437	0	*shirorekha_bottom = llimit;
438	0	}
439	0	if (shirorekha_ylevel) {
440	0	*shirorekha_ylevel = topline_ylevel;
441	0	}
442	0	}
443
444		// This method returns the global-maxima for the histogram. The frequency of
445		// the global maxima is returned in count, if specified.
446	0	int PixelHistogram::GetHistogramMaximum(int *count) const {
447	0	int best_value = 0;
448	0	for (int i = 0; i < length_; ++i) {
449	0	if (hist_[i] > hist_[best_value]) {
450	0	best_value = i;
451	0	}
452	0	}
453	0	if (count) {
454	0	*count = hist_[best_value];
455	0	}
456	0	return best_value;
457	0	}
458
459		// Methods to construct histograms from images.
460	0	void PixelHistogram::ConstructVerticalCountHist(Image pix) {
461	0	Clear();
462	0	int width = pixGetWidth(pix);
463	0	int height = pixGetHeight(pix);
464	0	hist_ = new int[width];
465	0	length_ = width;
466	0	int wpl = pixGetWpl(pix);
467	0	l_uint32 *data = pixGetData(pix);
468	0	for (int i = 0; i < width; ++i) {
469	0	hist_[i] = 0;
470	0	}
471	0	for (int i = 0; i < height; ++i) {
472	0	l_uint32 line = data + i wpl;
473	0	for (int j = 0; j < width; ++j) {
474	0	if (GET_DATA_BIT(line, j)) {
475	0	++(hist_[j]);
476	0	}
477	0	}
478	0	}
479	0	}
480
481	0	void PixelHistogram::ConstructHorizontalCountHist(Image pix) {
482	0	Clear();
483	0	Numa *counts = pixCountPixelsByRow(pix, nullptr);
484	0	length_ = numaGetCount(counts);
485	0	hist_ = new int[length_];
486	0	for (int i = 0; i < length_; ++i) {
487	0	l_int32 val = 0;
488	0	numaGetIValue(counts, i, &val);
489	0	hist_[i] = val;
490	0	}
491	0	numaDestroy(&counts);
492	0	}
493
494		} // namespace tesseract.