/src/tesseract/src/textord/devanagari_processing.h

Source (jump to first uncovered line)
// Copyright 2008 Google Inc. All Rights Reserved.
// Author: shobhitsaxena@google.com (Shobhit Saxena)
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
#define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_

#include <allheaders.h>
#include "ocrblock.h"
#include "params.h"

// Forward declarations of C structs at global scope. Box and Boxa are used by
// pointer in the ShiroRekhaSplitter signatures below; Pix is not referenced
// directly in this header. Presumably these are the Leptonica types that
// <allheaders.h> also provides -- TODO confirm.
struct Pix;
struct Box;
struct Boxa;

namespace tesseract {

// Declares the integer parameter devanagari_split_debuglevel through the
// INT_VAR_H macro (presumably supplied by params.h; the matching definition is
// expected to live in a source file -- TODO confirm).
extern INT_VAR_H(devanagari_split_debuglevel);

// Declares the boolean parameter devanagari_split_debugimage through the
// BOOL_VAR_H macro. NOTE(review): judging by the name it toggles debug image
// output for the splitter -- confirm against the definition.
extern BOOL_VAR_H(devanagari_split_debugimage);

// Forward declarations. TBOX is only taken by const reference
// (ShiroRekhaSplitter::GetBoxForTBOX) and DebugPixa only by pointer
// (ShiroRekhaSplitter::Split), so complete types are not required here.
class TBOX;
class DebugPixa;

// Integer histogram built from an image. The object owns a heap array
// (hist_) holding length_ entries and releases it in Clear() / the destructor.
//
// NOTE: the class declares no copy constructor or copy assignment, so the
// implicitly generated ones copy the raw hist_ pointer. Do not copy instances:
// two copies would both try to delete[] the same buffer.
class PixelHistogram {
public:
  // Creates an empty histogram (no buffer, zero length).
  PixelHistogram() : hist_(nullptr), length_(0) {}

  // Releases the histogram buffer, if any.
  ~PixelHistogram() {
    Clear();
  }

  // Frees the histogram buffer and resets the object to the empty state.
  // hist_ is reset to nullptr so that calling Clear() again, or destroying the
  // object after an explicit Clear(), does not delete[] the same buffer twice
  // (delete[] on a null pointer is a no-op).
  void Clear() {
    delete[] hist_;
    hist_ = nullptr;
    length_ = 0;
  }

  // Returns the raw histogram buffer (nullptr when empty). The buffer remains
  // owned by this object.
  int *hist() const {
    return hist_;
  }

  // Returns the number of entries recorded for the histogram.
  int length() const {
    return length_;
  }

  // Methods to construct histograms from images. These clear any existing data.
  void ConstructVerticalCountHist(Image pix);
  void ConstructHorizontalCountHist(Image pix);

  // This method returns the global-maxima for the histogram. The frequency of
  // the global maxima is returned in count, if specified.
  int GetHistogramMaximum(int *count) const;

private:
  int *hist_;  // Owned histogram buffer; nullptr when empty.
  int length_; // Number of entries in hist_.
};

// Splits connected components along the shiro-rekha (the header line joining
// the characters of a word), using separately configurable strategies for the
// page-segmentation phase and the OCR phase.
class ShiroRekhaSplitter {
public:
  // How aggressively blobs are split in a given phase.
  enum SplitStrategy {
    NO_SPLIT = 0,  // The phase does no splitting at all.
    MINIMAL_SPLIT, // The phase splits blobs as little as possible.
    MAXIMAL_SPLIT  // The phase splits blobs as much as possible.
  };

  // Value standing for "no xheight given" (see SplitWordShiroRekha).
  static const int kUnspecifiedXheight = -1;

  ShiroRekhaSplitter();
  virtual ~ShiroRekhaSplitter();

  // Main entry point: runs the split according to the current settings and
  // reports whether any split actually took place. When split_for_pageseg is
  // true the pageseg_split_strategy_ drives the split, otherwise the
  // ocr_split_strategy_ does.
  bool Split(bool split_for_pageseg, DebugPixa *pixa_debug);

  // Releases the memory held by this object.
  void Clear();

  // Rebuilds the words of the segmentation block list from the blobs in
  // new_blobs. Requires the segmentation block list to have been set.
  void RefreshSegmentationWithNewBlobs(C_BLOB_LIST *new_blobs);

  // True when pageseg and ocr are configured with different strategies.
  bool HasDifferentSplitStrategies() const { return pageseg_split_strategy_ != ocr_split_strategy_; }

  // Stores the block list pointer only (no copy of the list is made), so the
  // list must still be alive when Split is called. The list serves as the
  // golden segmentation while splitting.
  void set_segmentation_block_list(BLOCK_LIST *block_list) { segmentation_block_list_ = block_list; }
  BLOCK_LIST *segmentation_block_list() { return segmentation_block_list_; }

  void set_global_xheight(int xheight) { global_xheight_ = xheight; }

  void set_perform_close(bool perform) { perform_close_ = perform; }

  // Image produced by shiro-rekha splitting. It stays owned by this class;
  // clone it if it must outlive the ShiroRekhaSplitter object.
  Image splitted_image() { return splitted_image_; }

  // Sets the input image; this class keeps (and owns) a clone of it.
  void set_orig_pix(Image pix);

  // Input image given to this object. It stays owned by this class; clone it
  // before working on it.
  Image orig_pix() { return orig_pix_; }

  // Strategy used for the OCR phase.
  SplitStrategy ocr_split_strategy() const { return ocr_split_strategy_; }
  void set_ocr_split_strategy(SplitStrategy strategy) { ocr_split_strategy_ = strategy; }

  // Strategy used for the page-segmentation phase.
  SplitStrategy pageseg_split_strategy() const { return pageseg_split_strategy_; }
  void set_pageseg_split_strategy(SplitStrategy strategy) { pageseg_split_strategy_ = strategy; }

  // Computes the mode of the blob heights in pix, leaving very small blobs out
  // of the calculation. Usable as a global xheight estimate for images whose
  // text is all of one point size.
  static int GetModeHeight(Image pix);

private:
  // Applies a close operation to the input image; the sel size is chosen from
  // the xheight estimate.
  static void PerformClose(Image pix, int xheight_estimate);

  // Maps the cc bbox to a row and returns that row's xheight. The segmentation
  // block list is used when available; otherwise the global_xheight_ estimate
  // currently held by the object is returned.
  int GetXheightForCC(Box *cc_bbox);

  // Collects in regions_to_clear the boxes that should be cleared in the
  // original image to carry out the shiro-rekha split. pix is assumed to hold
  // at most one word. xheight may be the global estimate, the row estimate, or
  // unspecified; when unspecified, a conservative stroke-width estimate with a
  // multiplier is used instead, which may over-split. A specified xheight is
  // advisable when splitting for classification/training.
  void SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight, int word_left,
                           int word_top, Boxa *regions_to_clear);

  // Creates a new box for the given TBOX, expressed in the coordinate system
  // of the original image.
  Box *GetBoxForTBOX(const TBOX &tbox) const;

  // Computes the y-extents of the shiro-rekha found in the input word image.
  static void GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top, int *shirorekha_bottom,
                                    int *shirorekha_ylevel);

  // Clone of the input image that was passed in.
  Image orig_pix_;
  // Result of the most recent splitting round; owned by this class.
  Image splitted_image_;
  SplitStrategy pageseg_split_strategy_;
  SplitStrategy ocr_split_strategy_;
  Image debug_image_;
  // Golden segmentation consulted while splitting (pointer only).
  BLOCK_LIST *segmentation_block_list_;
  int global_xheight_;
  // Whether to run a morphological close before CCs go through splitting.
  bool perform_close_;
};

} // namespace tesseract.

#endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_

Coverage Report

Created: 2025-06-13 07:15

Line	Count	Source (jump to first uncovered line)
1		// Copyright 2008 Google Inc. All Rights Reserved.
2		// Author: shobhitsaxena@google.com (Shobhit Saxena)
3		// Licensed under the Apache License, Version 2.0 (the "License");
4		// you may not use this file except in compliance with the License.
5		// You may obtain a copy of the License at
6		// http://www.apache.org/licenses/LICENSE-2.0
7		// Unless required by applicable law or agreed to in writing, software
8		// distributed under the License is distributed on an "AS IS" BASIS,
9		// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
10		// See the License for the specific language governing permissions and
11		// limitations under the License.
12
13		#ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
14		#define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_
15
16		#include <allheaders.h>
17		#include "ocrblock.h"
18		#include "params.h"
19
20		struct Pix;
21		struct Box;
22		struct Boxa;
23
24		namespace tesseract {
25
26		extern INT_VAR_H(devanagari_split_debuglevel);
27
28		extern BOOL_VAR_H(devanagari_split_debugimage);
29
30		class TBOX;
31		class DebugPixa;
32
33		class PixelHistogram {
34		public:
35	0	PixelHistogram() {
36	0	hist_ = nullptr;
37	0	length_ = 0;
38	0	}
39
40	0	~PixelHistogram() {
41	0	Clear();
42	0	}
43
44	0	void Clear() {
45	0	delete[] hist_;
46	0	length_ = 0;
47	0	}
48
49	0	int *hist() const {
50	0	return hist_;
51	0	}
52
53	0	int length() const {
54	0	return length_;
55	0	}
56
57		// Methods to construct histograms from images. These clear any existing data.
58		void ConstructVerticalCountHist(Image pix);
59		void ConstructHorizontalCountHist(Image pix);
60
61		// This method returns the global-maxima for the histogram. The frequency of
62		// the global maxima is returned in count, if specified.
63		int GetHistogramMaximum(int *count) const;
64
65		private:
66		int *hist_;
67		int length_;
68		};
69
70		class ShiroRekhaSplitter {
71		public:
72		enum SplitStrategy {
73		NO_SPLIT = 0, // No splitting is performed for the phase.
74		MINIMAL_SPLIT, // Blobs are split minimally.
75		MAXIMAL_SPLIT // Blobs are split maximally.
76		};
77
78		ShiroRekhaSplitter();
79		virtual ~ShiroRekhaSplitter();
80
81		// Top-level method to perform splitting based on current settings.
82		// Returns true if a split was actually performed.
83		// If split_for_pageseg is true, the pageseg_split_strategy_ is used for
84		// splitting. If false, the ocr_split_strategy_ is used.
85		bool Split(bool split_for_pageseg, DebugPixa *pixa_debug);
86
87		// Clears the memory held by this object.
88		void Clear();
89
90		// Refreshes the words in the segmentation block list by using blobs in the
91		// input blob list.
92		// The segmentation block list must be set.
93		void RefreshSegmentationWithNewBlobs(C_BLOB_LIST *new_blobs);
94
95		// Returns true if the split strategies for pageseg and ocr are different.
96	15.4k	bool HasDifferentSplitStrategies() const {
97	15.4k	return pageseg_split_strategy_ != ocr_split_strategy_;
98	15.4k	}
99
100		// This only keeps a copy of the block list pointer. At split call, the list
101		// object should still be alive. This block list is used as a golden
102		// segmentation when performing splitting.
103	15.4k	void set_segmentation_block_list(BLOCK_LIST *block_list) {
104	15.4k	segmentation_block_list_ = block_list;
105	15.4k	}
106
107		static const int kUnspecifiedXheight = -1;
108
109	0	void set_global_xheight(int xheight) {
110	0	global_xheight_ = xheight;
111	0	}
112
113	0	void set_perform_close(bool perform) {
114	0	perform_close_ = perform;
115	0	}
116
117		// Returns the image obtained from shiro-rekha splitting. The returned object
118		// is owned by this class. Callers may want to clone the returned pix to keep
119		// it alive beyond the life of ShiroRekhaSplitter object.
120	0	Image splitted_image() {
121	0	return splitted_image_;
122	0	}
123
124		// On setting the input image, a clone of it is owned by this class.
125		void set_orig_pix(Image pix);
126
127		// Returns the input image provided to the object. This object is owned by
128		// this class. Callers may want to clone the returned pix to work with it.
129	30.9k	Image orig_pix() {
130	30.9k	return orig_pix_;
131	30.9k	}
132
133	0	SplitStrategy ocr_split_strategy() const {
134	0	return ocr_split_strategy_;
135	0	}
136
137	15.4k	void set_ocr_split_strategy(SplitStrategy strategy) {
138	15.4k	ocr_split_strategy_ = strategy;
139	15.4k	}
140
141	0	SplitStrategy pageseg_split_strategy() const {
142	0	return pageseg_split_strategy_;
143	0	}
144
145	15.4k	void set_pageseg_split_strategy(SplitStrategy strategy) {
146	15.4k	pageseg_split_strategy_ = strategy;
147	15.4k	}
148
149	0	BLOCK_LIST *segmentation_block_list() {
150	0	return segmentation_block_list_;
151	0	}
152
153		// This method returns the computed mode-height of blobs in the pix.
154		// It also prunes very small blobs from calculation. Could be used to provide
155		// a global xheight estimate for images which have the same point-size text.
156		static int GetModeHeight(Image pix);
157
158		private:
159		// Method to perform a close operation on the input image. The xheight
160		// estimate decides the size of sel used.
161		static void PerformClose(Image pix, int xheight_estimate);
162
163		// This method resolves the cc bbox to a particular row and returns the row's
164		// xheight. This uses block_list_ if available, else just returns the
165		// global_xheight_ estimate currently set in the object.
166		int GetXheightForCC(Box *cc_bbox);
167
168		// Returns a list of regions (boxes) which should be cleared in the original
169		// image so as to perform shiro-rekha splitting. Pix is assumed to carry one
170		// (or less) word only. Xheight measure could be the global estimate, the row
171		// estimate, or unspecified. If unspecified, over splitting may occur, since a
172		// conservative estimate of stroke width along with an associated multiplier
173		// is used in its place. It is advisable to have a specified xheight when
174		// splitting for classification/training.
175		void SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight, int word_left,
176		int word_top, Boxa *regions_to_clear);
177
178		// Returns a new box object for the corresponding TBOX, based on the original
179		// image's coordinate system.
180		Box *GetBoxForTBOX(const TBOX &tbox) const;
181
182		// This method returns y-extents of the shiro-rekha computed from the input
183		// word image.
184		static void GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top, int *shirorekha_bottom,
185		int *shirorekha_ylevel);
186
187		Image orig_pix_; // Just a clone of the input image passed.
188		Image splitted_image_; // Image produced after the last splitting round. The
189		// object is owned by this class.
190		SplitStrategy pageseg_split_strategy_;
191		SplitStrategy ocr_split_strategy_;
192		Image debug_image_;
193		// This block list is used as a golden segmentation when performing splitting.
194		BLOCK_LIST *segmentation_block_list_;
195		int global_xheight_;
196		bool perform_close_; // Whether a morphological close operation should be
197		// performed before CCs are run through splitting.
198		};
199
200		} // namespace tesseract.
201
202		#endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_