/src/tesseract/src/textord/devanagari_processing.h
Line | Count | Source (jump to first uncovered line) |
1 | | // Copyright 2008 Google Inc. All Rights Reserved. |
2 | | // Author: shobhitsaxena@google.com (Shobhit Saxena) |
3 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
4 | | // you may not use this file except in compliance with the License. |
5 | | // You may obtain a copy of the License at |
6 | | // http://www.apache.org/licenses/LICENSE-2.0 |
7 | | // Unless required by applicable law or agreed to in writing, software |
8 | | // distributed under the License is distributed on an "AS IS" BASIS, |
9 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
10 | | // See the License for the specific language governing permissions and |
11 | | // limitations under the License. |
12 | | |
13 | | #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ |
14 | | #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ |
15 | | |
16 | | #include <allheaders.h> |
17 | | #include "ocrblock.h" |
18 | | #include "params.h" |
19 | | |
20 | | struct Pix; |
21 | | struct Box; |
22 | | struct Boxa; |
23 | | |
24 | | namespace tesseract { |
25 | | |
26 | | extern INT_VAR_H(devanagari_split_debuglevel); |
27 | | |
28 | | extern BOOL_VAR_H(devanagari_split_debugimage); |
29 | | |
30 | | class TBOX; |
31 | | class DebugPixa; |
32 | | |
33 | | class PixelHistogram { |
34 | | public: |
35 | 0 | PixelHistogram() { |
36 | 0 | hist_ = nullptr; |
37 | 0 | length_ = 0; |
38 | 0 | } |
39 | | |
40 | 0 | ~PixelHistogram() { |
41 | 0 | Clear(); |
42 | 0 | } |
43 | | |
44 | 0 | void Clear() { |
45 | 0 | delete[] hist_; |
46 | 0 | length_ = 0; |
47 | 0 | } |
48 | | |
49 | 0 | int *hist() const { |
50 | 0 | return hist_; |
51 | 0 | } |
52 | | |
53 | 0 | int length() const { |
54 | 0 | return length_; |
55 | 0 | } |
56 | | |
57 | | // Methods to construct histograms from images. These clear any existing data. |
58 | | void ConstructVerticalCountHist(Image pix); |
59 | | void ConstructHorizontalCountHist(Image pix); |
60 | | |
61 | | // This method returns the global-maxima for the histogram. The frequency of |
62 | | // the global maxima is returned in count, if specified. |
63 | | int GetHistogramMaximum(int *count) const; |
64 | | |
65 | | private: |
66 | | int *hist_; |
67 | | int length_; |
68 | | }; |
69 | | |
70 | | class ShiroRekhaSplitter { |
71 | | public: |
72 | | enum SplitStrategy { |
73 | | NO_SPLIT = 0, // No splitting is performed for the phase. |
74 | | MINIMAL_SPLIT, // Blobs are split minimally. |
75 | | MAXIMAL_SPLIT // Blobs are split maximally. |
76 | | }; |
77 | | |
78 | | ShiroRekhaSplitter(); |
79 | | virtual ~ShiroRekhaSplitter(); |
80 | | |
81 | | // Top-level method to perform splitting based on current settings. |
82 | | // Returns true if a split was actually performed. |
83 | | // If split_for_pageseg is true, the pageseg_split_strategy_ is used for |
84 | | // splitting. If false, the ocr_split_strategy_ is used. |
85 | | bool Split(bool split_for_pageseg, DebugPixa *pixa_debug); |
86 | | |
87 | | // Clears the memory held by this object. |
88 | | void Clear(); |
89 | | |
90 | | // Refreshes the words in the segmentation block list by using blobs in the |
91 | | // input blob list. |
92 | | // The segmentation block list must be set. |
93 | | void RefreshSegmentationWithNewBlobs(C_BLOB_LIST *new_blobs); |
94 | | |
95 | | // Returns true if the split strategies for pageseg and ocr are different. |
96 | 15.4k | bool HasDifferentSplitStrategies() const { |
97 | 15.4k | return pageseg_split_strategy_ != ocr_split_strategy_; |
98 | 15.4k | } |
99 | | |
100 | | // This only keeps a copy of the block list pointer. At split call, the list |
101 | | // object should still be alive. This block list is used as a golden |
102 | | // segmentation when performing splitting. |
103 | 15.4k | void set_segmentation_block_list(BLOCK_LIST *block_list) { |
104 | 15.4k | segmentation_block_list_ = block_list; |
105 | 15.4k | } |
106 | | |
107 | | static const int kUnspecifiedXheight = -1; |
108 | | |
109 | 0 | void set_global_xheight(int xheight) { |
110 | 0 | global_xheight_ = xheight; |
111 | 0 | } |
112 | | |
113 | 0 | void set_perform_close(bool perform) { |
114 | 0 | perform_close_ = perform; |
115 | 0 | } |
116 | | |
117 | | // Returns the image obtained from shiro-rekha splitting. The returned object |
118 | | // is owned by this class. Callers may want to clone the returned pix to keep |
119 | | // it alive beyond the life of ShiroRekhaSplitter object. |
120 | 0 | Image splitted_image() { |
121 | 0 | return splitted_image_; |
122 | 0 | } |
123 | | |
124 | | // On setting the input image, a clone of it is owned by this class. |
125 | | void set_orig_pix(Image pix); |
126 | | |
127 | | // Returns the input image provided to the object. This object is owned by |
128 | | // this class. Callers may want to clone the returned pix to work with it. |
129 | 30.9k | Image orig_pix() { |
130 | 30.9k | return orig_pix_; |
131 | 30.9k | } |
132 | | |
133 | 0 | SplitStrategy ocr_split_strategy() const { |
134 | 0 | return ocr_split_strategy_; |
135 | 0 | } |
136 | | |
137 | 15.4k | void set_ocr_split_strategy(SplitStrategy strategy) { |
138 | 15.4k | ocr_split_strategy_ = strategy; |
139 | 15.4k | } |
140 | | |
141 | 0 | SplitStrategy pageseg_split_strategy() const { |
142 | 0 | return pageseg_split_strategy_; |
143 | 0 | } |
144 | | |
145 | 15.4k | void set_pageseg_split_strategy(SplitStrategy strategy) { |
146 | 15.4k | pageseg_split_strategy_ = strategy; |
147 | 15.4k | } |
148 | | |
149 | 0 | BLOCK_LIST *segmentation_block_list() { |
150 | 0 | return segmentation_block_list_; |
151 | 0 | } |
152 | | |
153 | | // This method returns the computed mode-height of blobs in the pix. |
154 | | // It also prunes very small blobs from calculation. Could be used to provide |
155 | | // a global xheight estimate for images which have the same point-size text. |
156 | | static int GetModeHeight(Image pix); |
157 | | |
158 | | private: |
159 | | // Method to perform a close operation on the input image. The xheight |
160 | | // estimate decides the size of sel used. |
161 | | static void PerformClose(Image pix, int xheight_estimate); |
162 | | |
163 | | // This method resolves the cc bbox to a particular row and returns the row's |
164 | | // xheight. This uses block_list_ if available, else just returns the |
165 | | // global_xheight_ estimate currently set in the object. |
166 | | int GetXheightForCC(Box *cc_bbox); |
167 | | |
168 | | // Returns a list of regions (boxes) which should be cleared in the original |
169 | | // image so as to perform shiro-rekha splitting. Pix is assumed to carry one |
170 | | // (or less) word only. Xheight measure could be the global estimate, the row |
171 | | // estimate, or unspecified. If unspecified, over splitting may occur, since a |
172 | | // conservative estimate of stroke width along with an associated multiplier |
173 | | // is used in its place. It is advisable to have a specified xheight when |
174 | | // splitting for classification/training. |
175 | | void SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight, int word_left, |
176 | | int word_top, Boxa *regions_to_clear); |
177 | | |
178 | | // Returns a new box object for the corresponding TBOX, based on the original |
179 | | // image's coordinate system. |
180 | | Box *GetBoxForTBOX(const TBOX &tbox) const; |
181 | | |
182 | | // This method returns y-extents of the shiro-rekha computed from the input |
183 | | // word image. |
184 | | static void GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top, int *shirorekha_bottom, |
185 | | int *shirorekha_ylevel); |
186 | | |
187 | | Image orig_pix_; // Just a clone of the input image passed. |
188 | | Image splitted_image_; // Image produced after the last splitting round. The |
189 | | // object is owned by this class. |
190 | | SplitStrategy pageseg_split_strategy_; |
191 | | SplitStrategy ocr_split_strategy_; |
192 | | Image debug_image_; |
193 | | // This block list is used as a golden segmentation when performing splitting. |
194 | | BLOCK_LIST *segmentation_block_list_; |
195 | | int global_xheight_; |
196 | | bool perform_close_; // Whether a morphological close operation should be |
197 | | // performed before CCs are run through splitting. |
198 | | }; |
199 | | |
200 | | } // namespace tesseract. |
201 | | |
202 | | #endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_ |