/src/tesseract/src/textord/devanagari_processing.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | // Copyright 2008 Google Inc. All Rights Reserved.  | 
2  |  | // Author: shobhitsaxena@google.com (Shobhit Saxena)  | 
3  |  | // Licensed under the Apache License, Version 2.0 (the "License");  | 
4  |  | // you may not use this file except in compliance with the License.  | 
5  |  | // You may obtain a copy of the License at  | 
6  |  | // http://www.apache.org/licenses/LICENSE-2.0  | 
7  |  | // Unless required by applicable law or agreed to in writing, software  | 
8  |  | // distributed under the License is distributed on an "AS IS" BASIS,  | 
9  |  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
10  |  | // See the License for the specific language governing permissions and  | 
11  |  | // limitations under the License.  | 
12  |  |  | 
13  |  | #ifndef TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_  | 
14  |  | #define TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_  | 
15  |  |  | 
16  |  | #include <allheaders.h>  | 
17  |  | #include "ocrblock.h"  | 
18  |  | #include "params.h"  | 
19  |  |  | 
20  |  | struct Pix;  | 
21  |  | struct Box;  | 
22  |  | struct Boxa;  | 
23  |  |  | 
24  |  | namespace tesseract { | 
25  |  |  | 
26  |  | extern INT_VAR_H(devanagari_split_debuglevel);  | 
27  |  |  | 
28  |  | extern BOOL_VAR_H(devanagari_split_debugimage);  | 
29  |  |  | 
30  |  | class TBOX;  | 
31  |  | class DebugPixa;  | 
32  |  |  | 
33  |  | class PixelHistogram { | 
34  |  | public:  | 
35  | 0  |   PixelHistogram() { | 
36  | 0  |     hist_ = nullptr;  | 
37  | 0  |     length_ = 0;  | 
38  | 0  |   }  | 
39  |  |  | 
40  | 0  |   ~PixelHistogram() { | 
41  | 0  |     Clear();  | 
42  | 0  |   }  | 
43  |  |  | 
44  | 0  |   void Clear() { | 
45  | 0  |     delete[] hist_;  | 
46  | 0  |     length_ = 0;  | 
47  | 0  |   }  | 
48  |  |  | 
49  | 0  |   int *hist() const { | 
50  | 0  |     return hist_;  | 
51  | 0  |   }  | 
52  |  |  | 
53  | 0  |   int length() const { | 
54  | 0  |     return length_;  | 
55  | 0  |   }  | 
56  |  |  | 
57  |  |   // Methods to construct histograms from images. These clear any existing data.  | 
58  |  |   void ConstructVerticalCountHist(Image pix);  | 
59  |  |   void ConstructHorizontalCountHist(Image pix);  | 
60  |  |  | 
61  |  |   // This method returns the global-maxima for the histogram. The frequency of  | 
62  |  |   // the global maxima is returned in count, if specified.  | 
63  |  |   int GetHistogramMaximum(int *count) const;  | 
64  |  |  | 
65  |  | private:  | 
66  |  |   int *hist_;  | 
67  |  |   int length_;  | 
68  |  | };  | 
69  |  |  | 
70  |  | class ShiroRekhaSplitter { | 
71  |  | public:  | 
72  |  |   enum SplitStrategy { | 
73  |  |     NO_SPLIT = 0,  // No splitting is performed for the phase.  | 
74  |  |     MINIMAL_SPLIT, // Blobs are split minimally.  | 
75  |  |     MAXIMAL_SPLIT  // Blobs are split maximally.  | 
76  |  |   };  | 
77  |  |  | 
78  |  |   ShiroRekhaSplitter();  | 
79  |  |   virtual ~ShiroRekhaSplitter();  | 
80  |  |  | 
81  |  |   // Top-level method to perform splitting based on current settings.  | 
82  |  |   // Returns true if a split was actually performed.  | 
83  |  |   // If split_for_pageseg is true, the pageseg_split_strategy_ is used for  | 
84  |  |   // splitting. If false, the ocr_split_strategy_ is used.  | 
85  |  |   bool Split(bool split_for_pageseg, DebugPixa *pixa_debug);  | 
86  |  |  | 
87  |  |   // Clears the memory held by this object.  | 
88  |  |   void Clear();  | 
89  |  |  | 
90  |  |   // Refreshes the words in the segmentation block list by using blobs in the  | 
91  |  |   // input blob list.  | 
92  |  |   // The segmentation block list must be set.  | 
93  |  |   void RefreshSegmentationWithNewBlobs(C_BLOB_LIST *new_blobs);  | 
94  |  |  | 
95  |  |   // Returns true if the split strategies for pageseg and ocr are different.  | 
96  | 15.4k  |   bool HasDifferentSplitStrategies() const { | 
97  | 15.4k  |     return pageseg_split_strategy_ != ocr_split_strategy_;  | 
98  | 15.4k  |   }  | 
99  |  |  | 
100  |  |   // This only keeps a copy of the block list pointer. At split call, the list  | 
101  |  |   // object should still be alive. This block list is used as a golden  | 
102  |  |   // segmentation when performing splitting.  | 
103  | 15.4k  |   void set_segmentation_block_list(BLOCK_LIST *block_list) { | 
104  | 15.4k  |     segmentation_block_list_ = block_list;  | 
105  | 15.4k  |   }  | 
106  |  |  | 
107  |  |   static const int kUnspecifiedXheight = -1;  | 
108  |  |  | 
109  | 0  |   void set_global_xheight(int xheight) { | 
110  | 0  |     global_xheight_ = xheight;  | 
111  | 0  |   }  | 
112  |  |  | 
113  | 0  |   void set_perform_close(bool perform) { | 
114  | 0  |     perform_close_ = perform;  | 
115  | 0  |   }  | 
116  |  |  | 
117  |  |   // Returns the image obtained from shiro-rekha splitting. The returned object  | 
118  |  |   // is owned by this class. Callers may want to clone the returned pix to keep  | 
119  |  |   // it alive beyond the life of ShiroRekhaSplitter object.  | 
120  | 0  |   Image splitted_image() { | 
121  | 0  |     return splitted_image_;  | 
122  | 0  |   }  | 
123  |  |  | 
124  |  |   // On setting the input image, a clone of it is owned by this class.  | 
125  |  |   void set_orig_pix(Image pix);  | 
126  |  |  | 
127  |  |   // Returns the input image provided to the object. This object is owned by  | 
128  |  |   // this class. Callers may want to clone the returned pix to work with it.  | 
129  | 30.9k  |   Image orig_pix() { | 
130  | 30.9k  |     return orig_pix_;  | 
131  | 30.9k  |   }  | 
132  |  |  | 
133  | 0  |   SplitStrategy ocr_split_strategy() const { | 
134  | 0  |     return ocr_split_strategy_;  | 
135  | 0  |   }  | 
136  |  |  | 
137  | 15.4k  |   void set_ocr_split_strategy(SplitStrategy strategy) { | 
138  | 15.4k  |     ocr_split_strategy_ = strategy;  | 
139  | 15.4k  |   }  | 
140  |  |  | 
141  | 0  |   SplitStrategy pageseg_split_strategy() const { | 
142  | 0  |     return pageseg_split_strategy_;  | 
143  | 0  |   }  | 
144  |  |  | 
145  | 15.4k  |   void set_pageseg_split_strategy(SplitStrategy strategy) { | 
146  | 15.4k  |     pageseg_split_strategy_ = strategy;  | 
147  | 15.4k  |   }  | 
148  |  |  | 
149  | 0  |   BLOCK_LIST *segmentation_block_list() { | 
150  | 0  |     return segmentation_block_list_;  | 
151  | 0  |   }  | 
152  |  |  | 
153  |  |   // This method returns the computed mode-height of blobs in the pix.  | 
154  |  |   // It also prunes very small blobs from calculation. Could be used to provide  | 
155  |  |   // a global xheight estimate for images which have the same point-size text.  | 
156  |  |   static int GetModeHeight(Image pix);  | 
157  |  |  | 
158  |  | private:  | 
159  |  |   // Method to perform a close operation on the input image. The xheight  | 
160  |  |   // estimate decides the size of sel used.  | 
161  |  |   static void PerformClose(Image pix, int xheight_estimate);  | 
162  |  |  | 
163  |  |   // This method resolves the cc bbox to a particular row and returns the row's  | 
164  |  |   // xheight. This uses block_list_ if available, else just returns the  | 
165  |  |   // global_xheight_ estimate currently set in the object.  | 
166  |  |   int GetXheightForCC(Box *cc_bbox);  | 
167  |  |  | 
168  |  |   // Returns a list of regions (boxes) which should be cleared in the original  | 
169  |  |   // image so as to perform shiro-rekha splitting. Pix is assumed to carry one  | 
170  |  |   // (or less) word only. Xheight measure could be the global estimate, the row  | 
171  |  |   // estimate, or unspecified. If unspecified, over splitting may occur, since a  | 
172  |  |   // conservative estimate of stroke width along with an associated multiplier  | 
173  |  |   // is used in its place. It is advisable to have a specified xheight when  | 
174  |  |   // splitting for classification/training.  | 
175  |  |   void SplitWordShiroRekha(SplitStrategy split_strategy, Image pix, int xheight, int word_left,  | 
176  |  |                            int word_top, Boxa *regions_to_clear);  | 
177  |  |  | 
178  |  |   // Returns a new box object for the corresponding TBOX, based on the original  | 
179  |  |   // image's coordinate system.  | 
180  |  |   Box *GetBoxForTBOX(const TBOX &tbox) const;  | 
181  |  |  | 
182  |  |   // This method returns y-extents of the shiro-rekha computed from the input  | 
183  |  |   // word image.  | 
184  |  |   static void GetShiroRekhaYExtents(Image word_pix, int *shirorekha_top, int *shirorekha_bottom,  | 
185  |  |                                     int *shirorekha_ylevel);  | 
186  |  |  | 
187  |  |   Image orig_pix_;       // Just a clone of the input image passed.  | 
188  |  |   Image splitted_image_; // Image produced after the last splitting round. The  | 
189  |  |                         // object is owned by this class.  | 
190  |  |   SplitStrategy pageseg_split_strategy_;  | 
191  |  |   SplitStrategy ocr_split_strategy_;  | 
192  |  |   Image debug_image_;  | 
193  |  |   // This block list is used as a golden segmentation when performing splitting.  | 
194  |  |   BLOCK_LIST *segmentation_block_list_;  | 
195  |  |   int global_xheight_;  | 
196  |  |   bool perform_close_; // Whether a morphological close operation should be  | 
197  |  |                        // performed before CCs are run through splitting.  | 
198  |  | };  | 
199  |  |  | 
200  |  | } // namespace tesseract.  | 
201  |  |  | 
202  |  | #endif // TESSERACT_TEXTORD_DEVNAGARI_PROCESSING_H_  |