/src/tesseract/src/textord/strokewidth.h
Line  | Count  | Source (jump to first uncovered line)  | 
1  |  | ///////////////////////////////////////////////////////////////////////  | 
2  |  | // File:        strokewidth.h  | 
3  |  | // Description: Subclass of BBGrid to find uniformity of strokewidth.  | 
4  |  | // Author:      Ray Smith  | 
5  |  | //  | 
6  |  | // (C) Copyright 2008, Google Inc.  | 
7  |  | // Licensed under the Apache License, Version 2.0 (the "License");  | 
8  |  | // you may not use this file except in compliance with the License.  | 
9  |  | // You may obtain a copy of the License at  | 
10  |  | // http://www.apache.org/licenses/LICENSE-2.0  | 
11  |  | // Unless required by applicable law or agreed to in writing, software  | 
12  |  | // distributed under the License is distributed on an "AS IS" BASIS,  | 
13  |  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  | 
14  |  | // See the License for the specific language governing permissions and  | 
15  |  | // limitations under the License.  | 
16  |  | //  | 
17  |  | ///////////////////////////////////////////////////////////////////////  | 
18  |  |  | 
19  |  | #ifndef TESSERACT_TEXTORD_STROKEWIDTH_H_  | 
20  |  | #define TESSERACT_TEXTORD_STROKEWIDTH_H_  | 
21  |  |  | 
22  |  | #include "blobbox.h"  // BlobNeighbourDir.  | 
23  |  | #include "blobgrid.h" // Base class.  | 
24  |  | #include "colpartitiongrid.h"  | 
25  |  | #include "textlineprojection.h"  | 
26  |  |  | 
27  |  | class DENORM;  | 
28  |  | class ScrollView;  | 
29  |  | class TO_BLOCK;  | 
30  |  |  | 
31  |  | namespace tesseract { | 
32  |  |  | 
33  |  | class ColPartition_LIST;  | 
34  |  | class TabFind;  | 
35  |  | class TextlineProjection;  | 
36  |  |  | 
37  |  | // Enum used instead of a bool argument to select the left or right side.  | 
38  |  | enum LeftOrRight { LR_LEFT, LR_RIGHT }; | 
39  |  |  | 
40  |  | // Return value of FindInitialPartitions, reporting whether severe skew or  | 
41  |  | // noise was detected.  | 
42  |  | enum PartitionFindResult { | 
43  |  |   PFR_OK,   // Everything is OK.  | 
44  |  |   PFR_SKEW, // Skew was detected and rotated.  | 
45  |  |   PFR_NOISE // Noise was detected and removed.  | 
46  |  | };  | 
47  |  |  | 
48  |  | /**  | 
49  |  |  * The StrokeWidth class holds all the normal and large blobs.  | 
50  |  |  * It is used to find good large blobs and move them to the normal blobs  | 
51  |  |  * by virtue of having a reasonable strokewidth-compatible neighbour.  | 
52  |  |  */  | 
53  |  | class StrokeWidth : public BlobGrid { | 
54  |  | public:  | 
55  |  |   StrokeWidth(int gridsize, const ICOORD &bleft, const ICOORD &tright);  | 
56  |  |   ~StrokeWidth() override;  | 
57  |  |  | 
58  |  |   // Sets the neighbours member of the medium-sized blobs in the block.  | 
59  |  |   // Searches on 4 sides of each blob for similar-sized, similar-strokewidth  | 
60  |  |   // blobs and sets pointers to the good neighbours.  | 
61  |  |   void SetNeighboursOnMediumBlobs(TO_BLOCK *block);  | 
62  |  |  | 
63  |  |   // Sets the neighbour/textline writing direction members of the medium  | 
64  |  |   // and large blobs with optional repair of broken CJK characters first.  | 
65  |  |   // Repair of broken CJK is needed here because broken CJK characters  | 
66  |  |   // can fool the textline direction detection algorithm.  | 
67  |  |   void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, bool cjk_merge,  | 
68  |  |                                             TO_BLOCK *input_block);  | 
69  |  |  | 
70  |  |   // To save computation, the process of generating partitions is broken  | 
71  |  |   // into the following 4 steps:  | 
72  |  |   // TestVerticalTextDirection  | 
73  |  |   // CorrectForRotation (used only if a rotation is to be applied)  | 
74  |  |   // FindLeaderPartitions  | 
75  |  |   // GradeBlobsIntoPartitions.  | 
76  |  |   // These functions are all required, in sequence, except for  | 
77  |  |   // CorrectForRotation, which is not needed if no rotation is applied.  | 
78  |  |  | 
79  |  |   // Types all the blobs as vertical or horizontal text or unknown and  | 
80  |  |   // returns true if the majority are vertical.  | 
81  |  |   // If the blobs are rotated, it is necessary to call CorrectForRotation  | 
82  |  |   // after rotating everything, otherwise the work done here will be enough.  | 
83  |  |   // If osd_blobs is not null, a list of blobs from the dominant textline  | 
84  |  |   // direction is returned for use in orientation and script detection.  | 
85  |  |   // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio.  | 
86  |  |   bool TestVerticalTextDirection(double find_vertical_text_ratio, TO_BLOCK *block,  | 
87  |  |                                  BLOBNBOX_CLIST *osd_blobs);  | 
88  |  |  | 
89  |  |   // Corrects the data structures for the given rotation.  | 
90  |  |   void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid);  | 
91  |  |  | 
92  |  |   // Finds leader partitions and inserts them into the given grid.  | 
93  |  |   void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid);  | 
94  |  |  | 
95  |  |   // Finds and marks as noise those blobs that look like bits of vertical lines  | 
96  |  |   // that would otherwise disrupt layout analysis.  | 
97  |  |   void RemoveLineResidue(ColPartition_LIST *big_part_list);  | 
98  |  |  | 
99  |  |   // Types all the blobs as vertical text or horizontal text or unknown and  | 
100  |  |   // puts them into initial ColPartitions in the supplied part_grid.  | 
101  |  |   // rerotation determines how to get back to the image coordinates from the  | 
102  |  |   // blob coordinates (since they may have been rotated for vertical text).  | 
103  |  |   // block is the single block for the whole page or rectangle to be OCRed.  | 
104  |  |   // nontext_pix (full-size), is a binary mask used to prevent merges across  | 
105  |  |   // photo/text boundaries. It is not kept beyond this function.  | 
106  |  |   // denorm provides a mapping back to the image from the current blob  | 
107  |  |   // coordinate space.  | 
108  |  |   // projection provides a measure of textline density over the image and  | 
109  |  |   // provides functions to assist with diacritic detection. It should be a  | 
110  |  |   // pointer to a new TextlineProjection, and will be setup here.  | 
111  |  |   // part_grid is the output grid of textline partitions.  | 
112  |  |   // Large blobs that cause overlap are put in separate partitions and added  | 
113  |  |   // to the big_parts list.  | 
114  |  |   void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block,  | 
115  |  |                                 Image nontext_pix, const DENORM *denorm, bool cjk_script,  | 
116  |  |                                 TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs,  | 
117  |  |                                 ColPartitionGrid *part_grid, ColPartition_LIST *big_parts);  | 
118  |  |  | 
119  |  |   // Handles a click event in a display window.  | 
120  |  |   void HandleClick(int x, int y) override;  | 
121  |  |  | 
122  |  | private:  | 
123  |  |   // Computes the noise_density_ by summing the number of elements in a  | 
124  |  |   // neighbourhood of each grid cell.  | 
125  |  |   void ComputeNoiseDensity(TO_BLOCK *block, TabFind *line_grid);  | 
126  |  |  | 
127  |  |   // Detects and marks leader dots/dashes.  | 
128  |  |   //    Leaders are horizontal chains of small or noise blobs that look  | 
129  |  |   //    monospace according to ColPartition::MarkAsLeaderIfMonospaced().  | 
130  |  |   // Detected leaders become the only occupants of the block->small_blobs list.  | 
131  |  |   // Non-leader small blobs get moved to the blobs list.  | 
132  |  |   // Non-leader noise blobs remain singletons in the noise list.  | 
133  |  |   // All small and noise blobs in high density regions are marked BTFT_NONTEXT.  | 
134  |  |   // block is the single block for the whole page or rectangle to be OCRed.  | 
135  |  |   // leader_parts is the output.  | 
136  |  |   void FindLeadersAndMarkNoise(TO_BLOCK *block, ColPartition_LIST *leader_parts);  | 
137  |  |  | 
138  |  |   /** Inserts the block blobs (normal and large) into this grid.  | 
139  |  |    * Blobs remain owned by the block. */  | 
140  |  |   void InsertBlobs(TO_BLOCK *block);  | 
141  |  |  | 
142  |  |   // Fix broken CJK characters, using the fake joined blobs mechanism.  | 
143  |  |   // Blobs are really merged, ie the master takes all the outlines and the  | 
144  |  |   // others are deleted.  | 
145  |  |   // Returns true if sufficient blobs are merged that it may be worth running  | 
146  |  |   // again, due to a better estimate of character size.  | 
147  |  |   bool FixBrokenCJK(TO_BLOCK *block);  | 
148  |  |  | 
149  |  |   // Collect blobs that overlap or are within max_dist of the input bbox.  | 
150  |  |   // Return them in the list of blobs and expand the bbox to be the union  | 
151  |  |   // of all the boxes. not_this is excluded from the search, as are blobs  | 
152  |  |   // that cause the merged box to exceed max_size in either dimension.  | 
153  |  |   void AccumulateOverlaps(const BLOBNBOX *not_this, bool debug, int max_size, int max_dist,  | 
154  |  |                           TBOX *bbox, BLOBNBOX_CLIST *blobs);  | 
155  |  |  | 
156  |  |   // For each blob in this grid, finds the textline direction to be horizontal  | 
157  |  |   // or vertical according to distance to neighbours and 1st and 2nd order  | 
158  |  |   // neighbours. Non-text tends to end up without a definite direction.  | 
159  |  |   // Result is setting of the neighbours and vert_possible/horz_possible  | 
160  |  |   // flags in the BLOBNBOXes currently in this grid.  | 
161  |  |   // This function is called more than once if page orientation is uncertain,  | 
162  |  |   // so display_if_debugging is true on the final call to display the results.  | 
163  |  |   void FindTextlineFlowDirection(PageSegMode pageseg_mode, bool display_if_debugging);  | 
164  |  |  | 
165  |  |   // Sets the neighbours and good_stroke_neighbours members of the blob by  | 
166  |  |   // searching close on all 4 sides.  | 
167  |  |   // When finding leader dots/dashes, there is a slightly different rule for  | 
168  |  |   // what makes a good neighbour.  | 
169  |  |   // If activate_line_trap, then line-like objects are found and isolated.  | 
170  |  |   void SetNeighbours(bool leaders, bool activate_line_trap, BLOBNBOX *blob);  | 
171  |  |  | 
172  |  |   // Sets the good_stroke_neighbours member of the blob if it has a  | 
173  |  |   // GoodNeighbour on the given side.  | 
174  |  |   // Also sets the neighbour in the blob, whether or not a good one is found.  | 
175  |  |   // Return value is the number of neighbours in the line trap size range.  | 
176  |  |   // Leaders get extra special lenient treatment.  | 
177  |  |   int FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, BLOBNBOX *blob);  | 
178  |  |  | 
179  |  |   // Marks the blob as only horizontal or vertical where the evidence  | 
180  |  |   // is clear based on gaps of 2nd order neighbours.  | 
181  |  |   void SetNeighbourFlows(BLOBNBOX *blob);  | 
182  |  |  | 
183  |  |   // Nullify the neighbours in the wrong directions where the direction  | 
184  |  |   // is clear-cut based on a distance margin. Good for isolating vertical  | 
185  |  |   // text from neighbouring horizontal text.  | 
186  |  |   void SimplifyObviousNeighbours(BLOBNBOX *blob);  | 
187  |  |  | 
188  |  |   // Smoothes the vertical/horizontal type of the blob based on the  | 
189  |  |   // 2nd-order neighbours. If the bool flag is true, then all blobs are  | 
190  |  |   // changed. Otherwise, only ambiguous blobs are processed. NOTE(review): this comment used to call the flag reset_all, but it is declared as desperate here -- confirm they are the same flag.  | 
191  |  |   void SmoothNeighbourTypes(PageSegMode pageseg_mode, bool desperate, BLOBNBOX *blob);  | 
192  |  |  | 
193  |  |   // Checks the left or right side of the given leader partition and sets the  | 
194  |  |   // (opposite) leader_on_right or leader_on_left flags for blobs  | 
195  |  |   // that are next to the given side of the given leader partition.  | 
196  |  |   void MarkLeaderNeighbours(const ColPartition *part, LeftOrRight side);  | 
197  |  |  | 
198  |  |   // Partition creation. Accumulates vertical and horizontal text chains,  | 
199  |  |   // puts the remaining blobs in as unknowns, and then merges/splits to  | 
200  |  |   // minimize overlap and smoothes the types with neighbours and the color  | 
201  |  |   // image if provided. rerotation is used to rotate the coordinate space  | 
202  |  |   // back to the nontext_map_ image.  | 
203  |  |   // If find_problems is true, detects possible noise pollution by the amount  | 
204  |  |   // of partition overlap that is created by the diacritics. If excessive, the  | 
205  |  |   // noise is separated out into diacritic blobs, and PFR_NOISE is returned.  | 
206  |  |   // [TODO(rays): if the partition overlap is caused by heavy skew, deskews  | 
207  |  |   // the components, saves the skew_angle and returns PFR_SKEW.] If the return  | 
208  |  |   // is not PFR_OK, the job is incomplete, and FindInitialPartitions must be  | 
209  |  |   // called again after cleaning up the partly done work.  | 
210  |  |   PartitionFindResult FindInitialPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation,  | 
211  |  |                                             bool find_problems, TO_BLOCK *block,  | 
212  |  |                                             BLOBNBOX_LIST *diacritic_blobs,  | 
213  |  |                                             ColPartitionGrid *part_grid,  | 
214  |  |                                             ColPartition_LIST *big_parts, FCOORD *skew_angle);  | 
215  |  |   // Detects noise by a significant increase in partition overlap from  | 
216  |  |   // pre_overlap to now, and removes noise from the union of all the overlapping  | 
217  |  |   // partitions, placing the blobs in diacritic_blobs. Returns true if any noise  | 
218  |  |   // was found and removed.  | 
219  |  |   bool DetectAndRemoveNoise(int pre_overlap, const TBOX &grid_box, TO_BLOCK *block,  | 
220  |  |                             ColPartitionGrid *part_grid, BLOBNBOX_LIST *diacritic_blobs);  | 
221  |  |   // Finds vertical chains of text-like blobs and puts them in ColPartitions.  | 
222  |  |   void FindVerticalTextChains(ColPartitionGrid *part_grid);  | 
223  |  |   // Finds horizontal chains of text-like blobs and puts them in ColPartitions.  | 
224  |  |   void FindHorizontalTextChains(ColPartitionGrid *part_grid);  | 
225  |  |   // Finds diacritics and saves their base character in the blob.  | 
226  |  |   void TestDiacritics(ColPartitionGrid *part_grid, TO_BLOCK *block);  | 
227  |  |   // Searches this grid for an appropriately close and sized neighbour of the  | 
228  |  |   // given [small] blob. If such a blob is found, the diacritic base is saved  | 
229  |  |   // in the blob and true is returned.  | 
230  |  |   // The small_grid is a secondary grid that contains the small/noise objects  | 
231  |  |   // that are not in this grid, but may be useful for determining a connection  | 
232  |  |   // between blob and its potential base character. (See DiacriticXGapFilled.)  | 
233  |  |   bool DiacriticBlob(BlobGrid *small_grid, BLOBNBOX *blob);  | 
234  |  |   // Returns true if there is no gap between the base char and the diacritic  | 
235  |  |   // bigger than a fraction of the height of the base char:  | 
236  |  |   // Eg: line end.....'  | 
237  |  |   // The quote is a long way from the end of the line, yet it needs to be a  | 
238  |  |   // diacritic. To determine that the quote is not part of an image, or  | 
239  |  |   // a different text block, we check for other marks in the gap between  | 
240  |  |   // the base char and the diacritic.  | 
241  |  |   //                          '<--Diacritic  | 
242  |  |   // |---------|  | 
243  |  |   // |         |<-toobig-gap->  | 
244  |  |   // | Base    |<ok gap>  | 
245  |  |   // |---------|        x<-----Dot occupying gap  | 
246  |  |   // The grid is const really.  | 
247  |  |   bool DiacriticXGapFilled(BlobGrid *grid, const TBOX &diacritic_box, const TBOX &base_box);  | 
248  |  |   // Merges diacritics with the ColPartition of the base character blob.  | 
249  |  |   void MergeDiacritics(TO_BLOCK *block, ColPartitionGrid *part_grid);  | 
250  |  |   // Any blobs on the large_blobs list of block that are still unowned by a  | 
251  |  |   // ColPartition are probably drop-cap or vertically touching, so the blobs  | 
252  |  |   // are removed to the big_parts list and treated separately.  | 
253  |  |   void RemoveLargeUnusedBlobs(TO_BLOCK *block, ColPartitionGrid *part_grid,  | 
254  |  |                               ColPartition_LIST *big_parts);  | 
255  |  |  | 
256  |  |   // All remaining unused blobs are put in individual ColPartitions.  | 
257  |  |   void PartitionRemainingBlobs(PageSegMode pageseg_mode, ColPartitionGrid *part_grid);  | 
258  |  |  | 
259  |  |   // If combine, put all blobs in the cell_list into a single partition,  | 
260  |  |   // otherwise put each one into its own partition.  | 
261  |  |   void MakePartitionsFromCellList(PageSegMode pageseg_mode, bool combine,  | 
262  |  |                                   ColPartitionGrid *part_grid, BLOBNBOX_CLIST *cell_list);  | 
263  |  |  | 
264  |  |   // Helper function to finish setting up a ColPartition and insert into  | 
265  |  |   // part_grid.  | 
266  |  |   void CompletePartition(PageSegMode pageseg_mode, ColPartition *part, ColPartitionGrid *part_grid);  | 
267  |  |  | 
268  |  |   // Helper returns true if we are looking only for vertical textlines,  | 
269  |  |   // taking into account any rotation that has been done.  | 
270  | 0  |   bool FindingVerticalOnly(PageSegMode pageseg_mode) const { | 
271  | 0  |     if (rerotation_.y() == 0.0f) { | 
272  | 0  |       return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;  | 
273  | 0  |     }  | 
274  | 0  |     return !PSM_ORIENTATION_ENABLED(pageseg_mode) && pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT;  | 
275  | 0  |   }  | 
276  |  |   // Helper returns true if we are looking only for horizontal textlines,  | 
277  |  |   // taking into account any rotation that has been done.  | 
278  | 0  |   bool FindingHorizontalOnly(PageSegMode pageseg_mode) const { | 
279  | 0  |     if (rerotation_.y() == 0.0f) { | 
280  | 0  |       return !PSM_ORIENTATION_ENABLED(pageseg_mode) && pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT;  | 
281  | 0  |     }  | 
282  | 0  |     return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT;  | 
283  | 0  |   }  | 
284  |  |  | 
285  |  |   // Merge partitions where the merge appears harmless.  | 
286  |  |   void EasyMerges(ColPartitionGrid *part_grid);  | 
287  |  |  | 
288  |  |   // Compute a search box based on the orientation of the partition.  | 
289  |  |   // Returns true if a suitable box can be calculated.  | 
290  |  |   // Callback for EasyMerges.  | 
291  |  |   bool OrientationSearchBox(ColPartition *part, TBOX *box);  | 
292  |  |  | 
293  |  |   // Merge confirmation callback for EasyMerges.  | 
294  |  |   bool ConfirmEasyMerge(const ColPartition *p1, const ColPartition *p2);  | 
295  |  |  | 
296  |  |   // Returns true if there is no significant noise in between the boxes.  | 
297  |  |   bool NoNoiseInBetween(const TBOX &box1, const TBOX &box2) const;  | 
298  |  |  | 
299  |  | #ifndef GRAPHICS_DISABLED  | 
300  |  |   // Displays the blobs colored according to the number of good neighbours  | 
301  |  |   // and the vertical/horizontal flow.  | 
302  |  |   ScrollView *DisplayGoodBlobs(const char *window_name, int x, int y);  | 
303  |  |  | 
304  |  |   // Displays blobs colored according to whether or not they are diacritics.  | 
305  |  |   ScrollView *DisplayDiacritics(const char *window_name, int x, int y, TO_BLOCK *block);  | 
306  |  | #endif  | 
307  |  |  | 
308  |  | private:  | 
309  |  |   // Image map of photo/noise areas on the page. Borrowed pointer (not owned).  | 
310  |  |   Image nontext_map_;  | 
311  |  |   // Textline projection map. Borrowed pointer.  | 
312  |  |   TextlineProjection *projection_;  | 
313  |  |   // DENORM used by projection_ to get back to image coords. Borrowed pointer.  | 
314  |  |   const DENORM *denorm_;  | 
315  |  |   // Bounding box of the grid.  | 
316  |  |   TBOX grid_box_;  | 
317  |  |   // Rerotation to get back to the original image.  | 
318  |  |   FCOORD rerotation_;  | 
319  |  | #ifndef GRAPHICS_DISABLED  | 
320  |  |   // Windows for debug display.  | 
321  |  |   ScrollView *leaders_win_ = nullptr;  | 
322  |  |   ScrollView *initial_widths_win_ = nullptr;  | 
323  |  |   ScrollView *widths_win_ = nullptr;  | 
324  |  |   ScrollView *chains_win_ = nullptr;  | 
325  |  |   ScrollView *diacritics_win_ = nullptr;  | 
326  |  |   ScrollView *textlines_win_ = nullptr;  | 
327  |  |   ScrollView *smoothed_win_ = nullptr;  | 
328  |  | #endif  | 
329  |  | };  | 
330  |  |  | 
331  |  | } // namespace tesseract.  | 
332  |  |  | 
333  |  | #endif // TESSERACT_TEXTORD_STROKEWIDTH_H_  |