/src/tesseract/src/textord/strokewidth.h
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: strokewidth.h |
3 | | // Description: Subclass of BBGrid to find uniformity of strokewidth. |
4 | | // Author: Ray Smith |
5 | | // |
6 | | // (C) Copyright 2008, Google Inc. |
7 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | // you may not use this file except in compliance with the License. |
9 | | // You may obtain a copy of the License at |
10 | | // http://www.apache.org/licenses/LICENSE-2.0 |
11 | | // Unless required by applicable law or agreed to in writing, software |
12 | | // distributed under the License is distributed on an "AS IS" BASIS, |
13 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | // See the License for the specific language governing permissions and |
15 | | // limitations under the License. |
16 | | // |
17 | | /////////////////////////////////////////////////////////////////////// |
18 | | |
19 | | #ifndef TESSERACT_TEXTORD_STROKEWIDTH_H_ |
20 | | #define TESSERACT_TEXTORD_STROKEWIDTH_H_ |
21 | | |
22 | | #include "blobbox.h" // BlobNeighbourDir. |
23 | | #include "blobgrid.h" // Base class. |
24 | | #include "colpartitiongrid.h" |
25 | | #include "textlineprojection.h" |
26 | | |
27 | | class DENORM; |
28 | | class ScrollView; |
29 | | class TO_BLOCK; |
30 | | |
31 | | namespace tesseract { |
32 | | |
33 | | class ColPartition_LIST; |
34 | | class TabFind; |
35 | | class TextlineProjection; |
36 | | |
37 | | // Names a side for direction-controlling arguments instead of a bare bool, e.g. the side parameter of StrokeWidth::MarkLeaderNeighbours. |
38 | | enum LeftOrRight { LR_LEFT, LR_RIGHT }; |
39 | | |
40 | | // Return value from StrokeWidth::FindInitialPartitions; indicates detection of severe |
41 | | // skew or noise. Any value other than PFR_OK means the job is incomplete and FindInitialPartitions must be called again. |
42 | | enum PartitionFindResult { |
43 | | PFR_OK, // Everything is OK; partitioning completed. |
44 | | PFR_SKEW, // Skew was detected and rotated (described only in a TODO on FindInitialPartitions, so this path may not be implemented yet). |
45 | | PFR_NOISE // Noise was detected and removed; the noise is separated out into diacritic blobs. |
46 | | }; |
47 | | |
48 | | /** |
49 | | * The StrokeWidth class is a BlobGrid that holds all the normal and large blobs. |
50 | | * It is used to find good large blobs and move them to the normal blobs |
51 | | * by virtue of having a reasonable strokewidth-compatible neighbour. |
52 | | */ |
53 | | class StrokeWidth : public BlobGrid { |
54 | | public: |
55 | | StrokeWidth(int gridsize, const ICOORD &bleft, const ICOORD &tright); |
56 | | ~StrokeWidth() override; |
57 | | |
58 | | // Sets the neighbours member of the medium-sized blobs in the block. |
59 | | // Searches on 4 sides of each blob for similar-sized, similar-strokewidth |
60 | | // blobs and sets pointers to the good neighbours. |
61 | | void SetNeighboursOnMediumBlobs(TO_BLOCK *block); |
62 | | |
63 | | // Sets the neighbour/textline writing direction members of the medium |
64 | | // and large blobs with optional repair of broken CJK characters first. |
65 | | // Repair of broken CJK is needed here because broken CJK characters |
66 | | // can fool the textline direction detection algorithm. |
67 | | void FindTextlineDirectionAndFixBrokenCJK(PageSegMode pageseg_mode, bool cjk_merge, |
68 | | TO_BLOCK *input_block); |
69 | | |
70 | | // To save computation, the process of generating partitions is broken |
71 | | // into the following 4 steps: |
72 | | // TestVerticalTextDirection |
73 | | // CorrectForRotation (used only if a rotation is to be applied) |
74 | | // FindLeaderPartitions |
75 | | // GradeBlobsIntoPartitions. |
76 | | // These functions are all required, in sequence, except for |
77 | | // CorrectForRotation, which is not needed if no rotation is applied. |
78 | | |
79 | | // Types all the blobs as vertical or horizontal text or unknown and |
80 | | // returns true if the majority are vertical. |
81 | | // If the blobs are rotated, it is necessary to call CorrectForRotation |
82 | | // after rotating everything, otherwise the work done here will be enough. |
83 | | // If osd_blobs is not null, a list of blobs from the dominant textline |
84 | | // direction are returned for use in orientation and script detection. |
85 | | // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio. |
86 | | bool TestVerticalTextDirection(double find_vertical_text_ratio, TO_BLOCK *block, |
87 | | BLOBNBOX_CLIST *osd_blobs); |
88 | | |
89 | | // Corrects the data structures for the given rotation. |
90 | | void CorrectForRotation(const FCOORD &rerotation, ColPartitionGrid *part_grid); |
91 | | |
92 | | // Finds leader partitions and inserts them into the given grid. |
93 | | void FindLeaderPartitions(TO_BLOCK *block, ColPartitionGrid *part_grid); |
94 | | |
95 | | // Finds and marks as noise those blobs that look like bits of vertical lines |
96 | | // that would otherwise screw up layout analysis. |
97 | | void RemoveLineResidue(ColPartition_LIST *big_part_list); |
98 | | |
99 | | // Types all the blobs as vertical text or horizontal text or unknown and |
100 | | // puts them into initial ColPartitions in the supplied part_grid. |
101 | | // rerotation determines how to get back to the image coordinates from the |
102 | | // blob coordinates (since they may have been rotated for vertical text). |
103 | | // block is the single block for the whole page or rectangle to be OCRed. |
104 | | // nontext_pix (full-size), is a binary mask used to prevent merges across |
105 | | // photo/text boundaries. It is not kept beyond this function. |
106 | | // denorm provides a mapping back to the image from the current blob |
107 | | // coordinate space. |
108 | | // projection provides a measure of textline density over the image and |
109 | | // provides functions to assist with diacritic detection. It should be a |
110 | | // pointer to a new TextlineProjection, and will be setup here. |
111 | | // part_grid is the output grid of textline partitions. |
112 | | // Large blobs that cause overlap are put in separate partitions and added |
113 | | // to the big_parts list. NOTE(review): pageseg_mode, cjk_script and diacritic_blobs are not described here; confirm their roles against the definition. |
114 | | void GradeBlobsIntoPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, TO_BLOCK *block, |
115 | | Image nontext_pix, const DENORM *denorm, bool cjk_script, |
116 | | TextlineProjection *projection, BLOBNBOX_LIST *diacritic_blobs, |
117 | | ColPartitionGrid *part_grid, ColPartition_LIST *big_parts); |
118 | | |
119 | | // Handles a click event in a display window. |
120 | | void HandleClick(int x, int y) override; |
121 | | |
122 | | private: |
123 | | // Computes the noise_density_ by summing the number of elements in a |
124 | | // neighbourhood of each grid cell. NOTE(review): no noise_density_ member is declared in this class; confirm where it lives or whether this comment is stale. |
125 | | void ComputeNoiseDensity(TO_BLOCK *block, TabFind *line_grid); |
126 | | |
127 | | // Detects and marks leader dots/dashes. |
128 | | // Leaders are horizontal chains of small or noise blobs that look |
129 | | // monospace according to ColPartition::MarkAsLeaderIfMonospaced(). |
130 | | // Detected leaders become the only occupants of the block->small_blobs list. |
131 | | // Non-leader small blobs get moved to the blobs list. |
132 | | // Non-leader noise blobs remain singletons in the noise list. |
133 | | // All small and noise blobs in high density regions are marked BTFT_NONTEXT. |
134 | | // block is the single block for the whole page or rectangle to be OCRed. |
135 | | // leader_parts is the output. |
136 | | void FindLeadersAndMarkNoise(TO_BLOCK *block, ColPartition_LIST *leader_parts); |
137 | | |
138 | | /** Inserts the block blobs (normal and large) into this grid. |
139 | | * Blobs remain owned by the block. */ |
140 | | void InsertBlobs(TO_BLOCK *block); |
141 | | |
142 | | // Fix broken CJK characters, using the fake joined blobs mechanism. |
143 | | // Blobs are really merged, ie the master takes all the outlines and the |
144 | | // others are deleted. |
145 | | // Returns true if sufficient blobs are merged that it may be worth running |
146 | | // again, due to a better estimate of character size. |
147 | | bool FixBrokenCJK(TO_BLOCK *block); |
148 | | |
149 | | // Collect blobs that overlap or are within max_dist of the input bbox. |
150 | | // Return them in the list of blobs and expand the bbox to be the union |
151 | | // of all the boxes. not_this is excluded from the search, as are blobs |
152 | | // that cause the merged box to exceed max_size in either dimension. |
153 | | void AccumulateOverlaps(const BLOBNBOX *not_this, bool debug, int max_size, int max_dist, |
154 | | TBOX *bbox, BLOBNBOX_CLIST *blobs); |
155 | | |
156 | | // For each blob in this grid, finds the textline direction to be horizontal |
157 | | // or vertical according to distance to neighbours and 1st and 2nd order |
158 | | // neighbours. Non-text tends to end up without a definite direction. |
159 | | // Result is setting of the neighbours and vert_possible/horz_possible |
160 | | // flags in the BLOBNBOXes currently in this grid. |
161 | | // This function is called more than once if page orientation is uncertain, |
162 | | // so display_if_debugging is true on the final call to display the results. |
163 | | void FindTextlineFlowDirection(PageSegMode pageseg_mode, bool display_if_debugging); |
164 | | |
165 | | // Sets the neighbours and good_stroke_neighbours members of the blob by |
166 | | // searching close on all 4 sides. |
167 | | // When finding leader dots/dashes, there is a slightly different rule for |
168 | | // what makes a good neighbour. |
169 | | // If activate_line_trap, then line-like objects are found and isolated. |
170 | | void SetNeighbours(bool leaders, bool activate_line_trap, BLOBNBOX *blob); |
171 | | |
172 | | // Sets the good_stroke_neighbours member of the blob if it has a |
173 | | // GoodNeighbour on the given side. |
174 | | // Also sets the neighbour in the blob, whether or not a good one is found. |
175 | | // Return value is the number of neighbours in the line trap size range. |
176 | | // Leaders get extra special lenient treatment. |
177 | | int FindGoodNeighbour(BlobNeighbourDir dir, bool leaders, BLOBNBOX *blob); |
178 | | |
179 | | // Makes the blob to be only horizontal or vertical where evidence |
180 | | // is clear based on gaps of 2nd order neighbours. |
181 | | void SetNeighbourFlows(BLOBNBOX *blob); |
182 | | |
183 | | // Nullify the neighbours in the wrong directions where the direction |
184 | | // is clear-cut based on a distance margin. Good for isolating vertical |
185 | | // text from neighbouring horizontal text. |
186 | | void SimplifyObviousNeighbours(BLOBNBOX *blob); |
187 | | |
188 | | // Smoothes the vertical/horizontal type of the blob based on the |
189 | | // 2nd-order neighbours. If desperate is true, then all blobs are changed; |
190 | | // otherwise only ambiguous blobs are processed. NOTE(review): this comment previously named the flag reset_all; confirm that it corresponds to the desperate parameter. |
191 | | void SmoothNeighbourTypes(PageSegMode pageseg_mode, bool desperate, BLOBNBOX *blob); |
192 | | |
193 | | // Checks the left or right side of the given leader partition and sets the |
194 | | // (opposite) leader_on_right or leader_on_left flags for blobs |
195 | | // that are next to the given side of the given leader partition. |
196 | | void MarkLeaderNeighbours(const ColPartition *part, LeftOrRight side); |
197 | | |
198 | | // Partition creation. Accumulates vertical and horizontal text chains, |
199 | | // puts the remaining blobs in as unknowns, and then merges/splits to |
200 | | // minimize overlap and smoothes the types with neighbours and the color |
201 | | // image if provided. rerotation is used to rotate the coordinate space |
202 | | // back to the nontext_map_ image. |
203 | | // If find_problems is true, detects possible noise pollution by the amount |
204 | | // of partition overlap that is created by the diacritics. If excessive, the |
205 | | // noise is separated out into diacritic blobs, and PFR_NOISE is returned. |
206 | | // [TODO(rays): if the partition overlap is caused by heavy skew, deskews |
207 | | // the components, saves the skew_angle and returns PFR_SKEW.] If the return |
208 | | // is not PFR_OK, the job is incomplete, and FindInitialPartitions must be |
209 | | // called again after cleaning up the partly done work. |
210 | | PartitionFindResult FindInitialPartitions(PageSegMode pageseg_mode, const FCOORD &rerotation, |
211 | | bool find_problems, TO_BLOCK *block, |
212 | | BLOBNBOX_LIST *diacritic_blobs, |
213 | | ColPartitionGrid *part_grid, |
214 | | ColPartition_LIST *big_parts, FCOORD *skew_angle); |
215 | | // Detects noise by a significant increase in partition overlap from |
216 | | // pre_overlap to now, and removes noise from the union of all the overlapping |
217 | | // partitions, placing the blobs in diacritic_blobs. Returns true if any noise |
218 | | // was found and removed. |
219 | | bool DetectAndRemoveNoise(int pre_overlap, const TBOX &grid_box, TO_BLOCK *block, |
220 | | ColPartitionGrid *part_grid, BLOBNBOX_LIST *diacritic_blobs); |
221 | | // Finds vertical chains of text-like blobs and puts them in ColPartitions. |
222 | | void FindVerticalTextChains(ColPartitionGrid *part_grid); |
223 | | // Finds horizontal chains of text-like blobs and puts them in ColPartitions. |
224 | | void FindHorizontalTextChains(ColPartitionGrid *part_grid); |
225 | | // Finds diacritics and saves their base character in the blob. |
226 | | void TestDiacritics(ColPartitionGrid *part_grid, TO_BLOCK *block); |
227 | | // Searches this grid for an appropriately close and sized neighbour of the |
228 | | // given [small] blob. If such a blob is found, the diacritic base is saved |
229 | | // in the blob and true is returned. |
230 | | // The small_grid is a secondary grid that contains the small/noise objects |
231 | | // that are not in this grid, but may be useful for determining a connection |
232 | | // between blob and its potential base character. (See DiacriticXGapFilled.) |
233 | | bool DiacriticBlob(BlobGrid *small_grid, BLOBNBOX *blob); |
234 | | // Returns true if there is no gap between the base char and the diacritic |
235 | | // bigger than a fraction of the height of the base char: |
236 | | // Eg: line end.....' |
237 | | // The quote is a long way from the end of the line, yet it needs to be a |
238 | | // diacritic. To determine that the quote is not part of an image, or |
239 | | // a different text block, we check for other marks in the gap between |
240 | | // the base char and the diacritic. |
241 | | // '<--Diacritic |
242 | | // |---------| |
243 | | // | |<-toobig-gap-> |
244 | | // | Base |<ok gap> |
245 | | // |---------| x<-----Dot occupying gap |
246 | | // The grid is const really. |
247 | | bool DiacriticXGapFilled(BlobGrid *grid, const TBOX &diacritic_box, const TBOX &base_box); |
248 | | // Merges diacritics with the ColPartition of the base character blob. |
249 | | void MergeDiacritics(TO_BLOCK *block, ColPartitionGrid *part_grid); |
250 | | // Any blobs on the large_blobs list of block that are still unowned by a |
251 | | // ColPartition, are probably drop-cap or vertically touching so the blobs |
252 | | // are removed to the big_parts list and treated separately. |
253 | | void RemoveLargeUnusedBlobs(TO_BLOCK *block, ColPartitionGrid *part_grid, |
254 | | ColPartition_LIST *big_parts); |
255 | | |
256 | | // All remaining unused blobs are put in individual ColPartitions. |
257 | | void PartitionRemainingBlobs(PageSegMode pageseg_mode, ColPartitionGrid *part_grid); |
258 | | |
259 | | // If combine, put all blobs in the cell_list into a single partition, |
260 | | // otherwise put each one into its own partition. |
261 | | void MakePartitionsFromCellList(PageSegMode pageseg_mode, bool combine, |
262 | | ColPartitionGrid *part_grid, BLOBNBOX_CLIST *cell_list); |
263 | | |
264 | | // Helper function to finish setting up a ColPartition and insert into |
265 | | // part_grid. |
266 | | void CompletePartition(PageSegMode pageseg_mode, ColPartition *part, ColPartitionGrid *part_grid); |
267 | | |
268 | | // Helper returns true if we are looking only for vertical textlines, |
269 | | // taking into account any rotation that has been done: when rerotation_.y() is zero only PSM_SINGLE_BLOCK_VERT_TEXT qualifies; otherwise any mode that is neither orientation-enabled nor PSM_SINGLE_BLOCK_VERT_TEXT qualifies. |
270 | 0 | bool FindingVerticalOnly(PageSegMode pageseg_mode) const { |
271 | 0 | if (rerotation_.y() == 0.0f) { |
272 | 0 | return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT; |
273 | 0 | } |
274 | 0 | return !PSM_ORIENTATION_ENABLED(pageseg_mode) && pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT; |
275 | 0 | } |
276 | | // Helper returns true if we are looking only for horizontal textlines, |
277 | | // taking into account any rotation that has been done: when rerotation_.y() is zero any mode that is neither orientation-enabled nor PSM_SINGLE_BLOCK_VERT_TEXT qualifies; otherwise only PSM_SINGLE_BLOCK_VERT_TEXT qualifies. |
278 | 0 | bool FindingHorizontalOnly(PageSegMode pageseg_mode) const { |
279 | 0 | if (rerotation_.y() == 0.0f) { |
280 | 0 | return !PSM_ORIENTATION_ENABLED(pageseg_mode) && pageseg_mode != PSM_SINGLE_BLOCK_VERT_TEXT; |
281 | 0 | } |
282 | 0 | return pageseg_mode == PSM_SINGLE_BLOCK_VERT_TEXT; |
283 | 0 | } |
284 | | |
285 | | // Merge partitions where the merge appears harmless. |
286 | | void EasyMerges(ColPartitionGrid *part_grid); |
287 | | |
288 | | // Compute a search box based on the orientation of the partition. |
289 | | // Returns true if a suitable box can be calculated. |
290 | | // Callback for EasyMerges. |
291 | | bool OrientationSearchBox(ColPartition *part, TBOX *box); |
292 | | |
293 | | // Merge confirmation callback for EasyMerges. |
294 | | bool ConfirmEasyMerge(const ColPartition *p1, const ColPartition *p2); |
295 | | |
296 | | // Returns true if there is no significant noise in between the boxes. |
297 | | bool NoNoiseInBetween(const TBOX &box1, const TBOX &box2) const; |
298 | | |
299 | | #ifndef GRAPHICS_DISABLED |
300 | | // Displays the blobs colored according to the number of good neighbours |
301 | | // and the vertical/horizontal flow. |
302 | | ScrollView *DisplayGoodBlobs(const char *window_name, int x, int y); |
303 | | |
304 | | // Displays blobs colored according to whether or not they are diacritics. |
305 | | ScrollView *DisplayDiacritics(const char *window_name, int x, int y, TO_BLOCK *block); |
306 | | #endif |
307 | | |
308 | | private: |
309 | | // Image map of photo/noise areas on the page. Borrowed pointer (not owned). |
310 | | Image nontext_map_; |
311 | | // Textline projection map. Borrowed pointer. |
312 | | TextlineProjection *projection_; |
313 | | // DENORM used by projection_ to get back to image coords. Borrowed pointer. |
314 | | const DENORM *denorm_; |
315 | | // Bounding box of the grid. |
316 | | TBOX grid_box_; |
317 | | // Rerotation to get back to the original image. |
318 | | FCOORD rerotation_; |
319 | | #ifndef GRAPHICS_DISABLED |
320 | | // Windows for debug display. |
321 | | ScrollView *leaders_win_ = nullptr; |
322 | | ScrollView *initial_widths_win_ = nullptr; |
323 | | ScrollView *widths_win_ = nullptr; |
324 | | ScrollView *chains_win_ = nullptr; |
325 | | ScrollView *diacritics_win_ = nullptr; |
326 | | ScrollView *textlines_win_ = nullptr; |
327 | | ScrollView *smoothed_win_ = nullptr; |
328 | | #endif |
329 | | }; |
330 | | |
331 | | } // namespace tesseract. |
332 | | |
333 | | #endif // TESSERACT_TEXTORD_STROKEWIDTH_H_ |