/src/tesseract/src/textord/colfind.h
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: colfind.h |
3 | | // Description: Class to find columns in the grid of BLOBNBOXes. |
4 | | // Author: Ray Smith |
5 | | // |
6 | | // (C) Copyright 2008, Google Inc. |
7 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | // you may not use this file except in compliance with the License. |
9 | | // You may obtain a copy of the License at |
10 | | // http://www.apache.org/licenses/LICENSE-2.0 |
11 | | // Unless required by applicable law or agreed to in writing, software |
12 | | // distributed under the License is distributed on an "AS IS" BASIS, |
13 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | // See the License for the specific language governing permissions and |
15 | | // limitations under the License. |
16 | | // |
17 | | /////////////////////////////////////////////////////////////////////// |
18 | | |
19 | | #ifndef TESSERACT_TEXTORD_COLFIND_H_ |
20 | | #define TESSERACT_TEXTORD_COLFIND_H_ |
21 | | |
22 | | #include "colpartitiongrid.h" |
23 | | #include "colpartitionset.h" |
24 | | #include "debugpixa.h" |
25 | | #include "imagefind.h" |
26 | | #include "ocrblock.h" |
27 | | #include "tabfind.h" |
28 | | #include "textlineprojection.h" |
29 | | |
30 | | class BLOCK_LIST; |
31 | | struct Boxa; |
32 | | struct Pixa; |
33 | | class DENORM; |
34 | | class ScrollView; |
35 | | class STATS; |
36 | | class TO_BLOCK; |
37 | | |
38 | | namespace tesseract { |
39 | | |
40 | | class ColPartitionSet; |
41 | | class ColPartitionSet_LIST; |
42 | | class ColSegment_LIST; |
43 | | class ColumnGroup_LIST; |
44 | | class LineSpacing; |
45 | | class StrokeWidth; |
46 | | class TempColumn_LIST; |
47 | | class EquationDetectBase; |
48 | | |
49 | | // The ColumnFinder class finds columns in the grid. |
50 | | class TESS_API ColumnFinder : public TabFind { |
51 | | public: |
52 | | // Gridsize is an estimate of the text size in the image. A suitable value |
53 | | // is in TO_BLOCK::line_size after find_components has been used to make |
54 | | // the blobs. |
55 | | // bleft and tright are the bounds of the image (rectangle) being processed. |
56 | | // vlines is a (possibly empty) list of TabVector and vertical_x and y are |
57 | | // the sum logical vertical vector produced by LineFinder::FindVerticalLines. |
58 | | // If cjk_script is true, then broken CJK characters are fixed during |
59 | | // layout analysis to assist in detecting horizontal vs vertically written |
60 | | // textlines. |
61 | | ColumnFinder(int gridsize, const ICOORD &bleft, const ICOORD &tright, int resolution, |
62 | | bool cjk_script, double aligned_gap_fraction, TabVector_LIST *vlines, |
63 | | TabVector_LIST *hlines, int vertical_x, int vertical_y); |
64 | | ~ColumnFinder() override; |
65 | | |
66 | | // Accessors for testing |
67 | 0 | const DENORM *denorm() const { |
68 | 0 | return denorm_; |
69 | 0 | } |
70 | 0 | const TextlineProjection *projection() const { |
71 | 0 | return &projection_; |
72 | 0 | } |
73 | 0 | void set_cjk_script(bool is_cjk) { |
74 | 0 | cjk_script_ = is_cjk; |
75 | 0 | } |
76 | | |
77 | | // ====================================================================== |
78 | | // The main function of ColumnFinder is broken into pieces to facilitate |
79 | | // optional insertion of orientation and script detection in an efficient |
80 | | // way. The calling sequence IS MANDATORY however, whether or not |
81 | | // OSD is being used: |
82 | | // 1. Construction. |
83 | | // 2. SetupAndFilterNoise. |
84 | | // 3. IsVerticallyAlignedText. |
85 | | // 4. CorrectOrientation. |
86 | | // 5. FindBlocks. |
87 | | // 6. Destruction. Use of a single column finder for multiple images does not |
88 | | // make sense. |
89 | | // Throughout these steps, the ColPartitions are owned by part_grid_, which |
90 | | // means that it must be kept correct. Exception: big_parts_ owns its |
91 | | // own ColPartitions. |
92 | | // The BLOBNBOXes are owned by the input TO_BLOCK for the whole time, except |
93 | | // for a phase in FindBlocks before TransformToBlocks, when they become |
94 | | // owned by the ColPartitions. The owner() ColPartition of a BLOBNBOX |
95 | | // indicates more of a betrothal for the majority of layout analysis, ie |
96 | | // which ColPartition will take ownership when the blobs are released from |
97 | | // the input TO_BLOCK. Exception: image_bblobs_ owns the fake blobs that |
98 | | // are part of the image regions, as they are not on any TO_BLOCK list. |
99 | | // TODO(rays) break up column finder further into smaller classes, as |
100 | | // there is a lot more to it than column finding now. |
101 | | // ====================================================================== |
102 | | |
103 | | // Performs initial processing on the blobs in the input_block: |
104 | | // Set up the part_grid_, stroke_width_, nontext_map_. |
105 | | // Obvious noise blobs are filtered out and used to mark the nontext_map_. |
106 | | // Initial stroke-width analysis is used to get local text alignment |
107 | | // direction, so the textline projection_ map can be set up. |
108 | | // On return, IsVerticallyAlignedText may be called (now optionally) to |
109 | | // determine the gross textline alignment of the page. |
110 | | void SetupAndFilterNoise(PageSegMode pageseg_mode, Image photo_mask_pix, TO_BLOCK *input_block); |
111 | | |
112 | | // Tests for vertical alignment of text (returning true if so), and generates |
113 | | // a list of blobs (in osd_blobs) for orientation and script detection. |
114 | | // block is the single block for the whole page or rectangle to be OCRed. |
115 | | // Note that the vertical alignment may be due to text whose writing direction |
116 | | // is vertical, like say Japanese, or due to text whose writing direction is |
117 | | // horizontal but whose text appears vertically aligned because the image is |
118 | | // not the right way up. |
119 | | // find_vertical_text_ratio should be textord_tabfind_vertical_text_ratio. |
120 | | bool IsVerticallyAlignedText(double find_vertical_text_ratio, TO_BLOCK *block, |
121 | | BLOBNBOX_CLIST *osd_blobs); |
122 | | |
123 | | // Rotates the blobs and the TabVectors so that the gross writing direction |
124 | | // (text lines) is horizontal and lines are read down the page. |
125 | | // Applied rotation stored in rotation_. |
126 | | // A second rotation is calculated for application during recognition to |
127 | | // make the rotated blobs upright for recognition. |
128 | | // Subsequent rotation stored in text_rotation_. |
129 | | // |
130 | | // Arguments: |
131 | | // vertical_text_lines is true if the text lines are vertical. |
132 | | // recognition_rotation [0..3] is the number of anti-clockwise 90 degree |
133 | | // rotations from osd required for the text to be upright and readable. |
134 | | void CorrectOrientation(TO_BLOCK *block, bool vertical_text_lines, int recognition_rotation); |
135 | | |
136 | | // Finds blocks of text, image, rule line, table etc, returning them in the |
137 | | // blocks and to_blocks. |
138 | | // (Each TO_BLOCK points to the basic BLOCK and adds more information.) |
139 | | // Image blocks are generated by a combination of photo_mask_pix (which may |
140 | | // NOT be nullptr) and the rejected text found during preliminary textline |
141 | | // finding. |
142 | | // The input_block is the result of a call to find_components, and contains |
143 | | // the blobs found in the image or rectangle to be OCRed. These blobs will be |
144 | | // removed and placed in the output blocks, while unused ones will be deleted. |
145 | | // If single_column is true, the input is treated as single column, but |
146 | | // it is still divided into blocks of equal line spacing/text size. |
147 | | // scaled_color is scaled down by scaled_factor from the input color image, |
148 | | // and may be nullptr if the input was not color. |
149 | | // grey_pix is optional, but if present must match the photo_mask_pix in size, |
150 | | // and must be a *real* grey image instead of binary_pix * 255. |
151 | | // thresholds_pix is expected to be present iff grey_pix is present and |
152 | | // can be an integer factor reduction of the grey_pix. It represents the |
153 | | // thresholds that were used to create the binary_pix from the grey_pix. |
154 | | // Small blobs that confuse the segmentation into lines are placed into |
155 | | // diacritic_blobs, with the intention that they be put into the most |
156 | | // appropriate word after the rest of layout analysis. |
157 | | // Returns -1 if the user hits the 'd' key in the blocks window while running |
158 | | // in debug mode, which requests a retry with more debug info. |
159 | | int FindBlocks(PageSegMode pageseg_mode, Image scaled_color, int scaled_factor, TO_BLOCK *block, |
160 | | Image photo_mask_pix, Image thresholds_pix, Image grey_pix, DebugPixa *pixa_debug, |
161 | | BLOCK_LIST *blocks, BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks); |
162 | | |
163 | | // Get the rotation required to deskew, and its inverse rotation. |
164 | | void GetDeskewVectors(FCOORD *deskew, FCOORD *reskew); |
165 | | |
166 | | // Set the equation detection pointer. |
167 | | void SetEquationDetect(EquationDetectBase *detect); |
168 | | |
169 | | private: |
170 | | // Displays the blob and block bounding boxes in a window called Blocks. |
171 | | void DisplayBlocks(BLOCK_LIST *blocks); |
172 | | // Displays the column edges at each grid y coordinate defined by |
173 | | // best_columns_. |
174 | | void DisplayColumnBounds(PartSetVector *sets); |
175 | | |
176 | | ////// Functions involved in determining the columns used on the page. ///// |
177 | | |
178 | | // Sets up column_sets_ (the determined column layout at each horizontal |
179 | | // slice). Returns false if the page is empty. |
180 | | bool MakeColumns(bool single_column); |
181 | | // Attempt to improve the column candidates (column_sets) by expanding the |
182 | | // columns and adding new partitions from the partition sets in src_sets. |
183 | | // src_sets may be equal to column_sets, in which case it will |
184 | | // use them as a source to improve themselves. |
185 | | void ImproveColumnCandidates(PartSetVector *src_sets, PartSetVector *column_sets); |
186 | | // Prints debug information on the column candidates. |
187 | | void PrintColumnCandidates(const char *title); |
188 | | // Finds the optimal set of columns that cover the entire image with as |
189 | | // few changes in column partition as possible. |
190 | | // Returns true if any part of the page is multi-column. |
191 | | bool AssignColumns(const PartSetVector &part_sets); |
192 | | // Finds the biggest range in part_sets_ that has no assigned column, but |
193 | | // column assignment is possible. |
194 | | bool BiggestUnassignedRange(int set_count, const bool *any_columns_possible, int *start, |
195 | | int *end); |
196 | | // Finds the modal compatible column_sets_ index within the given range. |
197 | | int RangeModalColumnSet(int **column_set_costs, const int *assigned_costs, int start, int end); |
198 | | // Given that there are many column_set_id compatible columns in the range, |
199 | | // shrinks the range to the longest contiguous run of compatibility, allowing |
200 | | // gaps where no columns are possible, but not where competing columns are |
201 | | // possible. |
202 | | void ShrinkRangeToLongestRun(int **column_set_costs, const int *assigned_costs, |
203 | | const bool *any_columns_possible, int column_set_id, int *best_start, |
204 | | int *best_end); |
205 | | // Moves start in the direction of step, up to, but not including end while |
206 | | // the only incompatible regions are no more than kMaxIncompatibleColumnCount |
207 | | // in size, and the compatible regions beyond are bigger. |
208 | | void ExtendRangePastSmallGaps(int **column_set_costs, const int *assigned_costs, |
209 | | const bool *any_columns_possible, int column_set_id, int step, |
210 | | int end, int *start); |
211 | | // Assigns the given column_set_id to the part_sets_ in the given range. |
212 | | void AssignColumnToRange(int column_set_id, int start, int end, int **column_set_costs, |
213 | | int *assigned_costs); |
214 | | |
215 | | // Computes the mean_column_gap_. |
216 | | void ComputeMeanColumnGap(bool any_multi_column); |
217 | | |
218 | | //////// Functions that manipulate ColPartitions in the part_grid_ ///// |
219 | | //////// to split, merge, find margins, and find types. ////////////// |
220 | | |
221 | | // Hoovers up all un-owned blobs and deletes them. |
222 | | // The rest get released from the block so the ColPartitions can pass |
223 | | // ownership to the output blocks. |
224 | | void ReleaseBlobsAndCleanupUnused(TO_BLOCK *block); |
225 | | // Splits partitions that cross columns where they have nothing in the gap. |
226 | | void GridSplitPartitions(); |
227 | | // Merges partitions where there is vertical overlap, within a single column, |
228 | | // and the horizontal gap is small enough. |
229 | | void GridMergePartitions(); |
230 | | // Inserts remaining noise blobs into the most applicable partition if any. |
231 | | // If there is no applicable partition, then the blobs are deleted. |
232 | | void InsertRemainingNoise(TO_BLOCK *block); |
233 | | // Remove partitions that come from horizontal lines that look like |
234 | | // underlines, but are not part of a table. |
235 | | void GridRemoveUnderlinePartitions(); |
236 | | // Add horizontal line separators as partitions. |
237 | | void GridInsertHLinePartitions(); |
238 | | // Add vertical line separators as partitions. |
239 | | void GridInsertVLinePartitions(); |
240 | | // For every ColPartition in the grid, sets its type based on position |
241 | | // in the columns. |
242 | | void SetPartitionTypes(); |
243 | | // Only images remain with multiple types in a run of partners. |
244 | | // Sets the type of all in the group to the maximum of the group. |
245 | | void SmoothPartnerRuns(); |
246 | | |
247 | | //////// Functions that make the final output blocks /////// |
248 | | |
249 | | // Helper functions for TransformToBlocks. |
250 | | // Add the part to the temp list in the correct order. |
251 | | void AddToTempPartList(ColPartition *part, ColPartition_CLIST *temp_list); |
252 | | // Add everything from the temp list to the work_set assuming correct order. |
253 | | void EmptyTempPartList(ColPartition_CLIST *temp_list, WorkingPartSet_LIST *work_set); |
254 | | |
255 | | // Transform the grid of partitions to the output blocks. |
256 | | void TransformToBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks); |
257 | | |
258 | | // Reflect the blob boxes (but not the outlines) in the y-axis so that |
259 | | // the blocks get created in the correct RTL order. Rotates the blobs |
260 | | // in the input_block and the bblobs list. |
261 | | // The reflection is undone in RotateAndReskewBlocks by |
262 | | // reflecting the blocks themselves, and then recomputing the blob bounding |
263 | | // boxes. |
264 | | void ReflectForRtl(TO_BLOCK *input_block, BLOBNBOX_LIST *bblobs); |
265 | | |
266 | | // Undo the deskew that was done in FindTabVectors, as recognition is done |
267 | | // without correcting blobs or blob outlines for skew. |
268 | | // Reskew the completed blocks to put them back to the original rotated coords |
269 | | // that were created by CorrectOrientation. |
270 | | // If the input_is_rtl, then reflect the blocks in the y-axis to undo the |
271 | | // reflection that was done before FindTabVectors. |
272 | | // Blocks that were identified as vertical text (relative to the rotated |
273 | | // coordinates) are further rotated so the text lines are horizontal. |
274 | | // blob polygonal outlines are rotated to match the position of the blocks |
275 | | // that they are in, and their bounding boxes are recalculated to be accurate. |
276 | | // Record appropriate inverse transformations and required |
277 | | // classifier transformation in the blocks. |
278 | | void RotateAndReskewBlocks(bool input_is_rtl, TO_BLOCK_LIST *to_blocks); |
279 | | |
280 | | // Computes the rotations for the block (to make textlines horizontal) and |
281 | | // for the blobs (for classification) and sets the appropriate members |
282 | | // of the given block. |
283 | | // Returns the rotation that needs to be applied to the blobs to make |
284 | | // them sit in the rotated block. |
285 | | FCOORD ComputeBlockAndClassifyRotation(BLOCK *block); |
286 | | |
287 | | // If true then the page language is cjk, so it is safe to perform |
288 | | // FixBrokenCJK. |
289 | | bool cjk_script_; |
290 | | // The minimum gutter width to apply for finding columns. |
291 | | // Modified when vertical text is detected to prevent detection of |
292 | | // vertical text lines as columns. |
293 | | int min_gutter_width_; |
294 | | // The mean gap between columns over the page. |
295 | | int mean_column_gap_; |
296 | | // Config param saved at construction time. Modifies min_gutter_width_ with |
297 | | // vertical text to prevent detection of vertical text as columns. |
298 | | double tabfind_aligned_gap_fraction_; |
299 | | // The rotation vector needed to convert original coords to deskewed. |
300 | | FCOORD deskew_; |
301 | | // The rotation vector needed to convert deskewed back to original coords. |
302 | | FCOORD reskew_; |
303 | | // The rotation vector used to rotate vertically oriented pages. |
304 | | FCOORD rotation_; |
305 | | // The rotation vector needed to convert the rotated back to original coords. |
306 | | FCOORD rerotate_; |
307 | | // The additional rotation vector needed to rotate text for recognition. |
308 | | FCOORD text_rotation_; |
309 | | // The column_sets_ contain the ordered candidate ColPartitionSets that |
310 | | // define the possible divisions of the page into columns. |
311 | | PartSetVector column_sets_; |
312 | | // A simple array of pointers to the best assigned column division at |
313 | | // each grid y coordinate. |
314 | | ColPartitionSet **best_columns_; |
315 | | // The grid used for creating initial partitions with strokewidth. |
316 | | StrokeWidth *stroke_width_; |
317 | | // The grid used to hold ColPartitions after the columns have been determined. |
318 | | ColPartitionGrid part_grid_; |
319 | | // List of ColPartitions that are no longer needed after they have been |
320 | | // turned into regions, but are kept around because they are referenced |
321 | | // by the part_grid_. |
322 | | ColPartition_LIST good_parts_; |
323 | | // List of ColPartitions that are big and might be dropcap or vertically |
324 | | // joined. |
325 | | ColPartition_LIST big_parts_; |
326 | | // List of ColPartitions that have been declared noise. |
327 | | ColPartition_LIST noise_parts_; |
328 | | // The fake blobs that are made from the images. |
329 | | BLOBNBOX_LIST image_bblobs_; |
330 | | // Horizontal line separators. |
331 | | TabVector_LIST horizontal_lines_; |
332 | | // Image map of photo/noise areas on the page. |
333 | | Image nontext_map_; |
334 | | // Textline projection map. |
335 | | TextlineProjection projection_; |
336 | | // Sequence of DENORMS that indicate how to get back to the original image |
337 | | // coordinate space. The destructor must delete all the DENORMs in the chain. |
338 | | DENORM *denorm_; |
339 | | |
340 | | // The equation region detector pointer. Note: This pointer is passed in by |
341 | | // member function SetEquationDetect, and responsibility for releasing it |
342 | | // does NOT lie with this class. |
343 | | EquationDetectBase *equation_detect_; |
344 | | |
345 | | #ifndef GRAPHICS_DISABLED |
346 | | // Various debug windows that automatically go away on completion. |
347 | | ScrollView *input_blobs_win_ = nullptr; |
348 | | |
349 | | // Allow a subsequent instance to reuse the blocks window. |
350 | | // Not thread-safe, but multiple threads shouldn't be using windows anyway. |
351 | | static ScrollView *blocks_win_; |
352 | | #endif |
353 | | }; |
354 | | |
355 | | } // namespace tesseract. |
356 | | |
357 | | #endif // TESSERACT_TEXTORD_COLFIND_H_ |