/src/tesseract/src/textord/colfind.cpp
Line | Count | Source |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: colfind.cpp |
3 | | // Description: Class to hold BLOBNBOXs in a grid for fast access |
4 | | // to neighbours. |
5 | | // Author: Ray Smith |
6 | | // |
7 | | // (C) Copyright 2007, Google Inc. |
8 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
9 | | // you may not use this file except in compliance with the License. |
10 | | // You may obtain a copy of the License at |
11 | | // http://www.apache.org/licenses/LICENSE-2.0 |
12 | | // Unless required by applicable law or agreed to in writing, software |
13 | | // distributed under the License is distributed on an "AS IS" BASIS, |
14 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
15 | | // See the License for the specific language governing permissions and |
16 | | // limitations under the License. |
17 | | // |
18 | | /////////////////////////////////////////////////////////////////////// |
19 | | |
20 | | // Include automatically generated configuration file if running autoconf. |
21 | | #ifdef HAVE_CONFIG_H |
22 | | # include "config_auto.h" |
23 | | #endif |
24 | | |
25 | | #include "colfind.h" |
26 | | |
27 | | #include "ccnontextdetect.h" |
28 | | #include "colpartition.h" |
29 | | #include "colpartitionset.h" |
30 | | #ifndef DISABLED_LEGACY_ENGINE |
31 | | # include "equationdetectbase.h" |
32 | | #endif |
33 | | #include "blobbox.h" |
34 | | #include "linefind.h" |
35 | | #include "normalis.h" |
36 | | #include "params.h" |
37 | | #include "scrollview.h" |
38 | | #include "strokewidth.h" |
39 | | #include "tablefind.h" |
40 | | #include "workingpartset.h" |
41 | | |
42 | | #include <algorithm> |
43 | | |
44 | | namespace tesseract { |
45 | | |
46 | | // When assigning columns, the max number of misfit grid rows/ColPartitionSets |
47 | | // that can be ignored. |
48 | | const int kMaxIncompatibleColumnCount = 2; |
49 | | // Max fraction of mean_column_gap_ for the gap between two partitions within a |
50 | | // column to allow them to merge. |
51 | | const double kHorizontalGapMergeFraction = 0.5; |
52 | | // Minimum gutter width as a fraction of gridsize |
53 | | const double kMinGutterWidthGrid = 0.5; |
54 | | // Max multiple of a partition's median size as a distance threshold for |
55 | | // adding noise blobs. |
56 | | const double kMaxDistToPartSizeRatio = 1.5; |
57 | | |
58 | | #ifndef GRAPHICS_DISABLED |
59 | | static BOOL_VAR(textord_tabfind_show_initial_partitions, false, "Show partition bounds"); |
60 | | static BOOL_VAR(textord_tabfind_show_reject_blobs, false, "Show blobs rejected as noise"); |
61 | | static INT_VAR(textord_tabfind_show_partitions, 0, |
62 | | "Show partition bounds, waiting if >1 (ScrollView)"); |
63 | | static BOOL_VAR(textord_tabfind_show_columns, false, "Show column bounds (ScrollView)"); |
64 | | static BOOL_VAR(textord_tabfind_show_blocks, false, "Show final block bounds (ScrollView)"); |
65 | | #endif |
66 | | static BOOL_VAR(textord_tabfind_find_tables, true, "run table detection"); |
67 | | |
68 | | #ifndef GRAPHICS_DISABLED |
69 | | ScrollView *ColumnFinder::blocks_win_ = nullptr; |
70 | | #endif |
71 | | |
72 | | // Gridsize is an estimate of the text size in the image. A suitable value |
73 | | // is in TO_BLOCK::line_size after find_components has been used to make |
74 | | // the blobs. |
75 | | // bleft and tright are the bounds of the image (or rectangle) being processed. |
76 | | // vlines is a (possibly empty) list of TabVector and vertical_x and y are |
77 | | // the sum logical vertical vector produced by LineFinder::FindVerticalLines. |
78 | | ColumnFinder::ColumnFinder(int gridsize, const ICOORD &bleft, const ICOORD &tright, int resolution, |
79 | | bool cjk_script, double aligned_gap_fraction, TabVector_LIST *vlines, |
80 | | TabVector_LIST *hlines, int vertical_x, int vertical_y) |
81 | 0 | : TabFind(gridsize, bleft, tright, vlines, vertical_x, vertical_y, resolution) |
82 | 0 | , cjk_script_(cjk_script) |
83 | 0 | , min_gutter_width_(static_cast<int>(kMinGutterWidthGrid * gridsize)) |
84 | 0 | , mean_column_gap_(tright.x() - bleft.x()) |
85 | 0 | , tabfind_aligned_gap_fraction_(aligned_gap_fraction) |
86 | 0 | , deskew_(0.0f, 0.0f) |
87 | 0 | , reskew_(1.0f, 0.0f) |
88 | 0 | , rotation_(1.0f, 0.0f) |
89 | 0 | , rerotate_(1.0f, 0.0f) |
90 | 0 | , text_rotation_(0.0f, 0.0f) |
91 | 0 | , best_columns_(nullptr) |
92 | 0 | , stroke_width_(nullptr) |
93 | 0 | , part_grid_(gridsize, bleft, tright) |
94 | 0 | , nontext_map_(nullptr) |
95 | 0 | , projection_(resolution) |
96 | 0 | , denorm_(nullptr) |
97 | 0 | , equation_detect_(nullptr) { |
98 | 0 | TabVector_IT h_it(&horizontal_lines_); |
99 | 0 | h_it.add_list_after(hlines); |
100 | 0 | } |
101 | | |
102 | 0 | ColumnFinder::~ColumnFinder() { |
103 | 0 | for (auto set : column_sets_) { |
104 | 0 | delete set; |
105 | 0 | } |
106 | 0 | delete[] best_columns_; |
107 | 0 | delete stroke_width_; |
108 | | #ifndef GRAPHICS_DISABLED |
109 | | delete input_blobs_win_; |
110 | | #endif |
111 | 0 | nontext_map_.destroy(); |
112 | 0 | while (denorm_ != nullptr) { |
113 | 0 | DENORM *dead_denorm = denorm_; |
114 | 0 | denorm_ = const_cast<DENORM *>(denorm_->predecessor()); |
115 | 0 | delete dead_denorm; |
116 | 0 | } |
117 | | |
118 | | // The ColPartitions are destroyed automatically, but any boxes in |
119 | | // the noise_parts_ list are owned and need to be deleted explicitly. |
120 | 0 | ColPartition_IT part_it(&noise_parts_); |
121 | 0 | for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { |
122 | 0 | ColPartition *part = part_it.data(); |
123 | 0 | part->DeleteBoxes(); |
124 | 0 | } |
125 | | // Likewise any boxes in the good_parts_ list need to be deleted. |
126 | | // These are just the image parts. Text parts have already given their |
127 | | // boxes on to the TO_BLOCK, and have empty lists. |
128 | 0 | part_it.set_to_list(&good_parts_); |
129 | 0 | for (part_it.mark_cycle_pt(); !part_it.cycled_list(); part_it.forward()) { |
130 | 0 | ColPartition *part = part_it.data(); |
131 | 0 | part->DeleteBoxes(); |
132 | 0 | } |
133 | | // Also, any blobs on the image_bblobs_ list need to have their cblobs |
134 | | // deleted. This only happens if there has been an early return from |
135 | | // FindColumns, as in a normal return, the blobs go into the grid and |
136 | | // end up in noise_parts_, good_parts_ or the output blocks. |
137 | 0 | BLOBNBOX_IT bb_it(&image_bblobs_); |
138 | 0 | for (bb_it.mark_cycle_pt(); !bb_it.cycled_list(); bb_it.forward()) { |
139 | 0 | BLOBNBOX *bblob = bb_it.data(); |
140 | 0 | delete bblob->cblob(); |
141 | 0 | } |
142 | 0 | } |
143 | | |
144 | | // Performs initial processing on the blobs in the input_block: |
145 | | // Setup the part_grid_, stroke_width_, nontext_map. |
146 | | // Obvious noise blobs are filtered out and used to mark the nontext_map_. |
147 | | // Initial stroke-width analysis is used to get local text alignment |
148 | | // direction, so the textline projection_ map can be setup. |
149 | | // On return, IsVerticallyAlignedText may be called (now optionally) to |
150 | | // determine the gross textline alignment of the page. |
151 | | void ColumnFinder::SetupAndFilterNoise(PageSegMode pageseg_mode, Image photo_mask_pix, |
152 | 0 | TO_BLOCK *input_block) { |
153 | 0 | part_grid_.Init(gridsize(), bleft(), tright()); |
154 | 0 | delete stroke_width_; |
155 | 0 | stroke_width_ = new StrokeWidth(gridsize(), bleft(), tright()); |
156 | 0 | min_gutter_width_ = static_cast<int>(kMinGutterWidthGrid * gridsize()); |
157 | 0 | input_block->ReSetAndReFilterBlobs(); |
158 | | #ifndef GRAPHICS_DISABLED |
159 | | if (textord_tabfind_show_blocks) { |
160 | | input_blobs_win_ = MakeWindow(0, 0, "Filtered Input Blobs"); |
161 | | input_block->plot_graded_blobs(input_blobs_win_); |
162 | | } |
163 | | #endif // !GRAPHICS_DISABLED |
164 | 0 | SetBlockRuleEdges(input_block); |
165 | 0 | nontext_map_.destroy(); |
166 | | // Run a preliminary strokewidth neighbour detection on the medium blobs. |
167 | 0 | stroke_width_->SetNeighboursOnMediumBlobs(input_block); |
168 | 0 | CCNonTextDetect nontext_detect(gridsize(), bleft(), tright()); |
169 | | // Remove obvious noise and make the initial non-text map. |
170 | 0 | nontext_map_ = |
171 | 0 | nontext_detect.ComputeNonTextMask(textord_debug_tabfind, photo_mask_pix, input_block); |
172 | 0 | stroke_width_->FindTextlineDirectionAndFixBrokenCJK(pageseg_mode, cjk_script_, input_block); |
173 | | // Clear the strokewidth grid ready for rotation or leader finding. |
174 | 0 | stroke_width_->Clear(); |
175 | 0 | } |
176 | | |
177 | | // Tests for vertical alignment of text (returning true if so), and generates |
178 | | // a list of blobs of moderate aspect ratio, in the most frequent writing |
179 | | // direction (in osd_blobs) for orientation and script detection to test |
180 | | // the character orientation. |
181 | | // block is the single block for the whole page or rectangle to be OCRed. |
182 | | // Note that the vertical alignment may be due to text whose writing direction |
183 | | // is vertical, like say Japanese, or due to text whose writing direction is |
184 | | // horizontal but whose text appears vertically aligned because the image is |
185 | | // not the right way up. |
186 | | bool ColumnFinder::IsVerticallyAlignedText(double find_vertical_text_ratio, TO_BLOCK *block, |
187 | 0 | BLOBNBOX_CLIST *osd_blobs) { |
188 | 0 | return stroke_width_->TestVerticalTextDirection(find_vertical_text_ratio, block, osd_blobs); |
189 | 0 | } |
190 | | |
191 | | // Rotates the blobs and the TabVectors so that the gross writing direction |
192 | | // (text lines) are horizontal and lines are read down the page. |
193 | | // Applied rotation stored in rotation_. |
194 | | // A second rotation is calculated for application during recognition to |
195 | | // make the rotated blobs upright for recognition. |
196 | | // Subsequent rotation stored in text_rotation_. |
197 | | // |
198 | | // Arguments: |
199 | | // vertical_text_lines true if the text lines are vertical. |
200 | | // recognition_rotation [0..3] is the number of anti-clockwise 90 degree |
201 | | // rotations from osd required for the text to be upright and readable. |
202 | | void ColumnFinder::CorrectOrientation(TO_BLOCK *block, bool vertical_text_lines, |
203 | 0 | int recognition_rotation) { |
204 | 0 | const FCOORD anticlockwise90(0.0f, 1.0f); |
205 | 0 | const FCOORD clockwise90(0.0f, -1.0f); |
206 | 0 | const FCOORD rotation180(-1.0f, 0.0f); |
207 | 0 | const FCOORD norotation(1.0f, 0.0f); |
208 | |
|
209 | 0 | text_rotation_ = norotation; |
210 | | // Rotate the page to make the text upright, as implied by |
211 | | // recognition_rotation. |
212 | 0 | rotation_ = norotation; |
213 | 0 | if (recognition_rotation == 1) { |
214 | 0 | rotation_ = anticlockwise90; |
215 | 0 | } else if (recognition_rotation == 2) { |
216 | 0 | rotation_ = rotation180; |
217 | 0 | } else if (recognition_rotation == 3) { |
218 | 0 | rotation_ = clockwise90; |
219 | 0 | } |
220 | | // We infer text writing direction to be vertical if there are several |
221 | | // vertical text lines detected, and horizontal if not. But if the page |
222 | | // orientation was determined to be 90 or 270 degrees, the true writing |
223 | | // direction is the opposite of what we inferred. |
224 | 0 | if (recognition_rotation & 1) { |
225 | 0 | vertical_text_lines = !vertical_text_lines; |
226 | 0 | } |
227 | | // If we still believe the writing direction is vertical, we use the |
228 | | // convention of rotating the page ccw 90 degrees to make the text lines |
229 | | // horizontal, and mark the blobs for rotation cw 90 degrees for |
230 | | // classification so that the text order is correct after recognition. |
231 | 0 | if (vertical_text_lines) { |
232 | 0 | rotation_.rotate(anticlockwise90); |
233 | 0 | text_rotation_.rotate(clockwise90); |
234 | 0 | } |
235 | | // Set rerotate_ to the inverse of rotation_. |
236 | 0 | rerotate_ = FCOORD(rotation_.x(), -rotation_.y()); |
237 | 0 | if (rotation_.x() != 1.0f || rotation_.y() != 0.0f) { |
238 | | // Rotate all the blobs and tab vectors. |
239 | 0 | RotateBlobList(rotation_, &block->large_blobs); |
240 | 0 | RotateBlobList(rotation_, &block->blobs); |
241 | 0 | RotateBlobList(rotation_, &block->small_blobs); |
242 | 0 | RotateBlobList(rotation_, &block->noise_blobs); |
243 | 0 | TabFind::ResetForVerticalText(rotation_, rerotate_, &horizontal_lines_, &min_gutter_width_); |
244 | 0 | part_grid_.Init(gridsize(), bleft(), tright()); |
245 | | // Reset all blobs to initial state and filter by size. |
246 | | // Since they have rotated, the list they belong on could have changed. |
247 | 0 | block->ReSetAndReFilterBlobs(); |
248 | 0 | SetBlockRuleEdges(block); |
249 | 0 | stroke_width_->CorrectForRotation(rerotate_, &part_grid_); |
250 | 0 | } |
251 | 0 | if (textord_debug_tabfind) { |
252 | 0 | tprintf("Vertical=%d, orientation=%d, final rotation=(%f, %f)+(%f,%f)\n", vertical_text_lines, |
253 | 0 | recognition_rotation, rotation_.x(), rotation_.y(), text_rotation_.x(), |
254 | 0 | text_rotation_.y()); |
255 | 0 | } |
256 | | // Setup the denormalization. |
257 | 0 | ASSERT_HOST(denorm_ == nullptr); |
258 | 0 | denorm_ = new DENORM; |
259 | 0 | denorm_->SetupNormalization(nullptr, &rotation_, nullptr, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 0.0f); |
260 | 0 | } |
261 | | |
262 | | // Finds blocks of text, image, rule line, table etc, returning them in the |
263 | | // blocks and to_blocks |
264 | | // (Each TO_BLOCK points to the basic BLOCK and adds more information.) |
265 | | // Image blocks are generated by a combination of photo_mask_pix (which may |
266 | | // NOT be nullptr) and the rejected text found during preliminary textline |
267 | | // finding. |
268 | | // The input_block is the result of a call to find_components, and contains |
269 | | // the blobs found in the image or rectangle to be OCRed. These blobs will be |
270 | | // removed and placed in the output blocks, while unused ones will be deleted. |
271 | | // If single_column is true, the input is treated as single column, but |
272 | | // it is still divided into blocks of equal line spacing/text size. |
273 | | // scaled_color is scaled down by scaled_factor from the input color image, |
274 | | // and may be nullptr if the input was not color. |
275 | | // grey_pix is optional, but if present must match the photo_mask_pix in size, |
276 | | // and must be a *real* grey image instead of binary_pix * 255. |
277 | | // thresholds_pix is expected to be present iff grey_pix is present and |
278 | | // can be an integer factor reduction of the grey_pix. It represents the |
279 | | // thresholds that were used to create the binary_pix from the grey_pix. |
280 | | // If diacritic_blobs is non-null, then diacritics/noise blobs, that would |
281 | | // confuse layout analysis by causing textline overlap, are placed there, |
282 | | // with the expectation that they will be reassigned to words later and |
283 | | // noise/diacriticness determined via classification. |
284 | | // Returns -1 if the user hits the 'd' key in the blocks window while running |
285 | | // in debug mode, which requests a retry with more debug info. |
286 | | int ColumnFinder::FindBlocks(PageSegMode pageseg_mode, Image scaled_color, int scaled_factor, |
287 | | TO_BLOCK *input_block, Image photo_mask_pix, Image thresholds_pix, |
288 | | Image grey_pix, DebugPixa *pixa_debug, BLOCK_LIST *blocks, |
289 | 0 | BLOBNBOX_LIST *diacritic_blobs, TO_BLOCK_LIST *to_blocks) { |
290 | 0 | photo_mask_pix |= nontext_map_; |
291 | 0 | stroke_width_->FindLeaderPartitions(input_block, &part_grid_); |
292 | 0 | stroke_width_->RemoveLineResidue(&big_parts_); |
293 | 0 | FindInitialTabVectors(nullptr, min_gutter_width_, tabfind_aligned_gap_fraction_, input_block); |
294 | 0 | SetBlockRuleEdges(input_block); |
295 | 0 | stroke_width_->GradeBlobsIntoPartitions(pageseg_mode, rerotate_, input_block, nontext_map_, |
296 | 0 | denorm_, cjk_script_, &projection_, diacritic_blobs, |
297 | 0 | &part_grid_, &big_parts_); |
298 | 0 | if (!PSM_SPARSE(pageseg_mode)) { |
299 | 0 | ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_, input_block, this, |
300 | 0 | pixa_debug, &part_grid_, &big_parts_); |
301 | 0 | ImageFind::TransferImagePartsToImageMask(rerotate_, &part_grid_, photo_mask_pix); |
302 | 0 | ImageFind::FindImagePartitions(photo_mask_pix, rotation_, rerotate_, input_block, this, |
303 | 0 | pixa_debug, &part_grid_, &big_parts_); |
304 | 0 | } |
305 | 0 | part_grid_.ReTypeBlobs(&image_bblobs_); |
306 | 0 | TidyBlobs(input_block); |
307 | 0 | Reset(); |
308 | | // TODO(rays) need to properly handle big_parts_. |
309 | 0 | ColPartition_IT p_it(&big_parts_); |
310 | 0 | for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) { |
311 | 0 | p_it.data()->DisownBoxesNoAssert(); |
312 | 0 | } |
313 | 0 | big_parts_.clear(); |
314 | 0 | delete stroke_width_; |
315 | 0 | stroke_width_ = nullptr; |
316 | | // Compute the edge offsets whether or not there is a grey_pix. It is done |
317 | | // here as the c_blobs haven't been touched by rotation or anything yet, |
318 | | // so no denorm is required, yet the text has been separated from image, so |
319 | | // no time is wasted running it on image blobs. |
320 | 0 | input_block->ComputeEdgeOffsets(thresholds_pix, grey_pix); |
321 | | |
322 | | // A note about handling right-to-left scripts (Hebrew/Arabic): |
323 | | // The columns must be reversed and come out in right-to-left instead of |
324 | | // the normal left-to-right order. Because the left-to-right ordering |
325 | | // is implicit in many data structures, it is simpler to fool the algorithms |
326 | | // into thinking they are dealing with left-to-right text. |
327 | | // To do this, we reflect the needed data in the y-axis and then reflect |
328 | | // the blocks back after they have been created. This is a temporary |
329 | | // arrangement that is confined to this function only, so the reflection |
330 | | // is completely invisible in the output blocks. |
331 | | // The only objects reflected are: |
332 | | // The vertical separator lines that have already been found; |
333 | | // The bounding boxes of all BLOBNBOXES on all lists on the input_block |
334 | | // plus the image_bblobs. The outlines are not touched, since they are |
335 | | // not looked at. |
336 | 0 | bool input_is_rtl = input_block->block->right_to_left(); |
337 | 0 | if (input_is_rtl) { |
338 | | // Reflect the vertical separator lines (member of TabFind). |
339 | 0 | ReflectInYAxis(); |
340 | | // Reflect the blob boxes. |
341 | 0 | ReflectForRtl(input_block, &image_bblobs_); |
342 | 0 | part_grid_.ReflectInYAxis(); |
343 | 0 | } |
344 | |
|
345 | 0 | if (!PSM_SPARSE(pageseg_mode)) { |
346 | 0 | if (!PSM_COL_FIND_ENABLED(pageseg_mode)) { |
347 | | // No tab stops needed. Just the grid that FindTabVectors makes. |
348 | 0 | DontFindTabVectors(&image_bblobs_, input_block, &deskew_, &reskew_); |
349 | 0 | } else { |
350 | 0 | SetBlockRuleEdges(input_block); |
351 | | // Find the tab stops, estimate skew, and deskew the tabs, blobs and |
352 | | // part_grid_. |
353 | 0 | FindTabVectors(&horizontal_lines_, &image_bblobs_, input_block, min_gutter_width_, |
354 | 0 | tabfind_aligned_gap_fraction_, &part_grid_, &deskew_, &reskew_); |
355 | | // Add the deskew to the denorm_. |
356 | 0 | auto *new_denorm = new DENORM; |
357 | 0 | new_denorm->SetupNormalization(nullptr, &deskew_, denorm_, 0.0f, 0.0f, 1.0f, 1.0f, 0.0f, |
358 | 0 | 0.0f); |
359 | 0 | denorm_ = new_denorm; |
360 | 0 | } |
361 | 0 | SetBlockRuleEdges(input_block); |
362 | 0 | part_grid_.SetTabStops(this); |
363 | | |
364 | | // Make the column_sets_. |
365 | 0 | if (!MakeColumns(false)) { |
366 | 0 | tprintf("Empty page!!\n"); |
367 | 0 | part_grid_.DeleteParts(); |
368 | 0 | return 0; // This is an empty page. |
369 | 0 | } |
370 | | |
371 | | // Refill the grid using rectangular spreading, and get the benefit |
372 | | // of the completed tab vectors marking the rule edges of each blob. |
373 | 0 | Clear(); |
374 | | #ifndef GRAPHICS_DISABLED |
375 | | if (textord_tabfind_show_reject_blobs) { |
376 | | ScrollView *rej_win = MakeWindow(500, 300, "Rejected blobs"); |
377 | | input_block->plot_graded_blobs(rej_win); |
378 | | } |
379 | | #endif // !GRAPHICS_DISABLED |
380 | 0 | InsertBlobsToGrid(false, false, &image_bblobs_, this); |
381 | 0 | InsertBlobsToGrid(true, true, &input_block->blobs, this); |
382 | |
|
383 | 0 | part_grid_.GridFindMargins(best_columns_); |
384 | | // Split and merge the partitions by looking at local neighbours. |
385 | 0 | GridSplitPartitions(); |
386 | | // Resolve unknown partitions by adding to an existing partition, fixing |
387 | | // the type, or declaring them noise. |
388 | 0 | part_grid_.GridFindMargins(best_columns_); |
389 | 0 | GridMergePartitions(); |
390 | | // Insert any unused noise blobs that are close enough to an appropriate |
391 | | // partition. |
392 | 0 | InsertRemainingNoise(input_block); |
393 | | // Add horizontal line separators as partitions. |
394 | 0 | GridInsertHLinePartitions(); |
395 | 0 | GridInsertVLinePartitions(); |
396 | | // Recompute margins based on a local neighbourhood search. |
397 | 0 | part_grid_.GridFindMargins(best_columns_); |
398 | 0 | SetPartitionTypes(); |
399 | 0 | } |
400 | | #ifndef GRAPHICS_DISABLED |
401 | | if (textord_tabfind_show_initial_partitions) { |
402 | | ScrollView *part_win = MakeWindow(100, 300, "InitialPartitions"); |
403 | | part_grid_.DisplayBoxes(part_win); |
404 | | DisplayTabVectors(part_win); |
405 | | } |
406 | | #endif |
407 | 0 | if (!PSM_SPARSE(pageseg_mode)) { |
408 | 0 | #ifndef DISABLED_LEGACY_ENGINE |
409 | 0 | if (equation_detect_) { |
410 | 0 | equation_detect_->FindEquationParts(&part_grid_, best_columns_); |
411 | 0 | } |
412 | 0 | #endif |
413 | 0 | if (textord_tabfind_find_tables) { |
414 | 0 | TableFinder table_finder; |
415 | 0 | table_finder.Init(gridsize(), bleft(), tright()); |
416 | 0 | table_finder.set_resolution(resolution_); |
417 | 0 | table_finder.set_left_to_right_language(!input_block->block->right_to_left()); |
418 | | // Copy cleaned partitions from part_grid_ to clean_part_grid_ and |
419 | | // insert dot-like noise into period_grid_ |
420 | 0 | table_finder.InsertCleanPartitions(&part_grid_, input_block); |
421 | | // Get Table Regions |
422 | 0 | table_finder.LocateTables(&part_grid_, best_columns_, WidthCB(), reskew_); |
423 | 0 | } |
424 | 0 | GridRemoveUnderlinePartitions(); |
425 | 0 | part_grid_.DeleteUnknownParts(input_block); |
426 | | |
427 | | // Build the partitions into chains that belong in the same block and |
428 | | // refine into one-to-one links, then smooth the types within each chain. |
429 | 0 | part_grid_.FindPartitionPartners(); |
430 | 0 | part_grid_.FindFigureCaptions(); |
431 | 0 | part_grid_.RefinePartitionPartners(true); |
432 | 0 | SmoothPartnerRuns(); |
433 | |
|
434 | | #ifndef GRAPHICS_DISABLED |
435 | | if (textord_tabfind_show_partitions) { |
436 | | ScrollView *window = MakeWindow(400, 300, "Partitions"); |
437 | | if (window != nullptr) { |
438 | | part_grid_.DisplayBoxes(window); |
439 | | if (!textord_debug_printable) { |
440 | | DisplayTabVectors(window); |
441 | | } |
442 | | if (window != nullptr && textord_tabfind_show_partitions > 1) { |
443 | | window->AwaitEvent(SVET_DESTROY); |
444 | | } |
445 | | } |
446 | | } |
447 | | #endif // !GRAPHICS_DISABLED |
448 | 0 | part_grid_.AssertNoDuplicates(); |
449 | 0 | } |
450 | | // Ownership of the ColPartitions moves from part_sets_ to part_grid_ here, |
451 | | // and ownership of the BLOBNBOXes moves to the ColPartitions. |
452 | | // (They were previously owned by the block or the image_bblobs list.) |
453 | 0 | ReleaseBlobsAndCleanupUnused(input_block); |
454 | | // Ownership of the ColPartitions moves from part_grid_ to good_parts_ and |
455 | | // noise_parts_ here. In text blocks, ownership of the BLOBNBOXes moves |
456 | | // from the ColPartitions to the output TO_BLOCK. In non-text, the |
457 | | // BLOBNBOXes stay with the ColPartitions and get deleted in the destructor. |
458 | 0 | if (PSM_SPARSE(pageseg_mode)) { |
459 | 0 | part_grid_.ExtractPartitionsAsBlocks(blocks, to_blocks); |
460 | 0 | } else { |
461 | 0 | TransformToBlocks(blocks, to_blocks); |
462 | 0 | } |
463 | 0 | if (textord_debug_tabfind) { |
464 | 0 | tprintf("Found %d blocks, %d to_blocks\n", blocks->length(), to_blocks->length()); |
465 | 0 | } |
466 | |
|
467 | | #ifndef GRAPHICS_DISABLED |
468 | | if (textord_tabfind_show_blocks) { |
469 | | DisplayBlocks(blocks); |
470 | | } |
471 | | #endif |
472 | 0 | RotateAndReskewBlocks(input_is_rtl, to_blocks); |
473 | 0 | int result = 0; |
474 | | #ifndef GRAPHICS_DISABLED |
475 | | if (blocks_win_ != nullptr) { |
476 | | bool waiting = false; |
477 | | do { |
478 | | waiting = false; |
479 | | auto event = blocks_win_->AwaitEvent(SVET_ANY); |
480 | | if (event->type == SVET_INPUT && event->parameter != nullptr) { |
481 | | if (*event->parameter == 'd') { |
482 | | result = -1; |
483 | | } else { |
484 | | blocks->clear(); |
485 | | } |
486 | | } else if (event->type == SVET_DESTROY) { |
487 | | blocks_win_ = nullptr; |
488 | | } else { |
489 | | waiting = true; |
490 | | } |
491 | | } while (waiting); |
492 | | } |
493 | | #endif // !GRAPHICS_DISABLED |
494 | 0 | return result; |
495 | 0 | } |
496 | | |
497 | | // Get the rotation required to deskew, and its inverse rotation. |
498 | 0 | void ColumnFinder::GetDeskewVectors(FCOORD *deskew, FCOORD *reskew) { |
499 | 0 | *reskew = reskew_; |
500 | 0 | *deskew = reskew_; |
501 | 0 | deskew->set_y(-deskew->y()); |
502 | 0 | } |
503 | | |
504 | | #ifndef DISABLED_LEGACY_ENGINE |
505 | 0 | void ColumnFinder::SetEquationDetect(EquationDetectBase *detect) { |
506 | 0 | equation_detect_ = detect; |
507 | 0 | } |
508 | | #endif |
509 | | |
510 | | //////////////// PRIVATE CODE ///////////////////////// |
511 | | |
512 | | #ifndef GRAPHICS_DISABLED |
513 | | |
514 | | // Displays the blob and block bounding boxes in a window called Blocks. |
515 | | void ColumnFinder::DisplayBlocks(BLOCK_LIST *blocks) { |
516 | | if (blocks_win_ == nullptr) { |
517 | | blocks_win_ = MakeWindow(700, 300, "Blocks"); |
518 | | } else { |
519 | | blocks_win_->Clear(); |
520 | | } |
521 | | DisplayBoxes(blocks_win_); |
522 | | BLOCK_IT block_it(blocks); |
523 | | int serial = 1; |
524 | | for (block_it.mark_cycle_pt(); !block_it.cycled_list(); block_it.forward()) { |
525 | | BLOCK *block = block_it.data(); |
526 | | block->pdblk.plot(blocks_win_, serial++, |
527 | | textord_debug_printable ? ScrollView::BLUE : ScrollView::GREEN); |
528 | | } |
529 | | blocks_win_->Update(); |
530 | | } |
531 | | |
532 | | // Displays the column edges at each grid y coordinate defined by |
533 | | // best_columns_. |
534 | | void ColumnFinder::DisplayColumnBounds(PartSetVector *sets) { |
535 | | ScrollView *col_win = MakeWindow(50, 300, "Columns"); |
536 | | DisplayBoxes(col_win); |
537 | | col_win->Pen(textord_debug_printable ? ScrollView::BLUE : ScrollView::GREEN); |
538 | | for (int i = 0; i < gridheight_; ++i) { |
539 | | ColPartitionSet *columns = best_columns_[i]; |
540 | | if (columns != nullptr) { |
541 | | columns->DisplayColumnEdges(i * gridsize_, (i + 1) * gridsize_, col_win); |
542 | | } |
543 | | } |
544 | | } |
545 | | |
546 | | #endif // !GRAPHICS_DISABLED |
547 | | |
548 | | // Sets up column_sets_ (the determined column layout at each horizontal |
549 | | // slice). Returns false if the page is empty. |
550 | 0 | bool ColumnFinder::MakeColumns(bool single_column) { |
551 | | // The part_sets_ are a temporary structure used during column creation, |
552 | | // and is a vector of ColPartitionSets, representing ColPartitions found |
553 | | // at horizontal slices through the page. |
554 | 0 | PartSetVector part_sets; |
555 | 0 | if (!single_column) { |
556 | 0 | if (!part_grid_.MakeColPartSets(&part_sets)) { |
557 | 0 | return false; // Empty page. |
558 | 0 | } |
559 | 0 | ASSERT_HOST(part_grid_.gridheight() == gridheight_); |
560 | | // Try using only the good parts first. |
561 | 0 | bool good_only = true; |
562 | 0 | do { |
563 | 0 | for (int i = 0; i < gridheight_; ++i) { |
564 | 0 | ColPartitionSet *line_set = part_sets.at(i); |
565 | 0 | if (line_set != nullptr && line_set->LegalColumnCandidate()) { |
566 | 0 | ColPartitionSet *column_candidate = line_set->Copy(good_only); |
567 | 0 | if (column_candidate != nullptr) { |
568 | 0 | column_candidate->AddToColumnSetsIfUnique(&column_sets_, WidthCB()); |
569 | 0 | } |
570 | 0 | } |
571 | 0 | } |
572 | 0 | good_only = !good_only; |
573 | 0 | } while (column_sets_.empty() && !good_only); |
574 | 0 | if (textord_debug_tabfind) { |
575 | 0 | PrintColumnCandidates("Column candidates"); |
576 | 0 | } |
577 | | // Improve the column candidates against themselves. |
578 | 0 | ImproveColumnCandidates(&column_sets_, &column_sets_); |
579 | 0 | if (textord_debug_tabfind) { |
580 | 0 | PrintColumnCandidates("Improved columns"); |
581 | 0 | } |
582 | | // Improve the column candidates using the part_sets_. |
583 | 0 | ImproveColumnCandidates(&part_sets, &column_sets_); |
584 | 0 | } |
585 | 0 | ColPartitionSet *single_column_set = part_grid_.MakeSingleColumnSet(WidthCB()); |
586 | 0 | if (single_column_set != nullptr) { |
587 | | // Always add the single column set as a backup even if not in |
588 | | // single column mode. |
589 | 0 | single_column_set->AddToColumnSetsIfUnique(&column_sets_, WidthCB()); |
590 | 0 | } |
591 | 0 | if (textord_debug_tabfind) { |
592 | 0 | PrintColumnCandidates("Final Columns"); |
593 | 0 | } |
594 | 0 | bool has_columns = !column_sets_.empty(); |
595 | 0 | if (has_columns) { |
596 | | // Divide the page into sections of uniform column layout. |
597 | 0 | bool any_multi_column = AssignColumns(part_sets); |
598 | | #ifndef GRAPHICS_DISABLED |
599 | | if (textord_tabfind_show_columns) { |
600 | | DisplayColumnBounds(&part_sets); |
601 | | } |
602 | | #endif |
603 | 0 | ComputeMeanColumnGap(any_multi_column); |
604 | 0 | } |
605 | 0 | for (auto line_set : part_sets) { |
606 | 0 | if (line_set != nullptr) { |
607 | 0 | line_set->RelinquishParts(); |
608 | 0 | delete line_set; |
609 | 0 | } |
610 | 0 | } |
611 | 0 | return has_columns; |
612 | 0 | } |
613 | | |
614 | | // Attempt to improve the column_candidates by expanding the columns |
615 | | // and adding new partitions from the partition sets in src_sets. |
616 | | // Src_sets may be equal to column_candidates, in which case it will |
617 | | // use them as a source to improve themselves. |
618 | 0 | void ColumnFinder::ImproveColumnCandidates(PartSetVector *src_sets, PartSetVector *column_sets) { |
619 | | // TODO: optimize. |
620 | 0 | PartSetVector temp_cols = *column_sets; |
621 | 0 | column_sets->clear(); |
622 | 0 | if (src_sets == column_sets) { |
623 | 0 | src_sets = &temp_cols; |
624 | 0 | } |
625 | 0 | int set_size = temp_cols.size(); |
626 | | // Try using only the good parts first. |
627 | 0 | bool good_only = true; |
628 | 0 | do { |
629 | 0 | for (int i = 0; i < set_size; ++i) { |
630 | 0 | ColPartitionSet *column_candidate = temp_cols.at(i); |
631 | 0 | ASSERT_HOST(column_candidate != nullptr); |
632 | 0 | ColPartitionSet *improved = column_candidate->Copy(good_only); |
633 | 0 | if (improved != nullptr) { |
634 | 0 | improved->ImproveColumnCandidate(WidthCB(), src_sets); |
635 | 0 | improved->AddToColumnSetsIfUnique(column_sets, WidthCB()); |
636 | 0 | } |
637 | 0 | } |
638 | 0 | good_only = !good_only; |
639 | 0 | } while (column_sets->empty() && !good_only); |
640 | 0 | if (column_sets->empty()) { |
641 | | // TODO: optimize. |
642 | 0 | *column_sets = temp_cols; |
643 | 0 | temp_cols.clear(); |
644 | 0 | } else { |
645 | 0 | for (auto data : temp_cols) { |
646 | 0 | delete data; |
647 | 0 | } |
648 | 0 | } |
649 | 0 | } |
650 | | |
651 | | // Prints debug information on the column candidates. |
652 | 0 | void ColumnFinder::PrintColumnCandidates(const char *title) { |
653 | 0 | int set_size = column_sets_.size(); |
654 | 0 | tprintf("Found %d %s:\n", set_size, title); |
655 | 0 | if (textord_debug_tabfind >= 3) { |
656 | 0 | for (int i = 0; i < set_size; ++i) { |
657 | 0 | ColPartitionSet *column_set = column_sets_.at(i); |
658 | 0 | column_set->Print(); |
659 | 0 | } |
660 | 0 | } |
661 | 0 | } |
662 | | |
663 | | // Finds the optimal set of columns that cover the entire image with as |
664 | | // few changes in column partition as possible. |
665 | | // NOTE: this could be thought of as an optimization problem, but a simple |
666 | | // greedy algorithm is used instead. The algorithm repeatedly finds the modal |
667 | | // compatible column in an unassigned region and uses that with the extra |
668 | | // tweak of extending the modal region over small breaks in compatibility. |
669 | | // Where modal regions overlap, the boundary is chosen so as to minimize |
670 | | // the cost in terms of ColPartitions not fitting an approved column. |
671 | | // Returns true if any part of the page is multi-column. |
672 | 0 | bool ColumnFinder::AssignColumns(const PartSetVector &part_sets) { |
673 | 0 | int set_count = part_sets.size(); |
674 | 0 | ASSERT_HOST(set_count == gridheight()); |
675 | | // Allocate and init the best_columns_. |
676 | 0 | best_columns_ = new ColPartitionSet *[set_count]; |
677 | 0 | for (int y = 0; y < set_count; ++y) { |
678 | 0 | best_columns_[y] = nullptr; |
679 | 0 | } |
680 | 0 | int column_count = column_sets_.size(); |
681 | | // column_set_costs[part_sets_ index][column_sets_ index] is |
682 | | // < INT32_MAX if the partition set is compatible with the column set, |
683 | | // in which case its value is the cost for that set used in deciding |
684 | | // which competing set to assign. |
685 | | // any_columns_possible[part_sets_ index] is true if any of |
686 | | // possible_column_sets[part_sets_ index][*] is < INT32_MAX. |
687 | | // assigned_costs[part_sets_ index] is set to the column_set_costs |
688 | | // of the assigned column_sets_ index or INT32_MAX if none is set. |
689 | | // On return the best_columns_ member is set. |
690 | 0 | bool *any_columns_possible = new bool[set_count]; |
691 | 0 | int *assigned_costs = new int[set_count]; |
692 | 0 | int **column_set_costs = new int *[set_count]; |
693 | | // Set possible column_sets to indicate whether each set is compatible |
694 | | // with each column. |
695 | 0 | for (int part_i = 0; part_i < set_count; ++part_i) { |
696 | 0 | ColPartitionSet *line_set = part_sets.at(part_i); |
697 | 0 | bool debug = line_set != nullptr && WithinTestRegion(2, line_set->bounding_box().left(), |
698 | 0 | line_set->bounding_box().bottom()); |
699 | 0 | column_set_costs[part_i] = new int[column_count]; |
700 | 0 | any_columns_possible[part_i] = false; |
701 | 0 | assigned_costs[part_i] = INT32_MAX; |
702 | 0 | for (int col_i = 0; col_i < column_count; ++col_i) { |
703 | 0 | if (line_set != nullptr && |
704 | 0 | column_sets_.at(col_i)->CompatibleColumns(debug, line_set, WidthCB())) { |
705 | 0 | column_set_costs[part_i][col_i] = column_sets_.at(col_i)->UnmatchedWidth(line_set); |
706 | 0 | any_columns_possible[part_i] = true; |
707 | 0 | } else { |
708 | 0 | column_set_costs[part_i][col_i] = INT32_MAX; |
709 | 0 | if (debug) { |
710 | 0 | tprintf("Set id %d did not match at y=%d, lineset =%p\n", |
711 | 0 | col_i, part_i, static_cast<void *>(line_set)); |
712 | 0 | } |
713 | 0 | } |
714 | 0 | } |
715 | 0 | } |
716 | 0 | bool any_multi_column = false; |
717 | | // Assign a column set to each vertical grid position. |
718 | | // While there is an unassigned range, find its mode. |
719 | 0 | int start, end; |
720 | 0 | while (BiggestUnassignedRange(set_count, any_columns_possible, &start, &end)) { |
721 | 0 | if (textord_debug_tabfind >= 2) { |
722 | 0 | tprintf("Biggest unassigned range = %d- %d\n", start, end); |
723 | 0 | } |
724 | | // Find the modal column_set_id in the range. |
725 | 0 | int column_set_id = RangeModalColumnSet(column_set_costs, assigned_costs, start, end); |
726 | 0 | if (textord_debug_tabfind >= 2) { |
727 | 0 | tprintf("Range modal column id = %d\n", column_set_id); |
728 | 0 | column_sets_.at(column_set_id)->Print(); |
729 | 0 | } |
730 | | // Now find the longest run of the column_set_id in the range. |
731 | 0 | ShrinkRangeToLongestRun(column_set_costs, assigned_costs, any_columns_possible, column_set_id, |
732 | 0 | &start, &end); |
733 | 0 | if (textord_debug_tabfind >= 2) { |
734 | 0 | tprintf("Shrunk range = %d- %d\n", start, end); |
735 | 0 | } |
736 | | // Extend the start and end past the longest run, while there are |
737 | | // only small gaps in compatibility that can be overcome by larger |
738 | | // regions of compatibility beyond. |
739 | 0 | ExtendRangePastSmallGaps(column_set_costs, assigned_costs, any_columns_possible, column_set_id, |
740 | 0 | -1, -1, &start); |
741 | 0 | --end; |
742 | 0 | ExtendRangePastSmallGaps(column_set_costs, assigned_costs, any_columns_possible, column_set_id, |
743 | 0 | 1, set_count, &end); |
744 | 0 | ++end; |
745 | 0 | if (textord_debug_tabfind) { |
746 | 0 | tprintf("Column id %d applies to range = %d - %d\n", column_set_id, start, end); |
747 | 0 | } |
748 | | // Assign the column to the range, which now may overlap with other ranges. |
749 | 0 | AssignColumnToRange(column_set_id, start, end, column_set_costs, assigned_costs); |
750 | 0 | if (column_sets_.at(column_set_id)->GoodColumnCount() > 1) { |
751 | 0 | any_multi_column = true; |
752 | 0 | } |
753 | 0 | } |
754 | | // If anything remains unassigned, the whole lot is unassigned, so |
755 | | // arbitrarily assign id 0. |
756 | 0 | if (best_columns_[0] == nullptr) { |
757 | 0 | AssignColumnToRange(0, 0, gridheight_, column_set_costs, assigned_costs); |
758 | 0 | } |
759 | | // Free memory. |
760 | 0 | for (int i = 0; i < set_count; ++i) { |
761 | 0 | delete[] column_set_costs[i]; |
762 | 0 | } |
763 | 0 | delete[] assigned_costs; |
764 | 0 | delete[] any_columns_possible; |
765 | 0 | delete[] column_set_costs; |
766 | 0 | return any_multi_column; |
767 | 0 | } |
768 | | |
769 | | // Finds the biggest range in part_sets_ that has no assigned column, but |
770 | | // column assignment is possible. |
771 | | bool ColumnFinder::BiggestUnassignedRange(int set_count, const bool *any_columns_possible, |
772 | 0 | int *best_start, int *best_end) { |
773 | 0 | int best_range_size = 0; |
774 | 0 | *best_start = set_count; |
775 | 0 | *best_end = set_count; |
776 | 0 | int end = set_count; |
777 | 0 | for (int start = 0; start < gridheight_; start = end) { |
778 | | // Find the first unassigned index in start. |
779 | 0 | while (start < set_count) { |
780 | 0 | if (best_columns_[start] == nullptr && any_columns_possible[start]) { |
781 | 0 | break; |
782 | 0 | } |
783 | 0 | ++start; |
784 | 0 | } |
785 | | // Find the first past the end and count the good ones in between. |
786 | 0 | int range_size = 1; // Number of non-null, but unassigned line sets. |
787 | 0 | end = start + 1; |
788 | 0 | while (end < set_count) { |
789 | 0 | if (best_columns_[end] != nullptr) { |
790 | 0 | break; |
791 | 0 | } |
792 | 0 | if (any_columns_possible[end]) { |
793 | 0 | ++range_size; |
794 | 0 | } |
795 | 0 | ++end; |
796 | 0 | } |
797 | 0 | if (start < set_count && range_size > best_range_size) { |
798 | 0 | best_range_size = range_size; |
799 | 0 | *best_start = start; |
800 | 0 | *best_end = end; |
801 | 0 | } |
802 | 0 | } |
803 | 0 | return *best_start < *best_end; |
804 | 0 | } |
805 | | |
806 | | // Finds the modal compatible column_set_ index within the given range. |
807 | | int ColumnFinder::RangeModalColumnSet(int **column_set_costs, const int *assigned_costs, int start, |
808 | 0 | int end) { |
809 | 0 | int column_count = column_sets_.size(); |
810 | 0 | STATS column_stats(0, column_count - 1); |
811 | 0 | for (int part_i = start; part_i < end; ++part_i) { |
812 | 0 | for (int col_j = 0; col_j < column_count; ++col_j) { |
813 | 0 | if (column_set_costs[part_i][col_j] < assigned_costs[part_i]) { |
814 | 0 | column_stats.add(col_j, 1); |
815 | 0 | } |
816 | 0 | } |
817 | 0 | } |
818 | 0 | ASSERT_HOST(column_stats.get_total() > 0); |
819 | 0 | return column_stats.mode(); |
820 | 0 | } |
821 | | |
822 | | // Given that there are many column_set_id compatible columns in the range, |
823 | | // shrinks the range to the longest contiguous run of compatibility, allowing |
824 | | // gaps where no columns are possible, but not where competing columns are |
825 | | // possible. |
826 | | void ColumnFinder::ShrinkRangeToLongestRun(int **column_set_costs, const int *assigned_costs, |
827 | | const bool *any_columns_possible, int column_set_id, |
828 | 0 | int *best_start, int *best_end) { |
829 | | // orig_start and orig_end are the maximum range we will look at. |
830 | 0 | int orig_start = *best_start; |
831 | 0 | int orig_end = *best_end; |
832 | 0 | int best_range_size = 0; |
833 | 0 | *best_start = orig_end; |
834 | 0 | *best_end = orig_end; |
835 | 0 | int end = orig_end; |
836 | 0 | for (int start = orig_start; start < orig_end; start = end) { |
837 | | // Find the first possible |
838 | 0 | while (start < orig_end) { |
839 | 0 | if (column_set_costs[start][column_set_id] < assigned_costs[start] || |
840 | 0 | !any_columns_possible[start]) { |
841 | 0 | break; |
842 | 0 | } |
843 | 0 | ++start; |
844 | 0 | } |
845 | | // Find the first past the end. |
846 | 0 | end = start + 1; |
847 | 0 | while (end < orig_end) { |
848 | 0 | if (column_set_costs[end][column_set_id] >= assigned_costs[start] && |
849 | 0 | any_columns_possible[end]) { |
850 | 0 | break; |
851 | 0 | } |
852 | 0 | ++end; |
853 | 0 | } |
854 | 0 | if (start < orig_end && end - start > best_range_size) { |
855 | 0 | best_range_size = end - start; |
856 | 0 | *best_start = start; |
857 | 0 | *best_end = end; |
858 | 0 | } |
859 | 0 | } |
860 | 0 | } |
861 | | |
862 | | // Moves start in the direction of step, up to, but not including end while |
863 | | // the only incompatible regions are no more than kMaxIncompatibleColumnCount |
864 | | // in size, and the compatible regions beyond are bigger. |
865 | | void ColumnFinder::ExtendRangePastSmallGaps(int **column_set_costs, const int *assigned_costs, |
866 | | const bool *any_columns_possible, int column_set_id, |
867 | 0 | int step, int end, int *start) { |
868 | 0 | if (textord_debug_tabfind > 2) { |
869 | 0 | tprintf("Starting expansion at %d, step=%d, limit=%d\n", *start, step, end); |
870 | 0 | } |
871 | 0 | if (*start == end) { |
872 | 0 | return; // Cannot be expanded. |
873 | 0 | } |
874 | | |
875 | 0 | int barrier_size = 0; |
876 | 0 | int good_size = 0; |
877 | 0 | do { |
878 | | // Find the size of the incompatible barrier. |
879 | 0 | barrier_size = 0; |
880 | 0 | int i; |
881 | 0 | for (i = *start + step; i != end; i += step) { |
882 | 0 | if (column_set_costs[i][column_set_id] < assigned_costs[i]) { |
883 | 0 | break; // We are back on. |
884 | 0 | } |
885 | | // Locations where none are possible don't count. |
886 | 0 | if (any_columns_possible[i]) { |
887 | 0 | ++barrier_size; |
888 | 0 | } |
889 | 0 | } |
890 | 0 | if (textord_debug_tabfind > 2) { |
891 | 0 | tprintf("At %d, Barrier size=%d\n", i, barrier_size); |
892 | 0 | } |
893 | 0 | if (barrier_size > kMaxIncompatibleColumnCount) { |
894 | 0 | return; // Barrier too big. |
895 | 0 | } |
896 | 0 | if (i == end) { |
897 | | // We can't go any further, but the barrier was small, so go to the end. |
898 | 0 | *start = i - step; |
899 | 0 | return; |
900 | 0 | } |
901 | | // Now find the size of the good region on the other side. |
902 | 0 | good_size = 1; |
903 | 0 | for (i += step; i != end; i += step) { |
904 | 0 | if (column_set_costs[i][column_set_id] < assigned_costs[i]) { |
905 | 0 | ++good_size; |
906 | 0 | } else if (any_columns_possible[i]) { |
907 | 0 | break; |
908 | 0 | } |
909 | 0 | } |
910 | 0 | if (textord_debug_tabfind > 2) { |
911 | 0 | tprintf("At %d, good size = %d\n", i, good_size); |
912 | 0 | } |
913 | | // If we had enough good ones we can extend the start and keep looking. |
914 | 0 | if (good_size >= barrier_size) { |
915 | 0 | *start = i - step; |
916 | 0 | } |
917 | 0 | } while (good_size >= barrier_size); |
918 | 0 | } |
919 | | |
920 | | // Assigns the given column_set_id to the given range. |
921 | | void ColumnFinder::AssignColumnToRange(int column_set_id, int start, int end, |
922 | 0 | int **column_set_costs, int *assigned_costs) { |
923 | 0 | ColPartitionSet *column_set = column_sets_.at(column_set_id); |
924 | 0 | for (int i = start; i < end; ++i) { |
925 | 0 | assigned_costs[i] = column_set_costs[i][column_set_id]; |
926 | 0 | best_columns_[i] = column_set; |
927 | 0 | } |
928 | 0 | } |
929 | | |
930 | | // Computes the mean_column_gap_. |
931 | 0 | void ColumnFinder::ComputeMeanColumnGap(bool any_multi_column) { |
932 | 0 | int total_gap = 0; |
933 | 0 | int total_width = 0; |
934 | 0 | int gap_samples = 0; |
935 | 0 | int width_samples = 0; |
936 | 0 | for (int i = 0; i < gridheight_; ++i) { |
937 | 0 | ASSERT_HOST(best_columns_[i] != nullptr); |
938 | 0 | best_columns_[i]->AccumulateColumnWidthsAndGaps(&total_width, &width_samples, &total_gap, |
939 | 0 | &gap_samples); |
940 | 0 | } |
941 | 0 | mean_column_gap_ = any_multi_column && gap_samples > 0 |
942 | 0 | ? total_gap / gap_samples |
943 | 0 | : width_samples > 0 ? total_width / width_samples : 0; |
944 | 0 | } |
945 | | |
946 | | //////// Functions that manipulate ColPartitions in the part_grid_ ///// |
947 | | //////// to split, merge, find margins, and find types. ////////////// |
948 | | |
949 | | // Helper to delete all the deletable blobs on the list. Owned blobs are |
950 | | // extracted from the list, but not deleted, leaving them owned by the owner(). |
951 | 0 | static void ReleaseAllBlobsAndDeleteUnused(BLOBNBOX_LIST *blobs) { |
952 | 0 | for (BLOBNBOX_IT blob_it(blobs); !blob_it.empty(); blob_it.forward()) { |
953 | 0 | BLOBNBOX *blob = blob_it.extract(); |
954 | 0 | if (blob->owner() == nullptr) { |
955 | 0 | delete blob; |
956 | 0 | } |
957 | 0 | } |
958 | 0 | } |
959 | | |
960 | | // Hoovers up all un-owned blobs and deletes them. |
961 | | // The rest get released from the block so the ColPartitions can pass |
962 | | // ownership to the output blocks. |
963 | 0 | void ColumnFinder::ReleaseBlobsAndCleanupUnused(TO_BLOCK *block) { |
964 | 0 | ReleaseAllBlobsAndDeleteUnused(&block->blobs); |
965 | 0 | ReleaseAllBlobsAndDeleteUnused(&block->small_blobs); |
966 | 0 | ReleaseAllBlobsAndDeleteUnused(&block->noise_blobs); |
967 | 0 | ReleaseAllBlobsAndDeleteUnused(&block->large_blobs); |
968 | 0 | ReleaseAllBlobsAndDeleteUnused(&image_bblobs_); |
969 | 0 | } |
970 | | |
971 | | // Splits partitions that cross columns where they have nothing in the gap. |
972 | 0 | void ColumnFinder::GridSplitPartitions() { |
973 | | // Iterate the ColPartitions in the grid. |
974 | 0 | ColPartitionGridSearch gsearch(&part_grid_); |
975 | 0 | gsearch.StartFullSearch(); |
976 | 0 | ColPartition *dont_repeat = nullptr; |
977 | 0 | ColPartition *part; |
978 | 0 | while ((part = gsearch.NextFullSearch()) != nullptr) { |
979 | 0 | if (part->blob_type() < BRT_UNKNOWN || part == dont_repeat) { |
980 | 0 | continue; // Only applies to text partitions. |
981 | 0 | } |
982 | 0 | ColPartitionSet *column_set = best_columns_[gsearch.GridY()]; |
983 | 0 | int first_col = -1; |
984 | 0 | int last_col = -1; |
985 | | // Find which columns the partition spans. |
986 | 0 | part->ColumnRange(resolution_, column_set, &first_col, &last_col); |
987 | 0 | if (first_col > 0) { |
988 | 0 | --first_col; |
989 | 0 | } |
990 | | // Convert output column indices to physical column indices. |
991 | 0 | first_col /= 2; |
992 | 0 | last_col /= 2; |
993 | | // We will only consider cases where a partition spans two columns, |
994 | | // since a heading that spans more columns than that is most likely |
995 | | // genuine. |
996 | 0 | if (last_col != first_col + 1) { |
997 | 0 | continue; |
998 | 0 | } |
999 | | // Set up a rectangle search x-bounded by the column gap and y by the part. |
1000 | 0 | int y = part->MidY(); |
1001 | 0 | TBOX margin_box = part->bounding_box(); |
1002 | 0 | bool debug = AlignedBlob::WithinTestRegion(2, margin_box.left(), margin_box.bottom()); |
1003 | 0 | if (debug) { |
1004 | 0 | tprintf("Considering partition for GridSplit:"); |
1005 | 0 | part->Print(); |
1006 | 0 | } |
1007 | 0 | ColPartition *column = column_set->GetColumnByIndex(first_col); |
1008 | 0 | if (column == nullptr) { |
1009 | 0 | continue; |
1010 | 0 | } |
1011 | 0 | margin_box.set_left(column->RightAtY(y) + 2); |
1012 | 0 | column = column_set->GetColumnByIndex(last_col); |
1013 | 0 | if (column == nullptr) { |
1014 | 0 | continue; |
1015 | 0 | } |
1016 | 0 | margin_box.set_right(column->LeftAtY(y) - 2); |
1017 | | // TODO(rays) Decide whether to keep rectangular filling or not in the |
1018 | | // main grid and therefore whether we need a fancier search here. |
1019 | | // Now run the rect search on the main blob grid. |
1020 | 0 | GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> rectsearch(this); |
1021 | 0 | if (debug) { |
1022 | 0 | tprintf("Searching box (%d,%d)->(%d,%d)\n", margin_box.left(), margin_box.bottom(), |
1023 | 0 | margin_box.right(), margin_box.top()); |
1024 | 0 | part->Print(); |
1025 | 0 | } |
1026 | 0 | rectsearch.StartRectSearch(margin_box); |
1027 | 0 | BLOBNBOX *bbox; |
1028 | 0 | while ((bbox = rectsearch.NextRectSearch()) != nullptr) { |
1029 | 0 | if (bbox->bounding_box().overlap(margin_box)) { |
1030 | 0 | break; |
1031 | 0 | } |
1032 | 0 | } |
1033 | 0 | if (bbox == nullptr) { |
1034 | | // There seems to be nothing in the hole, so split the partition. |
1035 | 0 | gsearch.RemoveBBox(); |
1036 | 0 | int x_middle = (margin_box.left() + margin_box.right()) / 2; |
1037 | 0 | if (debug) { |
1038 | 0 | tprintf("Splitting part at %d:", x_middle); |
1039 | 0 | part->Print(); |
1040 | 0 | } |
1041 | 0 | ColPartition *split_part = part->SplitAt(x_middle); |
1042 | 0 | if (split_part != nullptr) { |
1043 | 0 | if (debug) { |
1044 | 0 | tprintf("Split result:"); |
1045 | 0 | part->Print(); |
1046 | 0 | split_part->Print(); |
1047 | 0 | } |
1048 | 0 | part_grid_.InsertBBox(true, true, split_part); |
1049 | 0 | } else { |
1050 | | // Split had no effect |
1051 | 0 | if (debug) { |
1052 | 0 | tprintf("Split had no effect\n"); |
1053 | 0 | } |
1054 | 0 | dont_repeat = part; |
1055 | 0 | } |
1056 | 0 | part_grid_.InsertBBox(true, true, part); |
1057 | 0 | gsearch.RepositionIterator(); |
1058 | 0 | } else if (debug) { |
1059 | 0 | tprintf("Part cannot be split: blob (%d,%d)->(%d,%d) in column gap\n", |
1060 | 0 | bbox->bounding_box().left(), bbox->bounding_box().bottom(), |
1061 | 0 | bbox->bounding_box().right(), bbox->bounding_box().top()); |
1062 | 0 | } |
1063 | 0 | } |
1064 | 0 | } |
1065 | | |
1066 | | // Merges partitions where there is vertical overlap, within a single column, |
1067 | | // and the horizontal gap is small enough. |
1068 | 0 | void ColumnFinder::GridMergePartitions() { |
1069 | | // Iterate the ColPartitions in the grid. |
1070 | 0 | GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> gsearch(&part_grid_); |
1071 | 0 | gsearch.StartFullSearch(); |
1072 | 0 | ColPartition *part; |
1073 | 0 | while ((part = gsearch.NextFullSearch()) != nullptr) { |
1074 | 0 | if (part->IsUnMergeableType()) { |
1075 | 0 | continue; |
1076 | 0 | } |
1077 | | // Set up a rectangle search x-bounded by the column and y by the part. |
1078 | 0 | ColPartitionSet *columns = best_columns_[gsearch.GridY()]; |
1079 | 0 | TBOX box = part->bounding_box(); |
1080 | 0 | bool debug = AlignedBlob::WithinTestRegion(1, box.left(), box.bottom()); |
1081 | 0 | if (debug) { |
1082 | 0 | tprintf("Considering part for merge at:"); |
1083 | 0 | part->Print(); |
1084 | 0 | } |
1085 | 0 | int y = part->MidY(); |
1086 | 0 | ColPartition *left_column = columns->ColumnContaining(box.left(), y); |
1087 | 0 | ColPartition *right_column = columns->ColumnContaining(box.right(), y); |
1088 | 0 | if (left_column == nullptr || right_column != left_column) { |
1089 | 0 | if (debug) { |
1090 | 0 | tprintf("In different columns\n"); |
1091 | 0 | } |
1092 | 0 | continue; |
1093 | 0 | } |
1094 | 0 | box.set_left(left_column->LeftAtY(y)); |
1095 | 0 | box.set_right(right_column->RightAtY(y)); |
1096 | | // Now run the rect search. |
1097 | 0 | bool modified_box = false; |
1098 | 0 | GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> rsearch(&part_grid_); |
1099 | 0 | rsearch.SetUniqueMode(true); |
1100 | 0 | rsearch.StartRectSearch(box); |
1101 | 0 | ColPartition *neighbour; |
1102 | |
|
1103 | 0 | while ((neighbour = rsearch.NextRectSearch()) != nullptr) { |
1104 | 0 | if (neighbour == part || neighbour->IsUnMergeableType()) { |
1105 | 0 | continue; |
1106 | 0 | } |
1107 | 0 | const TBOX &neighbour_box = neighbour->bounding_box(); |
1108 | 0 | if (debug) { |
1109 | 0 | tprintf("Considering merge with neighbour at:"); |
1110 | 0 | neighbour->Print(); |
1111 | 0 | } |
1112 | 0 | if (neighbour_box.right() < box.left() || neighbour_box.left() > box.right()) { |
1113 | 0 | continue; // Not within the same column. |
1114 | 0 | } |
1115 | 0 | if (part->VSignificantCoreOverlap(*neighbour) && part->TypesMatch(*neighbour)) { |
1116 | | // There is vertical overlap and the gross types match, but only |
1117 | | // merge if the horizontal gap is small enough, as one of the |
1118 | | // partitions may be a figure caption within a column. |
1119 | | // If there is only one column, then the mean_column_gap_ is large |
1120 | | // enough to allow almost any merge, by being the mean column width. |
1121 | 0 | const TBOX &part_box = part->bounding_box(); |
1122 | | // Don't merge if there is something else in the way. Use the margin |
1123 | | // to decide, and check both to allow a bit of overlap. |
1124 | 0 | if (neighbour_box.left() > part->right_margin() && |
1125 | 0 | part_box.right() < neighbour->left_margin()) { |
1126 | 0 | continue; // Neighbour is too far to the right. |
1127 | 0 | } |
1128 | 0 | if (neighbour_box.right() < part->left_margin() && |
1129 | 0 | part_box.left() > neighbour->right_margin()) { |
1130 | 0 | continue; // Neighbour is too far to the left. |
1131 | 0 | } |
1132 | 0 | int h_gap = std::max(part_box.left(), neighbour_box.left()) - |
1133 | 0 | std::min(part_box.right(), neighbour_box.right()); |
1134 | 0 | if (h_gap < mean_column_gap_ * kHorizontalGapMergeFraction || |
1135 | 0 | part_box.width() < mean_column_gap_ || neighbour_box.width() < mean_column_gap_) { |
1136 | 0 | if (debug) { |
1137 | 0 | tprintf("Running grid-based merge between:\n"); |
1138 | 0 | part->Print(); |
1139 | 0 | neighbour->Print(); |
1140 | 0 | } |
1141 | 0 | rsearch.RemoveBBox(); |
1142 | 0 | if (!modified_box) { |
1143 | | // We are going to modify part, so remove it and re-insert it after. |
1144 | 0 | gsearch.RemoveBBox(); |
1145 | 0 | rsearch.RepositionIterator(); |
1146 | 0 | modified_box = true; |
1147 | 0 | } |
1148 | 0 | part->Absorb(neighbour, WidthCB()); |
1149 | 0 | } else if (debug) { |
1150 | 0 | tprintf("Neighbour failed hgap test\n"); |
1151 | 0 | } |
1152 | 0 | } else if (debug) { |
1153 | 0 | tprintf("Neighbour failed overlap or typesmatch test\n"); |
1154 | 0 | } |
1155 | 0 | } |
1156 | 0 | if (modified_box) { |
1157 | | // We modified the box of part, so re-insert it into the grid. |
1158 | | // This does no harm in the current cell, as it already exists there, |
1159 | | // but it needs to exist in all the cells covered by its bounding box, |
1160 | | // or it will never be found by a full search. |
1161 | | // Because the box has changed, it has to be removed first, otherwise |
1162 | | // add_sorted may fail to keep a single copy of the pointer. |
1163 | 0 | part_grid_.InsertBBox(true, true, part); |
1164 | 0 | gsearch.RepositionIterator(); |
1165 | 0 | } |
1166 | 0 | } |
1167 | 0 | } |
1168 | | |
1169 | | // Inserts remaining noise blobs into the most applicable partition if any. |
1170 | | // If there is no applicable partition, then the blobs are deleted. |
1171 | 0 | void ColumnFinder::InsertRemainingNoise(TO_BLOCK *block) { |
1172 | 0 | BLOBNBOX_IT blob_it(&block->noise_blobs); |
1173 | 0 | for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) { |
1174 | 0 | BLOBNBOX *blob = blob_it.data(); |
1175 | 0 | if (blob->owner() != nullptr) { |
1176 | 0 | continue; |
1177 | 0 | } |
1178 | 0 | TBOX search_box(blob->bounding_box()); |
1179 | 0 | bool debug = WithinTestRegion(2, search_box.left(), search_box.bottom()); |
1180 | 0 | search_box.pad(gridsize(), gridsize()); |
1181 | | // Setup a rectangle search to find the best partition to merge with. |
1182 | 0 | ColPartitionGridSearch rsearch(&part_grid_); |
1183 | 0 | rsearch.SetUniqueMode(true); |
1184 | 0 | rsearch.StartRectSearch(search_box); |
1185 | 0 | ColPartition *part; |
1186 | 0 | ColPartition *best_part = nullptr; |
1187 | 0 | int best_distance = 0; |
1188 | 0 | while ((part = rsearch.NextRectSearch()) != nullptr) { |
1189 | 0 | if (part->IsUnMergeableType()) { |
1190 | 0 | continue; |
1191 | 0 | } |
1192 | 0 | int distance = |
1193 | 0 | projection_.DistanceOfBoxFromPartition(blob->bounding_box(), *part, denorm_, debug); |
1194 | 0 | if (best_part == nullptr || distance < best_distance) { |
1195 | 0 | best_part = part; |
1196 | 0 | best_distance = distance; |
1197 | 0 | } |
1198 | 0 | } |
1199 | 0 | if (best_part != nullptr && |
1200 | 0 | best_distance < kMaxDistToPartSizeRatio * best_part->median_height()) { |
1201 | | // Close enough to merge. |
1202 | 0 | if (debug) { |
1203 | 0 | tprintf("Adding noise blob with distance %d, thr=%g:box:", best_distance, |
1204 | 0 | kMaxDistToPartSizeRatio * best_part->median_height()); |
1205 | 0 | blob->bounding_box().print(); |
1206 | 0 | tprintf("To partition:"); |
1207 | 0 | best_part->Print(); |
1208 | 0 | } |
1209 | 0 | part_grid_.RemoveBBox(best_part); |
1210 | 0 | best_part->AddBox(blob); |
1211 | 0 | part_grid_.InsertBBox(true, true, best_part); |
1212 | 0 | blob->set_owner(best_part); |
1213 | 0 | blob->set_flow(best_part->flow()); |
1214 | 0 | blob->set_region_type(best_part->blob_type()); |
1215 | 0 | } else { |
1216 | | // Mark the blob for deletion. |
1217 | 0 | blob->set_region_type(BRT_NOISE); |
1218 | 0 | } |
1219 | 0 | } |
1220 | | // Delete the marked blobs, clearing neighbour references. |
1221 | 0 | block->DeleteUnownedNoise(); |
1222 | 0 | } |
1223 | | |
1224 | | // Helper makes a box from a horizontal line. |
1225 | 0 | static TBOX BoxFromHLine(const TabVector *hline) { |
1226 | 0 | int top = std::max(hline->startpt().y(), hline->endpt().y()); |
1227 | 0 | int bottom = std::min(hline->startpt().y(), hline->endpt().y()); |
1228 | 0 | top += hline->mean_width(); |
1229 | 0 | if (top == bottom) { |
1230 | 0 | if (bottom > 0) { |
1231 | 0 | --bottom; |
1232 | 0 | } else { |
1233 | 0 | ++top; |
1234 | 0 | } |
1235 | 0 | } |
1236 | 0 | return TBOX(hline->startpt().x(), bottom, hline->endpt().x(), top); |
1237 | 0 | } |
1238 | | |
1239 | | // Remove partitions that come from horizontal lines that look like |
1240 | | // underlines, but are not part of a table. |
1241 | 0 | void ColumnFinder::GridRemoveUnderlinePartitions() { |
1242 | 0 | TabVector_IT hline_it(&horizontal_lines_); |
1243 | 0 | for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) { |
1244 | 0 | TabVector *hline = hline_it.data(); |
1245 | 0 | if (hline->intersects_other_lines()) { |
1246 | 0 | continue; |
1247 | 0 | } |
1248 | 0 | TBOX line_box = BoxFromHLine(hline); |
1249 | 0 | TBOX search_box = line_box; |
1250 | 0 | search_box.pad(0, line_box.height()); |
1251 | 0 | ColPartitionGridSearch part_search(&part_grid_); |
1252 | 0 | part_search.SetUniqueMode(true); |
1253 | 0 | part_search.StartRectSearch(search_box); |
1254 | 0 | ColPartition *covered; |
1255 | 0 | bool touched_table = false; |
1256 | 0 | bool touched_text = false; |
1257 | 0 | ColPartition *line_part = nullptr; |
1258 | 0 | while ((covered = part_search.NextRectSearch()) != nullptr) { |
1259 | 0 | if (covered->type() == PT_TABLE) { |
1260 | 0 | touched_table = true; |
1261 | 0 | break; |
1262 | 0 | } else if (covered->IsTextType()) { |
1263 | | // TODO(rays) Add a list of underline sections to ColPartition. |
1264 | 0 | int text_bottom = covered->median_bottom(); |
1265 | 0 | if (line_box.bottom() <= text_bottom && text_bottom <= search_box.top()) { |
1266 | 0 | touched_text = true; |
1267 | 0 | } |
1268 | 0 | } else if (covered->blob_type() == BRT_HLINE && line_box.contains(covered->bounding_box()) && |
1269 | | // not if same instance (identical to hline) |
1270 | 0 | !TBOX(covered->bounding_box()).contains(line_box)) { |
1271 | 0 | line_part = covered; |
1272 | 0 | } |
1273 | 0 | } |
1274 | 0 | if (line_part != nullptr && !touched_table && touched_text) { |
1275 | 0 | part_grid_.RemoveBBox(line_part); |
1276 | 0 | delete line_part; |
1277 | 0 | } |
1278 | 0 | } |
1279 | 0 | } |
1280 | | |
1281 | | // Add horizontal line separators as partitions. |
1282 | 0 | void ColumnFinder::GridInsertHLinePartitions() { |
1283 | 0 | TabVector_IT hline_it(&horizontal_lines_); |
1284 | 0 | for (hline_it.mark_cycle_pt(); !hline_it.cycled_list(); hline_it.forward()) { |
1285 | 0 | TabVector *hline = hline_it.data(); |
1286 | 0 | TBOX line_box = BoxFromHLine(hline); |
1287 | 0 | ColPartition *part = |
1288 | 0 | ColPartition::MakeLinePartition(BRT_HLINE, vertical_skew_, line_box.left(), |
1289 | 0 | line_box.bottom(), line_box.right(), line_box.top()); |
1290 | 0 | part->set_type(PT_HORZ_LINE); |
1291 | 0 | bool any_image = false; |
1292 | 0 | ColPartitionGridSearch part_search(&part_grid_); |
1293 | 0 | part_search.SetUniqueMode(true); |
1294 | 0 | part_search.StartRectSearch(line_box); |
1295 | 0 | ColPartition *covered; |
1296 | 0 | while ((covered = part_search.NextRectSearch()) != nullptr) { |
1297 | 0 | if (covered->IsImageType()) { |
1298 | 0 | any_image = true; |
1299 | 0 | break; |
1300 | 0 | } |
1301 | 0 | } |
1302 | 0 | if (!any_image) { |
1303 | 0 | part_grid_.InsertBBox(true, true, part); |
1304 | 0 | } else { |
1305 | 0 | delete part; |
1306 | 0 | } |
1307 | 0 | } |
1308 | 0 | } |
1309 | | |
1310 | | // Add horizontal line separators as partitions. |
1311 | 0 | void ColumnFinder::GridInsertVLinePartitions() { |
1312 | 0 | TabVector_IT vline_it(dead_vectors()); |
1313 | 0 | for (vline_it.mark_cycle_pt(); !vline_it.cycled_list(); vline_it.forward()) { |
1314 | 0 | TabVector *vline = vline_it.data(); |
1315 | 0 | if (!vline->IsSeparator()) { |
1316 | 0 | continue; |
1317 | 0 | } |
1318 | 0 | int left = std::min(vline->startpt().x(), vline->endpt().x()); |
1319 | 0 | int right = std::max(vline->startpt().x(), vline->endpt().x()); |
1320 | 0 | right += vline->mean_width(); |
1321 | 0 | if (left == right) { |
1322 | 0 | if (left > 0) { |
1323 | 0 | --left; |
1324 | 0 | } else { |
1325 | 0 | ++right; |
1326 | 0 | } |
1327 | 0 | } |
1328 | 0 | ColPartition *part = ColPartition::MakeLinePartition( |
1329 | 0 | BRT_VLINE, vertical_skew_, left, vline->startpt().y(), right, vline->endpt().y()); |
1330 | 0 | part->set_type(PT_VERT_LINE); |
1331 | 0 | bool any_image = false; |
1332 | 0 | ColPartitionGridSearch part_search(&part_grid_); |
1333 | 0 | part_search.SetUniqueMode(true); |
1334 | 0 | part_search.StartRectSearch(part->bounding_box()); |
1335 | 0 | ColPartition *covered; |
1336 | 0 | while ((covered = part_search.NextRectSearch()) != nullptr) { |
1337 | 0 | if (covered->IsImageType()) { |
1338 | 0 | any_image = true; |
1339 | 0 | break; |
1340 | 0 | } |
1341 | 0 | } |
1342 | 0 | if (!any_image) { |
1343 | 0 | part_grid_.InsertBBox(true, true, part); |
1344 | 0 | } else { |
1345 | 0 | delete part; |
1346 | 0 | } |
1347 | 0 | } |
1348 | 0 | } |
1349 | | |
1350 | | // For every ColPartition in the grid, sets its type based on position |
1351 | | // in the columns. |
1352 | 0 | void ColumnFinder::SetPartitionTypes() { |
1353 | 0 | GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> gsearch(&part_grid_); |
1354 | 0 | gsearch.StartFullSearch(); |
1355 | 0 | ColPartition *part; |
1356 | 0 | while ((part = gsearch.NextFullSearch()) != nullptr) { |
1357 | 0 | part->SetPartitionType(resolution_, best_columns_[gsearch.GridY()]); |
1358 | 0 | } |
1359 | 0 | } |
1360 | | |
1361 | | // Only images remain with multiple types in a run of partners. |
1362 | | // Sets the type of all in the group to the maximum of the group. |
1363 | 0 | void ColumnFinder::SmoothPartnerRuns() { |
1364 | | // Iterate the ColPartitions in the grid. |
1365 | 0 | GridSearch<ColPartition, ColPartition_CLIST, ColPartition_C_IT> gsearch(&part_grid_); |
1366 | 0 | gsearch.StartFullSearch(); |
1367 | 0 | ColPartition *part; |
1368 | 0 | while ((part = gsearch.NextFullSearch()) != nullptr) { |
1369 | 0 | ColPartition *partner = part->SingletonPartner(true); |
1370 | 0 | if (partner != nullptr) { |
1371 | 0 | if (partner->SingletonPartner(false) != part) { |
1372 | 0 | tprintf("Ooops! Partition:(%d partners)", part->upper_partners()->length()); |
1373 | 0 | part->Print(); |
1374 | 0 | tprintf("has singleton partner:(%d partners", partner->lower_partners()->length()); |
1375 | 0 | partner->Print(); |
1376 | 0 | tprintf("but its singleton partner is:"); |
1377 | 0 | if (partner->SingletonPartner(false) == nullptr) { |
1378 | 0 | tprintf("NULL\n"); |
1379 | 0 | } else { |
1380 | 0 | partner->SingletonPartner(false)->Print(); |
1381 | 0 | } |
1382 | 0 | } |
1383 | 0 | ASSERT_HOST(partner->SingletonPartner(false) == part); |
1384 | 0 | } else if (part->SingletonPartner(false) != nullptr) { |
1385 | 0 | ColPartitionSet *column_set = best_columns_[gsearch.GridY()]; |
1386 | 0 | int column_count = column_set->ColumnCount(); |
1387 | 0 | part->SmoothPartnerRun(column_count * 2 + 1); |
1388 | 0 | } |
1389 | 0 | } |
1390 | 0 | } |
1391 | | |
1392 | | // Helper functions for TransformToBlocks. |
1393 | | // Add the part to the temp list in the correct order. |
1394 | 0 | void ColumnFinder::AddToTempPartList(ColPartition *part, ColPartition_CLIST *temp_list) { |
1395 | 0 | int mid_y = part->MidY(); |
1396 | 0 | ColPartition_C_IT it(temp_list); |
1397 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
1398 | 0 | ColPartition *test_part = it.data(); |
1399 | 0 | if (part->type() == PT_NOISE || test_part->type() == PT_NOISE) { |
1400 | 0 | continue; // Noise stays in sequence. |
1401 | 0 | } |
1402 | 0 | if (test_part == part->SingletonPartner(false)) { |
1403 | 0 | break; // Insert before its lower partner. |
1404 | 0 | } |
1405 | 0 | int neighbour_bottom = test_part->median_bottom(); |
1406 | 0 | int neighbour_top = test_part->median_top(); |
1407 | 0 | int neighbour_y = (neighbour_bottom + neighbour_top) / 2; |
1408 | 0 | if (neighbour_y < mid_y) { |
1409 | 0 | break; // part is above test_part so insert it. |
1410 | 0 | } |
1411 | 0 | if (!part->HOverlaps(*test_part) && !part->WithinSameMargins(*test_part)) { |
1412 | 0 | continue; // Incompatibles stay in order |
1413 | 0 | } |
1414 | 0 | } |
1415 | 0 | if (it.cycled_list()) { |
1416 | 0 | it.add_to_end(part); |
1417 | 0 | } else { |
1418 | 0 | it.add_before_stay_put(part); |
1419 | 0 | } |
1420 | 0 | } |
1421 | | |
1422 | | // Add everything from the temp list to the work_set assuming correct order. |
1423 | 0 | void ColumnFinder::EmptyTempPartList(ColPartition_CLIST *temp_list, WorkingPartSet_LIST *work_set) { |
1424 | 0 | ColPartition_C_IT it(temp_list); |
1425 | 0 | while (!it.empty()) { |
1426 | 0 | it.extract()->AddToWorkingSet(bleft_, tright_, resolution_, &good_parts_, work_set); |
1427 | 0 | it.forward(); |
1428 | 0 | } |
1429 | 0 | } |
1430 | | |
1431 | | // Transform the grid of partitions to the output blocks. |
1432 | 0 | void ColumnFinder::TransformToBlocks(BLOCK_LIST *blocks, TO_BLOCK_LIST *to_blocks) { |
1433 | 0 | WorkingPartSet_LIST work_set; |
1434 | 0 | ColPartitionSet *column_set = nullptr; |
1435 | 0 | ColPartition_IT noise_it(&noise_parts_); |
1436 | | // The temp_part_list holds a list of parts at the same grid y coord |
1437 | | // so they can be added in the correct order. This prevents thin objects |
1438 | | // like horizontal lines going before the text lines above them. |
1439 | 0 | ColPartition_CLIST temp_part_list; |
1440 | | // Iterate the ColPartitions in the grid. It starts at the top |
1441 | 0 | ColPartitionGridSearch gsearch(&part_grid_); |
1442 | 0 | gsearch.StartFullSearch(); |
1443 | 0 | int prev_grid_y = -1; |
1444 | 0 | ColPartition *part; |
1445 | 0 | while ((part = gsearch.NextFullSearch()) != nullptr) { |
1446 | 0 | int grid_y = gsearch.GridY(); |
1447 | 0 | if (grid_y != prev_grid_y) { |
1448 | 0 | EmptyTempPartList(&temp_part_list, &work_set); |
1449 | 0 | prev_grid_y = grid_y; |
1450 | 0 | } |
1451 | 0 | if (best_columns_[grid_y] != column_set) { |
1452 | 0 | column_set = best_columns_[grid_y]; |
1453 | | // Every line should have a non-null best column. |
1454 | 0 | ASSERT_HOST(column_set != nullptr); |
1455 | 0 | column_set->ChangeWorkColumns(bleft_, tright_, resolution_, &good_parts_, &work_set); |
1456 | 0 | if (textord_debug_tabfind) { |
1457 | 0 | tprintf("Changed column groups at grid index %d, y=%d\n", gsearch.GridY(), |
1458 | 0 | gsearch.GridY() * gridsize()); |
1459 | 0 | } |
1460 | 0 | } |
1461 | 0 | if (part->type() == PT_NOISE) { |
1462 | 0 | noise_it.add_to_end(part); |
1463 | 0 | } else { |
1464 | 0 | AddToTempPartList(part, &temp_part_list); |
1465 | 0 | } |
1466 | 0 | } |
1467 | 0 | EmptyTempPartList(&temp_part_list, &work_set); |
1468 | | // Now finish all working sets and transfer ColPartitionSets to block_sets. |
1469 | 0 | WorkingPartSet_IT work_it(&work_set); |
1470 | 0 | while (!work_it.empty()) { |
1471 | 0 | WorkingPartSet *working_set = work_it.extract(); |
1472 | 0 | working_set->ExtractCompletedBlocks(bleft_, tright_, resolution_, &good_parts_, blocks, |
1473 | 0 | to_blocks); |
1474 | 0 | delete working_set; |
1475 | 0 | work_it.forward(); |
1476 | 0 | } |
1477 | 0 | } |
1478 | | |
1479 | | // Helper reflects a list of blobs in the y-axis. |
1480 | | // Only reflects the BLOBNBOX bounding box. Not the blobs or outlines below. |
1481 | 0 | static void ReflectBlobList(BLOBNBOX_LIST *bblobs) { |
1482 | 0 | BLOBNBOX_IT it(bblobs); |
1483 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
1484 | 0 | it.data()->reflect_box_in_y_axis(); |
1485 | 0 | } |
1486 | 0 | } |
1487 | | |
1488 | | // Reflect the blob boxes (but not the outlines) in the y-axis so that |
1489 | | // the blocks get created in the correct RTL order. Reflects the blobs |
1490 | | // in the input_block and the bblobs list. |
1491 | | // The reflection is undone in RotateAndReskewBlocks by |
1492 | | // reflecting the blocks themselves, and then recomputing the blob bounding |
1493 | | // boxes. |
1494 | 0 | void ColumnFinder::ReflectForRtl(TO_BLOCK *input_block, BLOBNBOX_LIST *bblobs) { |
1495 | 0 | ReflectBlobList(bblobs); |
1496 | 0 | ReflectBlobList(&input_block->blobs); |
1497 | 0 | ReflectBlobList(&input_block->small_blobs); |
1498 | 0 | ReflectBlobList(&input_block->noise_blobs); |
1499 | 0 | ReflectBlobList(&input_block->large_blobs); |
1500 | | // Update the denorm with the reflection. |
1501 | 0 | auto *new_denorm = new DENORM; |
1502 | 0 | new_denorm->SetupNormalization(nullptr, nullptr, denorm_, 0.0f, 0.0f, -1.0f, 1.0f, 0.0f, 0.0f); |
1503 | 0 | denorm_ = new_denorm; |
1504 | 0 | } |
1505 | | |
1506 | | // Helper fixes up blobs and cblobs to match the desired rotation, |
1507 | | // exploding multi-outline blobs back to single blobs and accumulating |
1508 | | // the bounding box widths and heights. |
1509 | | static void RotateAndExplodeBlobList(const FCOORD &blob_rotation, BLOBNBOX_LIST *bblobs, |
1510 | 0 | STATS *widths, STATS *heights) { |
1511 | 0 | BLOBNBOX_IT it(bblobs); |
1512 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
1513 | 0 | BLOBNBOX *blob = it.data(); |
1514 | 0 | C_BLOB *cblob = blob->cblob(); |
1515 | 0 | C_OUTLINE_LIST *outlines = cblob->out_list(); |
1516 | 0 | C_OUTLINE_IT ol_it(outlines); |
1517 | 0 | if (!outlines->singleton()) { |
1518 | | // This blob has multiple outlines from CJK repair. |
1519 | | // Explode the blob back into individual outlines. |
1520 | 0 | for (; !ol_it.empty(); ol_it.forward()) { |
1521 | 0 | C_OUTLINE *outline = ol_it.extract(); |
1522 | 0 | BLOBNBOX *new_blob = BLOBNBOX::RealBlob(outline); |
1523 | | // This blob will be revisited later since we add_after_stay_put here. |
1524 | | // This means it will get rotated and have its width/height added to |
1525 | | // the stats below. |
1526 | 0 | it.add_after_stay_put(new_blob); |
1527 | 0 | } |
1528 | 0 | it.extract(); |
1529 | 0 | delete blob; |
1530 | 0 | } else { |
1531 | 0 | if (blob_rotation.x() != 1.0f || blob_rotation.y() != 0.0f) { |
1532 | 0 | cblob->rotate(blob_rotation); |
1533 | 0 | } |
1534 | 0 | blob->compute_bounding_box(); |
1535 | 0 | widths->add(blob->bounding_box().width(), 1); |
1536 | 0 | heights->add(blob->bounding_box().height(), 1); |
1537 | 0 | } |
1538 | 0 | } |
1539 | 0 | } |
1540 | | |
1541 | | // Undo the deskew that was done in FindTabVectors, as recognition is done |
1542 | | // without correcting blobs or blob outlines for skew. |
1543 | | // Reskew the completed blocks to put them back to the original rotated coords |
1544 | | // that were created by CorrectOrientation. |
1545 | | // If the input_is_rtl, then reflect the blocks in the y-axis to undo the |
1546 | | // reflection that was done before FindTabVectors. |
1547 | | // Blocks that were identified as vertical text (relative to the rotated |
1548 | | // coordinates) are further rotated so the text lines are horizontal. |
1549 | | // blob polygonal outlines are rotated to match the position of the blocks |
1550 | | // that they are in, and their bounding boxes are recalculated to be accurate. |
1551 | | // Record appropriate inverse transformations and required |
1552 | | // classifier transformation in the blocks. |
1553 | 0 | void ColumnFinder::RotateAndReskewBlocks(bool input_is_rtl, TO_BLOCK_LIST *blocks) { |
1554 | 0 | if (input_is_rtl) { |
1555 | | // The skew is backwards because of the reflection. |
1556 | 0 | FCOORD tmp = deskew_; |
1557 | 0 | deskew_ = reskew_; |
1558 | 0 | reskew_ = tmp; |
1559 | 0 | } |
1560 | 0 | TO_BLOCK_IT it(blocks); |
1561 | 0 | int block_index = 1; |
1562 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
1563 | 0 | TO_BLOCK *to_block = it.data(); |
1564 | 0 | BLOCK *block = to_block->block; |
1565 | | // Blocks are created on the deskewed blob outlines in TransformToBlocks() |
1566 | | // so we need to reskew them back to page coordinates. |
1567 | 0 | if (input_is_rtl) { |
1568 | 0 | block->reflect_polygon_in_y_axis(); |
1569 | 0 | } |
1570 | 0 | block->rotate(reskew_); |
1571 | | // Copy the right_to_left flag to the created block. |
1572 | 0 | block->set_right_to_left(input_is_rtl); |
1573 | | // Save the skew angle in the block for baseline computations. |
1574 | 0 | block->set_skew(reskew_); |
1575 | 0 | block->pdblk.set_index(block_index++); |
1576 | 0 | FCOORD blob_rotation = ComputeBlockAndClassifyRotation(block); |
1577 | | // Rotate all the blobs if needed and recompute the bounding boxes. |
1578 | | // Compute the block median blob width and height as we go. |
1579 | 0 | STATS widths(0, block->pdblk.bounding_box().width() - 1); |
1580 | 0 | STATS heights(0, block->pdblk.bounding_box().height() - 1); |
1581 | 0 | RotateAndExplodeBlobList(blob_rotation, &to_block->blobs, &widths, &heights); |
1582 | 0 | TO_ROW_IT row_it(to_block->get_rows()); |
1583 | 0 | for (row_it.mark_cycle_pt(); !row_it.cycled_list(); row_it.forward()) { |
1584 | 0 | TO_ROW *row = row_it.data(); |
1585 | 0 | RotateAndExplodeBlobList(blob_rotation, row->blob_list(), &widths, &heights); |
1586 | 0 | } |
1587 | 0 | block->set_median_size(static_cast<int>(widths.median() + 0.5), |
1588 | 0 | static_cast<int>(heights.median() + 0.5)); |
1589 | 0 | if (textord_debug_tabfind >= 2) { |
1590 | 0 | tprintf("Block median size = (%d, %d)\n", block->median_size().x(), block->median_size().y()); |
1591 | 0 | } |
1592 | 0 | } |
1593 | 0 | } |
1594 | | |
1595 | | // Computes the rotations for the block (to make textlines horizontal) and |
1596 | | // for the blobs (for classification) and sets the appropriate members |
1597 | | // of the given block. |
1598 | | // Returns the rotation that needs to be applied to the blobs to make |
1599 | | // them sit in the rotated block. |
1600 | 0 | FCOORD ColumnFinder::ComputeBlockAndClassifyRotation(BLOCK *block) { |
1601 | | // The text_rotation_ tells us the gross page text rotation that needs |
1602 | | // to be applied for classification |
1603 | | // TODO(rays) find block-level classify rotation by orientation detection. |
1604 | | // In the mean time, assume that "up" for text printed in the minority |
1605 | | // direction (PT_VERTICAL_TEXT) is perpendicular to the line of reading. |
1606 | | // Accomplish this by zero-ing out the text rotation. This covers the |
1607 | | // common cases of image credits in documents written in Latin scripts |
1608 | | // and page headings for predominantly vertically written CJK books. |
1609 | 0 | FCOORD classify_rotation(text_rotation_); |
1610 | 0 | FCOORD block_rotation(1.0f, 0.0f); |
1611 | 0 | if (block->pdblk.poly_block()->isA() == PT_VERTICAL_TEXT) { |
1612 | | // Vertical text needs to be 90 degrees rotated relative to the rest. |
1613 | | // If the rest has a 90 degree rotation already, use the inverse, making |
1614 | | // the vertical text the original way up. Otherwise use 90 degrees |
1615 | | // clockwise. |
1616 | 0 | if (rerotate_.x() == 0.0f) { |
1617 | 0 | block_rotation = rerotate_; |
1618 | 0 | } else { |
1619 | 0 | block_rotation = FCOORD(0.0f, -1.0f); |
1620 | 0 | } |
1621 | 0 | block->rotate(block_rotation); |
1622 | 0 | classify_rotation = FCOORD(1.0f, 0.0f); |
1623 | 0 | } |
1624 | 0 | block_rotation.rotate(rotation_); |
1625 | | // block_rotation is now what we have done to the blocks. Now do the same |
1626 | | // thing to the blobs, but save the inverse rotation in the block, as that |
1627 | | // is what we need to DENORM back to the image coordinates. |
1628 | 0 | FCOORD blob_rotation(block_rotation); |
1629 | 0 | block_rotation.set_y(-block_rotation.y()); |
1630 | 0 | block->set_re_rotation(block_rotation); |
1631 | 0 | block->set_classify_rotation(classify_rotation); |
1632 | 0 | if (textord_debug_tabfind) { |
1633 | 0 | tprintf("Blk %d, type %d rerotation(%.2f, %.2f), char(%.2f,%.2f), box:", block->pdblk.index(), |
1634 | 0 | block->pdblk.poly_block()->isA(), block->re_rotation().x(), block->re_rotation().y(), |
1635 | 0 | classify_rotation.x(), classify_rotation.y()); |
1636 | 0 | block->pdblk.bounding_box().print(); |
1637 | 0 | } |
1638 | 0 | return blob_rotation; |
1639 | 0 | } |
1640 | | |
1641 | | } // namespace tesseract. |