/src/tesseract/src/textord/alignedblob.cpp
Line | Count | Source (jump to first uncovered line) |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: alignedblob.cpp |
3 | | // Description: Subclass of BBGrid to find vertically aligned blobs. |
4 | | // Author: Ray Smith |
5 | | // |
6 | | // (C) Copyright 2008, Google Inc. |
7 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | // you may not use this file except in compliance with the License. |
9 | | // You may obtain a copy of the License at |
10 | | // http://www.apache.org/licenses/LICENSE-2.0 |
11 | | // Unless required by applicable law or agreed to in writing, software |
12 | | // distributed under the License is distributed on an "AS IS" BASIS, |
13 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | // See the License for the specific language governing permissions and |
15 | | // limitations under the License. |
16 | | // |
17 | | /////////////////////////////////////////////////////////////////////// |
18 | | |
19 | | #ifdef HAVE_CONFIG_H |
20 | | # include "config_auto.h" |
21 | | #endif |
22 | | |
23 | | #include "alignedblob.h" |
24 | | |
25 | | #include <algorithm> |
26 | | |
27 | | namespace tesseract { |
28 | | |
29 | | INT_VAR(textord_debug_tabfind, 0, "Debug tab finding"); |
30 | | INT_VAR(textord_debug_bugs, 0, "Turn on output related to bugs in tab finding"); |
31 | | static INT_VAR(textord_testregion_left, -1, |
32 | | "Left edge of debug reporting rectangle in Leptonica coords " |
33 | | "(bottom=0/top=height), with horizontal lines x/y-flipped"); |
34 | | static INT_VAR(textord_testregion_top, INT32_MAX, |
35 | | "Top edge of debug reporting rectangle in Leptonica coords " |
36 | | "(bottom=0/top=height), with horizontal lines x/y-flipped"); |
37 | | static INT_VAR(textord_testregion_right, INT32_MAX, |
38 | | "Right edge of debug rectangle in Leptonica coords " |
39 | | "(bottom=0/top=height), with horizontal lines x/y-flipped"); |
40 | | static INT_VAR(textord_testregion_bottom, -1, |
41 | | "Bottom edge of debug rectangle in Leptonica coords " |
42 | | "(bottom=0/top=height), with horizontal lines x/y-flipped"); |
43 | | BOOL_VAR(textord_debug_printable, false, "Make debug windows printable"); |
44 | | |
45 | | // Fraction of resolution used as alignment tolerance for aligned tabs. |
46 | | const double kAlignedFraction = 0.03125; |
47 | | // Fraction of resolution used as alignment tolerance for ragged tabs. |
48 | | const double kRaggedFraction = 2.5; |
49 | | // Fraction of height used as a minimum gutter gap for aligned blobs. |
50 | | const double kAlignedGapFraction = 0.75; |
51 | | // Fraction of height used as a minimum gutter gap for ragged tabs. |
52 | | const double kRaggedGapFraction = 1.0; |
53 | | // Constant number of pixels used as alignment tolerance for line finding. |
54 | | const int kVLineAlignment = 3; |
55 | | // Constant number of pixels used as gutter gap tolerance for line finding. |
56 | | const int kVLineGutter = 1; |
57 | | // Constant number of pixels used as the search size for line finding. |
58 | | const int kVLineSearchSize = 150; |
59 | | // Min number of points to accept for a ragged tab stop. |
60 | | const int kMinRaggedTabs = 5; |
61 | | // Min number of points to accept for an aligned tab stop. |
62 | | const int kMinAlignedTabs = 4; |
63 | | // Constant number of pixels minimum height of a vertical line. |
64 | | const int kVLineMinLength = 300; |
65 | | // Minimum gradient for a vertical tab vector. Used to prune away junk |
66 | | // tab vectors with what would be a ridiculously large skew angle. |
67 | | // Value corresponds to tan(90 - max allowed skew angle) |
68 | | const double kMinTabGradient = 4.0; |
69 | | // Tolerance to skew on top of current estimate of skew. Divide x or y length |
70 | | // by kMaxSkewFactor to get the y or x skew distance. |
71 | | // If the angle is small, the angle in degrees is roughly 60/kMaxSkewFactor. |
72 | | const int kMaxSkewFactor = 15; |
73 | | |
74 | | // Constructor to set the parameters for finding aligned and ragged tabs. |
75 | | // Vertical_x and vertical_y are the current estimates of the true vertical |
76 | | // direction (up) in the image. Height is the height of the starter blob. |
77 | | // v_gap_multiple is the multiple of height that will be used as a limit |
78 | | // on vertical gap before giving up and calling the line ended. |
79 | | // resolution is the original image resolution, and align0 indicates the |
80 | | // type of tab stop to be found. |
81 | | AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, int height, int v_gap_multiple, |
82 | | int min_gutter_width, int resolution, TabAlignment align0) |
83 | 0 | : right_tab(align0 == TA_RIGHT_RAGGED || align0 == TA_RIGHT_ALIGNED) |
84 | 0 | , ragged(align0 == TA_LEFT_RAGGED || align0 == TA_RIGHT_RAGGED) |
85 | 0 | , alignment(align0) |
86 | 0 | , confirmed_type(TT_CONFIRMED) |
87 | 0 | , min_length(0) { |
88 | | // Set the tolerances according to the type of line sought. |
89 | | // For tab search, these are based on the image resolution for most, or |
90 | | // the height of the starting blob for the maximum vertical gap. |
91 | 0 | max_v_gap = height * v_gap_multiple; |
92 | 0 | if (ragged) { |
93 | | // In the case of a ragged edge, we are much more generous with the |
94 | | // inside alignment fraction, but also require a much bigger gutter. |
95 | 0 | gutter_fraction = kRaggedGapFraction; |
96 | 0 | if (alignment == TA_RIGHT_RAGGED) { |
97 | 0 | l_align_tolerance = static_cast<int>(resolution * kRaggedFraction + 0.5); |
98 | 0 | r_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); |
99 | 0 | } else { |
100 | 0 | l_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); |
101 | 0 | r_align_tolerance = static_cast<int>(resolution * kRaggedFraction + 0.5); |
102 | 0 | } |
103 | 0 | min_points = kMinRaggedTabs; |
104 | 0 | } else { |
105 | 0 | gutter_fraction = kAlignedGapFraction; |
106 | 0 | l_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); |
107 | 0 | r_align_tolerance = static_cast<int>(resolution * kAlignedFraction + 0.5); |
108 | 0 | min_points = kMinAlignedTabs; |
109 | 0 | } |
110 | 0 | min_gutter = static_cast<int>(height * gutter_fraction + 0.5); |
111 | 0 | if (min_gutter < min_gutter_width) { |
112 | 0 | min_gutter = min_gutter_width; |
113 | 0 | } |
114 | | // Fit the vertical vector into an ICOORD, which is 16 bit. |
115 | 0 | set_vertical(vertical_x, vertical_y); |
116 | 0 | } |
117 | | |
118 | | // Constructor to set the parameters for finding vertical lines. |
119 | | // Vertical_x and vertical_y are the current estimates of the true vertical |
120 | | // direction (up) in the image. Width is the width of the starter blob. |
121 | | AlignedBlobParams::AlignedBlobParams(int vertical_x, int vertical_y, int width) |
122 | 0 | : gutter_fraction(0.0) |
123 | 0 | , right_tab(false) |
124 | 0 | , ragged(false) |
125 | 0 | , alignment(TA_SEPARATOR) |
126 | 0 | , confirmed_type(TT_VLINE) |
127 | 0 | , max_v_gap(kVLineSearchSize) |
128 | 0 | , min_gutter(kVLineGutter) |
129 | 0 | , min_points(1) |
130 | 0 | , min_length(kVLineMinLength) { |
131 | | // Compute threshold for left and right alignment. |
132 | 0 | l_align_tolerance = std::max(kVLineAlignment, width); |
133 | 0 | r_align_tolerance = std::max(kVLineAlignment, width); |
134 | | |
135 | | // Fit the vertical vector into an ICOORD, which is 16 bit. |
136 | 0 | set_vertical(vertical_x, vertical_y); |
137 | 0 | } |
138 | | |
139 | | // Fit the vertical vector into an ICOORD, which is 16 bit. |
140 | 0 | void AlignedBlobParams::set_vertical(int vertical_x, int vertical_y) { |
141 | 0 | int factor = 1; |
142 | 0 | if (vertical_y > INT16_MAX) { |
143 | 0 | factor = vertical_y / INT16_MAX + 1; |
144 | 0 | } |
145 | 0 | vertical.set_x(vertical_x / factor); |
146 | 0 | vertical.set_y(vertical_y / factor); |
147 | 0 | } |
148 | | |
149 | | AlignedBlob::AlignedBlob(int gridsize, const ICOORD &bleft, const ICOORD &tright) |
150 | 0 | : BlobGrid(gridsize, bleft, tright) {} |
151 | | |
152 | | // Return true if the given coordinates are within the test rectangle |
153 | | // and the debug level is at least the given detail level. |
154 | 0 | bool AlignedBlob::WithinTestRegion(int detail_level, int x, int y) { |
155 | 0 | if (textord_debug_tabfind < detail_level) { |
156 | 0 | return false; |
157 | 0 | } |
158 | 0 | return x >= textord_testregion_left && x <= textord_testregion_right && |
159 | 0 | y <= textord_testregion_top && y >= textord_testregion_bottom; |
160 | 0 | } |
161 | | |
162 | | #ifndef GRAPHICS_DISABLED |
163 | | |
164 | | // Display the tab codes of the BLOBNBOXes in this grid. |
165 | | ScrollView *AlignedBlob::DisplayTabs(const char *window_name, ScrollView *tab_win) { |
166 | | if (tab_win == nullptr) { |
167 | | tab_win = MakeWindow(0, 50, window_name); |
168 | | } |
169 | | // For every tab in the grid, display it. |
170 | | BlobGridSearch gsearch(this); |
171 | | gsearch.StartFullSearch(); |
172 | | BLOBNBOX *bbox; |
173 | | while ((bbox = gsearch.NextFullSearch()) != nullptr) { |
174 | | const TBOX &box = bbox->bounding_box(); |
175 | | int left_x = box.left(); |
176 | | int right_x = box.right(); |
177 | | int top_y = box.top(); |
178 | | int bottom_y = box.bottom(); |
179 | | TabType tabtype = bbox->left_tab_type(); |
180 | | if (tabtype != TT_NONE) { |
181 | | if (tabtype == TT_MAYBE_ALIGNED) { |
182 | | tab_win->Pen(ScrollView::BLUE); |
183 | | } else if (tabtype == TT_MAYBE_RAGGED) { |
184 | | tab_win->Pen(ScrollView::YELLOW); |
185 | | } else if (tabtype == TT_CONFIRMED) { |
186 | | tab_win->Pen(ScrollView::GREEN); |
187 | | } else { |
188 | | tab_win->Pen(ScrollView::GREY); |
189 | | } |
190 | | tab_win->Line(left_x, top_y, left_x, bottom_y); |
191 | | } |
192 | | tabtype = bbox->right_tab_type(); |
193 | | if (tabtype != TT_NONE) { |
194 | | if (tabtype == TT_MAYBE_ALIGNED) { |
195 | | tab_win->Pen(ScrollView::MAGENTA); |
196 | | } else if (tabtype == TT_MAYBE_RAGGED) { |
197 | | tab_win->Pen(ScrollView::ORANGE); |
198 | | } else if (tabtype == TT_CONFIRMED) { |
199 | | tab_win->Pen(ScrollView::RED); |
200 | | } else { |
201 | | tab_win->Pen(ScrollView::GREY); |
202 | | } |
203 | | tab_win->Line(right_x, top_y, right_x, bottom_y); |
204 | | } |
205 | | } |
206 | | tab_win->Update(); |
207 | | return tab_win; |
208 | | } |
209 | | |
210 | | #endif // !GRAPHICS_DISABLED |
211 | | |
212 | | // Helper returns true if the total number of line_crossings of all the blobs |
213 | | // in the list is at least 2. |
214 | 0 | static bool AtLeast2LineCrossings(BLOBNBOX_CLIST *blobs) { |
215 | 0 | BLOBNBOX_C_IT it(blobs); |
216 | 0 | int total_crossings = 0; |
217 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
218 | 0 | total_crossings += it.data()->line_crossings(); |
219 | 0 | } |
220 | 0 | return total_crossings >= 2; |
221 | 0 | } |
222 | | |
223 | | // Destructor. |
224 | | // It is defined here, so the compiler can create a single vtable |
225 | | // instead of weak vtables in every compilation unit. |
226 | 0 | AlignedBlob::~AlignedBlob() = default; |
227 | | |
228 | | // Finds a vector corresponding to a set of vertically aligned blob edges |
229 | | // running through the given box. The type of vector returned and the |
230 | | // search parameters are determined by the AlignedBlobParams. |
231 | | // vertical_x and y are updated with an estimate of the real |
232 | | // vertical direction. (skew finding.) |
233 | | // Returns nullptr if no decent vector can be found. |
234 | | TabVector *AlignedBlob::FindVerticalAlignment(AlignedBlobParams align_params, BLOBNBOX *bbox, |
235 | 0 | int *vertical_x, int *vertical_y) { |
236 | 0 | int ext_start_y, ext_end_y; |
237 | 0 | BLOBNBOX_CLIST good_points; |
238 | | // Search up and then down from the starting bbox. |
239 | 0 | TBOX box = bbox->bounding_box(); |
240 | 0 | bool debug = WithinTestRegion(2, box.left(), box.bottom()); |
241 | 0 | int pt_count = AlignTabs(align_params, false, bbox, &good_points, &ext_end_y); |
242 | 0 | pt_count += AlignTabs(align_params, true, bbox, &good_points, &ext_start_y); |
243 | 0 | BLOBNBOX_C_IT it(&good_points); |
244 | 0 | it.move_to_last(); |
245 | 0 | box = it.data()->bounding_box(); |
246 | 0 | int end_y = box.top(); |
247 | 0 | int end_x = align_params.right_tab ? box.right() : box.left(); |
248 | 0 | it.move_to_first(); |
249 | 0 | box = it.data()->bounding_box(); |
250 | 0 | int start_x = align_params.right_tab ? box.right() : box.left(); |
251 | 0 | int start_y = box.bottom(); |
252 | | // Acceptable tab vectors must have a minimum number of points, |
253 | | // have a minimum acceptable length, and have a minimum gradient. |
254 | | // The gradient corresponds to the skew angle. |
255 | | // Ragged tabs don't need to satisfy the gradient condition, as they |
256 | | // will always end up parallel to the vertical direction. |
257 | 0 | bool at_least_2_crossings = AtLeast2LineCrossings(&good_points); |
258 | 0 | if ((pt_count >= align_params.min_points && end_y - start_y >= align_params.min_length && |
259 | 0 | (align_params.ragged || end_y - start_y >= abs(end_x - start_x) * kMinTabGradient)) || |
260 | 0 | at_least_2_crossings) { |
261 | 0 | int confirmed_points = 0; |
262 | | // Count existing confirmed points to see if vector is acceptable. |
263 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
264 | 0 | bbox = it.data(); |
265 | 0 | if (align_params.right_tab) { |
266 | 0 | if (bbox->right_tab_type() == align_params.confirmed_type) { |
267 | 0 | ++confirmed_points; |
268 | 0 | } |
269 | 0 | } else { |
270 | 0 | if (bbox->left_tab_type() == align_params.confirmed_type) { |
271 | 0 | ++confirmed_points; |
272 | 0 | } |
273 | 0 | } |
274 | 0 | } |
275 | | // Ragged vectors are not allowed to use too many already used points. |
276 | 0 | if (!align_params.ragged || confirmed_points + confirmed_points < pt_count) { |
277 | 0 | const TBOX &box = bbox->bounding_box(); |
278 | 0 | if (debug) { |
279 | 0 | tprintf("Confirming tab vector of %d pts starting at %d,%d\n", pt_count, box.left(), |
280 | 0 | box.bottom()); |
281 | 0 | } |
282 | | // Flag all the aligned neighbours as confirmed . |
283 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
284 | 0 | bbox = it.data(); |
285 | 0 | if (align_params.right_tab) { |
286 | 0 | bbox->set_right_tab_type(align_params.confirmed_type); |
287 | 0 | } else { |
288 | 0 | bbox->set_left_tab_type(align_params.confirmed_type); |
289 | 0 | } |
290 | 0 | if (debug) { |
291 | 0 | bbox->bounding_box().print(); |
292 | 0 | } |
293 | 0 | } |
294 | | // Now make the vector and return it. |
295 | 0 | TabVector *result = |
296 | 0 | TabVector::FitVector(align_params.alignment, align_params.vertical, ext_start_y, |
297 | 0 | ext_end_y, &good_points, vertical_x, vertical_y); |
298 | 0 | result->set_intersects_other_lines(at_least_2_crossings); |
299 | 0 | if (debug) { |
300 | 0 | tprintf("Box was %d, %d\n", box.left(), box.bottom()); |
301 | 0 | result->Print("After fitting"); |
302 | 0 | } |
303 | 0 | return result; |
304 | 0 | } else if (debug) { |
305 | 0 | tprintf("Ragged tab used too many used points: %d out of %d\n", confirmed_points, pt_count); |
306 | 0 | } |
307 | 0 | } else if (debug) { |
308 | 0 | tprintf( |
309 | 0 | "Tab vector failed basic tests: pt count %d vs min %d, " |
310 | 0 | "length %d vs min %d, min grad %g\n", |
311 | 0 | pt_count, align_params.min_points, end_y - start_y, align_params.min_length, |
312 | 0 | abs(end_x - start_x) * kMinTabGradient); |
313 | 0 | } |
314 | 0 | return nullptr; |
315 | 0 | } |
316 | | |
317 | | // Find a set of blobs that are aligned in the given vertical |
318 | | // direction with the given blob. Returns a list of aligned |
319 | | // blobs and the number in the list. |
320 | | // For other parameters see FindAlignedBlob below. |
321 | | int AlignedBlob::AlignTabs(const AlignedBlobParams ¶ms, bool top_to_bottom, BLOBNBOX *bbox, |
322 | 0 | BLOBNBOX_CLIST *good_points, int *end_y) { |
323 | 0 | int ptcount = 0; |
324 | 0 | BLOBNBOX_C_IT it(good_points); |
325 | |
|
326 | 0 | TBOX box = bbox->bounding_box(); |
327 | 0 | bool debug = WithinTestRegion(2, box.left(), box.bottom()); |
328 | 0 | if (debug) { |
329 | 0 | tprintf("Starting alignment run at blob:"); |
330 | 0 | box.print(); |
331 | 0 | } |
332 | 0 | int x_start = params.right_tab ? box.right() : box.left(); |
333 | 0 | while (bbox != nullptr) { |
334 | | // Add the blob to the list if the appropriate side is a tab candidate, |
335 | | // or if we are working on a ragged tab. |
336 | 0 | TabType type = params.right_tab ? bbox->right_tab_type() : bbox->left_tab_type(); |
337 | 0 | if (((type != TT_NONE && type != TT_MAYBE_RAGGED) || params.ragged) && |
338 | 0 | (it.empty() || it.data() != bbox)) { |
339 | 0 | if (top_to_bottom) { |
340 | 0 | it.add_before_then_move(bbox); |
341 | 0 | } else { |
342 | 0 | it.add_after_then_move(bbox); |
343 | 0 | } |
344 | 0 | ++ptcount; |
345 | 0 | } |
346 | | // Find the next blob that is aligned with the current one. |
347 | | // FindAlignedBlob guarantees that forward progress will be made in the |
348 | | // top_to_bottom direction, and therefore eventually it will return nullptr, |
349 | | // making this while (bbox != nullptr) loop safe. |
350 | 0 | bbox = FindAlignedBlob(params, top_to_bottom, bbox, x_start, end_y); |
351 | 0 | if (bbox != nullptr) { |
352 | 0 | box = bbox->bounding_box(); |
353 | 0 | if (!params.ragged) { |
354 | 0 | x_start = params.right_tab ? box.right() : box.left(); |
355 | 0 | } |
356 | 0 | } |
357 | 0 | } |
358 | 0 | if (debug) { |
359 | 0 | tprintf("Alignment run ended with %d pts at blob:", ptcount); |
360 | 0 | box.print(); |
361 | 0 | } |
362 | 0 | return ptcount; |
363 | 0 | } |
364 | | |
365 | | // Search vertically for a blob that is aligned with the input bbox. |
366 | | // The search parameters are determined by AlignedBlobParams. |
367 | | // top_to_bottom tells whether to search down or up. |
368 | | // The return value is nullptr if nothing was found in the search box |
369 | | // or if a blob was found in the gutter. On a nullptr return, end_y |
370 | | // is set to the edge of the search box or the leading edge of the |
371 | | // gutter blob if one was found. |
372 | | BLOBNBOX *AlignedBlob::FindAlignedBlob(const AlignedBlobParams &p, bool top_to_bottom, |
373 | 0 | BLOBNBOX *bbox, int x_start, int *end_y) { |
374 | 0 | TBOX box = bbox->bounding_box(); |
375 | | // If there are separator lines, get the column edges. |
376 | 0 | int left_column_edge = bbox->left_rule(); |
377 | 0 | int right_column_edge = bbox->right_rule(); |
378 | | // start_y is used to guarantee that forward progress is made and the |
379 | | // search does not go into an infinite loop. New blobs must extend the |
380 | | // line beyond start_y. |
381 | 0 | int start_y = top_to_bottom ? box.bottom() : box.top(); |
382 | 0 | if (WithinTestRegion(2, x_start, start_y)) { |
383 | 0 | tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n", box.left(), box.top(), |
384 | 0 | box.right(), box.bottom(), left_column_edge, right_column_edge); |
385 | 0 | } |
386 | | // Compute skew tolerance. |
387 | 0 | int skew_tolerance = p.max_v_gap / kMaxSkewFactor; |
388 | | // Calculate xmin and xmax of the search box so that it contains |
389 | | // all possibly relevant boxes up to p.max_v_gap above or below according |
390 | | // to top_to_bottom. |
391 | | // Start with a notion of vertical with the current estimate. |
392 | 0 | int x2 = (p.max_v_gap * p.vertical.x() + p.vertical.y() / 2) / p.vertical.y(); |
393 | 0 | if (top_to_bottom) { |
394 | 0 | x2 = x_start - x2; |
395 | 0 | *end_y = start_y - p.max_v_gap; |
396 | 0 | } else { |
397 | 0 | x2 = x_start + x2; |
398 | 0 | *end_y = start_y + p.max_v_gap; |
399 | 0 | } |
400 | | // Expand the box by an additional skew tolerance |
401 | 0 | int xmin = std::min(x_start, x2) - skew_tolerance; |
402 | 0 | int xmax = std::max(x_start, x2) + skew_tolerance; |
403 | | // Now add direction-specific tolerances. |
404 | 0 | if (p.right_tab) { |
405 | 0 | xmax += p.min_gutter; |
406 | 0 | xmin -= p.l_align_tolerance; |
407 | 0 | } else { |
408 | 0 | xmax += p.r_align_tolerance; |
409 | 0 | xmin -= p.min_gutter; |
410 | 0 | } |
411 | | // Setup a vertical search for an aligned blob. |
412 | 0 | BlobGridSearch vsearch(this); |
413 | 0 | if (WithinTestRegion(2, x_start, start_y)) { |
414 | 0 | tprintf("Starting %s %s search at %d-%d,%d, search_size=%d, gutter=%d\n", |
415 | 0 | p.ragged ? "Ragged" : "Aligned", p.right_tab ? "Right" : "Left", xmin, xmax, start_y, |
416 | 0 | p.max_v_gap, p.min_gutter); |
417 | 0 | } |
418 | 0 | vsearch.StartVerticalSearch(xmin, xmax, start_y); |
419 | | // result stores the best real return value. |
420 | 0 | BLOBNBOX *result = nullptr; |
421 | | // The backup_result is not a tab candidate and can be used if no |
422 | | // real tab candidate result is found. |
423 | 0 | BLOBNBOX *backup_result = nullptr; |
424 | | // neighbour is the blob that is currently being investigated. |
425 | 0 | BLOBNBOX *neighbour = nullptr; |
426 | 0 | while ((neighbour = vsearch.NextVerticalSearch(top_to_bottom)) != nullptr) { |
427 | 0 | if (neighbour == bbox) { |
428 | 0 | continue; |
429 | 0 | } |
430 | 0 | TBOX nbox = neighbour->bounding_box(); |
431 | 0 | int n_y = (nbox.top() + nbox.bottom()) / 2; |
432 | 0 | if ((!top_to_bottom && n_y > start_y + p.max_v_gap) || |
433 | 0 | (top_to_bottom && n_y < start_y - p.max_v_gap)) { |
434 | 0 | if (WithinTestRegion(2, x_start, start_y)) { |
435 | 0 | tprintf("Neighbour too far at (%d,%d)->(%d,%d)\n", nbox.left(), nbox.bottom(), nbox.right(), |
436 | 0 | nbox.top()); |
437 | 0 | } |
438 | 0 | break; // Gone far enough. |
439 | 0 | } |
440 | | // It is CRITICAL to ensure that forward progress is made, (strictly |
441 | | // in/decreasing n_y) or the caller could loop infinitely, while |
442 | | // waiting for a sequence of blobs in a line to end. |
443 | | // NextVerticalSearch alone does not guarantee this, as there may be |
444 | | // more than one blob in a grid cell. See comment in AlignTabs. |
445 | 0 | if ((n_y < start_y) != top_to_bottom || nbox.y_overlap(box)) { |
446 | 0 | continue; // Only look in the required direction. |
447 | 0 | } |
448 | 0 | if (result != nullptr && result->bounding_box().y_gap(nbox) > gridsize()) { |
449 | 0 | return result; // This result is clear. |
450 | 0 | } |
451 | 0 | if (backup_result != nullptr && p.ragged && result == nullptr && |
452 | 0 | backup_result->bounding_box().y_gap(nbox) > gridsize()) { |
453 | 0 | return backup_result; // This result is clear. |
454 | 0 | } |
455 | | |
456 | | // If the neighbouring blob is the wrong side of a separator line, then it |
457 | | // "doesn't exist" as far as we are concerned. |
458 | 0 | int x_at_n_y = x_start + (n_y - start_y) * p.vertical.x() / p.vertical.y(); |
459 | 0 | if (x_at_n_y < neighbour->left_crossing_rule() || x_at_n_y > neighbour->right_crossing_rule()) { |
460 | 0 | continue; // Separator line in the way. |
461 | 0 | } |
462 | 0 | int n_left = nbox.left(); |
463 | 0 | int n_right = nbox.right(); |
464 | 0 | int n_x = p.right_tab ? n_right : n_left; |
465 | 0 | if (WithinTestRegion(2, x_start, start_y)) { |
466 | 0 | tprintf("neighbour at (%d,%d)->(%d,%d), n_x=%d, n_y=%d, xatn=%d\n", nbox.left(), |
467 | 0 | nbox.bottom(), nbox.right(), nbox.top(), n_x, n_y, x_at_n_y); |
468 | 0 | } |
469 | 0 | if (p.right_tab && n_left < x_at_n_y + p.min_gutter && |
470 | 0 | n_right > x_at_n_y + p.r_align_tolerance && |
471 | 0 | (p.ragged || n_left < x_at_n_y + p.gutter_fraction * nbox.height())) { |
472 | | // In the gutter so end of line. |
473 | 0 | if (bbox->right_tab_type() >= TT_MAYBE_ALIGNED) { |
474 | 0 | bbox->set_right_tab_type(TT_DELETED); |
475 | 0 | } |
476 | 0 | *end_y = top_to_bottom ? nbox.top() : nbox.bottom(); |
477 | 0 | if (WithinTestRegion(2, x_start, start_y)) { |
478 | 0 | tprintf("gutter\n"); |
479 | 0 | } |
480 | 0 | return nullptr; |
481 | 0 | } |
482 | 0 | if (!p.right_tab && n_left < x_at_n_y - p.l_align_tolerance && |
483 | 0 | n_right > x_at_n_y - p.min_gutter && |
484 | 0 | (p.ragged || n_right > x_at_n_y - p.gutter_fraction * nbox.height())) { |
485 | | // In the gutter so end of line. |
486 | 0 | if (bbox->left_tab_type() >= TT_MAYBE_ALIGNED) { |
487 | 0 | bbox->set_left_tab_type(TT_DELETED); |
488 | 0 | } |
489 | 0 | *end_y = top_to_bottom ? nbox.top() : nbox.bottom(); |
490 | 0 | if (WithinTestRegion(2, x_start, start_y)) { |
491 | 0 | tprintf("gutter\n"); |
492 | 0 | } |
493 | 0 | return nullptr; |
494 | 0 | } |
495 | 0 | if ((p.right_tab && neighbour->leader_on_right()) || |
496 | 0 | (!p.right_tab && neighbour->leader_on_left())) { |
497 | 0 | continue; // Neighbours of leaders are not allowed to be used. |
498 | 0 | } |
499 | 0 | if (n_x <= x_at_n_y + p.r_align_tolerance && n_x >= x_at_n_y - p.l_align_tolerance) { |
500 | | // Aligned so keep it. If it is a marked tab save it as result, |
501 | | // otherwise keep it as backup_result to return in case of later failure. |
502 | 0 | if (WithinTestRegion(2, x_start, start_y)) { |
503 | 0 | tprintf("aligned, seeking%d, l=%d, r=%d\n", p.right_tab, neighbour->left_tab_type(), |
504 | 0 | neighbour->right_tab_type()); |
505 | 0 | } |
506 | 0 | TabType n_type = p.right_tab ? neighbour->right_tab_type() : neighbour->left_tab_type(); |
507 | 0 | if (n_type != TT_NONE && (p.ragged || n_type != TT_MAYBE_RAGGED)) { |
508 | 0 | if (result == nullptr) { |
509 | 0 | result = neighbour; |
510 | 0 | } else { |
511 | | // Keep the closest neighbour by Euclidean distance. |
512 | | // This prevents it from picking a tab blob in another column. |
513 | 0 | const TBOX &old_box = result->bounding_box(); |
514 | 0 | int x_diff = p.right_tab ? old_box.right() : old_box.left(); |
515 | 0 | x_diff -= x_at_n_y; |
516 | 0 | int y_diff = (old_box.top() + old_box.bottom()) / 2 - start_y; |
517 | 0 | int old_dist = x_diff * x_diff + y_diff * y_diff; |
518 | 0 | x_diff = n_x - x_at_n_y; |
519 | 0 | y_diff = n_y - start_y; |
520 | 0 | int new_dist = x_diff * x_diff + y_diff * y_diff; |
521 | 0 | if (new_dist < old_dist) { |
522 | 0 | result = neighbour; |
523 | 0 | } |
524 | 0 | } |
525 | 0 | } else if (backup_result == nullptr) { |
526 | 0 | if (WithinTestRegion(2, x_start, start_y)) { |
527 | 0 | tprintf("Backup\n"); |
528 | 0 | } |
529 | 0 | backup_result = neighbour; |
530 | 0 | } else { |
531 | 0 | TBOX backup_box = backup_result->bounding_box(); |
532 | 0 | if ((p.right_tab && backup_box.right() < nbox.right()) || |
533 | 0 | (!p.right_tab && backup_box.left() > nbox.left())) { |
534 | 0 | if (WithinTestRegion(2, x_start, start_y)) { |
535 | 0 | tprintf("Better backup\n"); |
536 | 0 | } |
537 | 0 | backup_result = neighbour; |
538 | 0 | } |
539 | 0 | } |
540 | 0 | } |
541 | 0 | } |
542 | 0 | return result != nullptr ? result : backup_result; |
543 | 0 | } |
544 | | |
545 | | } // namespace tesseract. |