/src/tesseract/src/textord/tabvector.cpp
Line | Count | Source |
1 | | /////////////////////////////////////////////////////////////////////// |
2 | | // File: tabvector.cpp |
3 | | // Description: Class to hold a near-vertical vector representing a tab-stop. |
4 | | // Author: Ray Smith |
5 | | // |
6 | | // (C) Copyright 2008, Google Inc. |
7 | | // Licensed under the Apache License, Version 2.0 (the "License"); |
8 | | // you may not use this file except in compliance with the License. |
9 | | // You may obtain a copy of the License at |
10 | | // http://www.apache.org/licenses/LICENSE-2.0 |
11 | | // Unless required by applicable law or agreed to in writing, software |
12 | | // distributed under the License is distributed on an "AS IS" BASIS, |
13 | | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
14 | | // See the License for the specific language governing permissions and |
15 | | // limitations under the License. |
16 | | // |
17 | | /////////////////////////////////////////////////////////////////////// |
18 | | |
19 | | #ifdef HAVE_CONFIG_H |
20 | | # include "config_auto.h" |
21 | | #endif |
22 | | |
23 | | #include "blobbox.h" |
24 | | #include "colfind.h" |
25 | | #include "colpartitionset.h" |
26 | | #include "detlinefit.h" |
27 | | #include "helpers.h" // for IntCastRounded |
28 | | #include "statistc.h" |
29 | | #include "tabvector.h" |
30 | | |
31 | | #include <algorithm> |
32 | | |
33 | | namespace tesseract { |
34 | | |
35 | | // Multiple of height used as a gutter for evaluation search. |
36 | | const int kGutterMultiple = 4; |
37 | | // Multiple of neighbour gap that we expect the gutter gap to be at minimum. |
38 | | const int kGutterToNeighbourRatio = 3; |
39 | | // Pixel distance for tab vectors to be considered the same. |
40 | | const int kSimilarVectorDist = 10; |
41 | | // Pixel distance for ragged tab vectors to be considered the same if there |
42 | | // is nothing in the overlap box |
43 | | const int kSimilarRaggedDist = 50; |
44 | | // Max multiple of height to allow filling in between blobs when evaluating. |
45 | | const int kMaxFillinMultiple = 11; |
46 | | // Min fraction of mean gutter size to allow a gutter on a good tab blob. |
47 | | const double kMinGutterFraction = 0.5; |
48 | | // Multiple of 1/n lines as a minimum gutter in evaluation. |
49 | | const double kLineCountReciprocal = 4.0; |
50 | | // Constant add-on for minimum gutter for aligned tabs. |
51 | | const double kMinAlignedGutter = 0.25; |
52 | | // Constant add-on for minimum gutter for ragged tabs. |
53 | | const double kMinRaggedGutter = 1.5; |
54 | | |
55 | | double_VAR(textord_tabvector_vertical_gap_fraction, 0.5, |
56 | | "max fraction of mean blob width allowed for vertical gaps in " |
57 | | "vertical text"); |
58 | | |
59 | | double_VAR(textord_tabvector_vertical_box_ratio, 0.5, |
60 | | "Fraction of box matches required to declare a line vertical"); |
61 | | |
62 | | // Create a constraint for the top or bottom of this TabVector. |
63 | 0 | void TabConstraint::CreateConstraint(TabVector *vector, bool is_top) { |
64 | 0 | auto *constraint = new TabConstraint(vector, is_top); |
65 | 0 | auto *constraints = new TabConstraint_LIST; |
66 | 0 | TabConstraint_IT it(constraints); |
67 | 0 | it.add_to_end(constraint); |
68 | 0 | if (is_top) { |
69 | 0 | vector->set_top_constraints(constraints); |
70 | 0 | } else { |
71 | 0 | vector->set_bottom_constraints(constraints); |
72 | 0 | } |
73 | 0 | } |
74 | | |
75 | | // Test to see if the constraints are compatible enough to merge. |
76 | 0 | bool TabConstraint::CompatibleConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2) { |
77 | 0 | if (list1 == list2) { |
78 | 0 | return false; |
79 | 0 | } |
80 | 0 | int y_min = -INT32_MAX; |
81 | 0 | int y_max = INT32_MAX; |
82 | 0 | if (textord_debug_tabfind > 3) { |
83 | 0 | tprintf("Testing constraint compatibility\n"); |
84 | 0 | } |
85 | 0 | GetConstraints(list1, &y_min, &y_max); |
86 | 0 | GetConstraints(list2, &y_min, &y_max); |
87 | 0 | if (textord_debug_tabfind > 3) { |
88 | 0 | tprintf("Resulting range = [%d,%d]\n", y_min, y_max); |
89 | 0 | } |
90 | 0 | return y_max >= y_min; |
91 | 0 | } |
92 | | |
93 | | // Merge the lists of constraints and update the TabVector pointers. |
94 | | // The second list is deleted. |
95 | 0 | void TabConstraint::MergeConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2) { |
96 | 0 | if (list1 == list2) { |
97 | 0 | return; |
98 | 0 | } |
99 | 0 | TabConstraint_IT it(list2); |
100 | 0 | if (textord_debug_tabfind > 3) { |
101 | 0 | tprintf("Merging constraints\n"); |
102 | 0 | } |
103 | | // The vectors of all constraints on list2 are now going to be on list1. |
104 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
105 | 0 | TabConstraint *constraint = it.data(); |
106 | 0 | if (textord_debug_tabfind > 3) { |
107 | 0 | constraint->vector_->Print("Merge"); |
108 | 0 | } |
109 | 0 | if (constraint->is_top_) { |
110 | 0 | constraint->vector_->set_top_constraints(list1); |
111 | 0 | } else { |
112 | 0 | constraint->vector_->set_bottom_constraints(list1); |
113 | 0 | } |
114 | 0 | } |
115 | 0 | it = list1; |
116 | 0 | it.add_list_before(list2); |
117 | 0 | delete list2; |
118 | 0 | } |
119 | | |
120 | | // Set all the tops and bottoms as appropriate to a mean of the |
121 | | // constrained range. Delete all the constraints and list. |
122 | 0 | void TabConstraint::ApplyConstraints(TabConstraint_LIST *constraints) { |
123 | 0 | int y_min = -INT32_MAX; |
124 | 0 | int y_max = INT32_MAX; |
125 | 0 | GetConstraints(constraints, &y_min, &y_max); |
126 | 0 | int y = (y_min + y_max) / 2; |
127 | 0 | TabConstraint_IT it(constraints); |
128 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
129 | 0 | TabConstraint *constraint = it.data(); |
130 | 0 | TabVector *v = constraint->vector_; |
131 | 0 | if (constraint->is_top_) { |
132 | 0 | v->SetYEnd(y); |
133 | 0 | v->set_top_constraints(nullptr); |
134 | 0 | } else { |
135 | 0 | v->SetYStart(y); |
136 | 0 | v->set_bottom_constraints(nullptr); |
137 | 0 | } |
138 | 0 | } |
139 | 0 | delete constraints; |
140 | 0 | } |
141 | | |
142 | 0 | TabConstraint::TabConstraint(TabVector *vector, bool is_top) : vector_(vector), is_top_(is_top) { |
143 | 0 | if (is_top) { |
144 | 0 | y_min_ = vector->endpt().y(); |
145 | 0 | y_max_ = vector->extended_ymax(); |
146 | 0 | } else { |
147 | 0 | y_max_ = vector->startpt().y(); |
148 | 0 | y_min_ = vector->extended_ymin(); |
149 | 0 | } |
150 | 0 | } |
151 | | |
152 | | // Get the max of the mins and the min of the maxes. |
153 | 0 | void TabConstraint::GetConstraints(TabConstraint_LIST *constraints, int *y_min, int *y_max) { |
154 | 0 | TabConstraint_IT it(constraints); |
155 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
156 | 0 | TabConstraint *constraint = it.data(); |
157 | 0 | if (textord_debug_tabfind > 3) { |
158 | 0 | tprintf("Constraint is [%d,%d]", constraint->y_min_, constraint->y_max_); |
159 | 0 | constraint->vector_->Print(" for"); |
160 | 0 | } |
161 | 0 | *y_min = std::max(*y_min, constraint->y_min_); |
162 | 0 | *y_max = std::min(*y_max, constraint->y_max_); |
163 | 0 | } |
164 | 0 | } |
165 | | |
166 | | // The constructor is private. See the bottom of the file... |
167 | | |
168 | | // Public factory to build a TabVector from a list of boxes. |
169 | | // The TabVector will be of the given alignment type. |
170 | | // The input vertical vector is used in fitting, and the output |
171 | | // vertical_x, vertical_y have the resulting line vector added to them |
172 | | // if the alignment is not ragged. |
173 | | // The extended_start_y and extended_end_y are the maximum possible |
174 | | // extension to the line segment that can be used to align with others. |
175 | | // The input CLIST of BLOBNBOX good_points is consumed and taken over. |
176 | | TabVector *TabVector::FitVector(TabAlignment alignment, ICOORD vertical, int extended_start_y, |
177 | | int extended_end_y, BLOBNBOX_CLIST *good_points, int *vertical_x, |
178 | 0 | int *vertical_y) { |
179 | 0 | auto *vector = new TabVector(extended_start_y, extended_end_y, alignment, good_points); |
180 | 0 | if (!vector->Fit(vertical, false)) { |
181 | 0 | delete vector; |
182 | 0 | return nullptr; |
183 | 0 | } |
184 | 0 | if (!vector->IsRagged()) { |
185 | 0 | vertical = vector->endpt_ - vector->startpt_; |
186 | 0 | int weight = vector->BoxCount(); |
187 | 0 | *vertical_x += vertical.x() * weight; |
188 | 0 | *vertical_y += vertical.y() * weight; |
189 | 0 | } |
190 | 0 | return vector; |
191 | 0 | } |
192 | | |
193 | | // Build a ragged TabVector by copying another's direction, shifting it |
194 | | // to match the given blob, and making its initial extent the height |
195 | | // of the blob, but its extended bounds from the bounds of the original. |
196 | | TabVector::TabVector(const TabVector &src, TabAlignment alignment, const ICOORD &vertical_skew, |
197 | | BLOBNBOX *blob) |
198 | 0 | : extended_ymin_(src.extended_ymin_) |
199 | 0 | , extended_ymax_(src.extended_ymax_) |
200 | 0 | , needs_refit_(true) |
201 | 0 | , needs_evaluation_(true) |
202 | 0 | , alignment_(alignment) { |
203 | 0 | BLOBNBOX_C_IT it(&boxes_); |
204 | 0 | it.add_to_end(blob); |
205 | 0 | TBOX box = blob->bounding_box(); |
206 | 0 | if (IsLeftTab()) { |
207 | 0 | startpt_ = box.botleft(); |
208 | 0 | endpt_ = box.topleft(); |
209 | 0 | } else { |
210 | 0 | startpt_ = box.botright(); |
211 | 0 | endpt_ = box.topright(); |
212 | 0 | } |
213 | 0 | sort_key_ = |
214 | 0 | SortKey(vertical_skew, (startpt_.x() + endpt_.x()) / 2, (startpt_.y() + endpt_.y()) / 2); |
215 | 0 | if (textord_debug_tabfind > 3) { |
216 | 0 | Print("Constructed a new tab vector:"); |
217 | 0 | } |
218 | 0 | } |
219 | | |
220 | | // Copies basic attributes of a tab vector for simple operations. |
221 | | // Copies things such startpt, endpt, range. |
222 | | // Does not copy things such as partners, boxes, or constraints. |
223 | | // This is useful if you only need vector information for processing, such |
224 | | // as in the table detection code. |
225 | 0 | TabVector *TabVector::ShallowCopy() const { |
226 | 0 | auto *copy = new TabVector(); |
227 | 0 | copy->startpt_ = startpt_; |
228 | 0 | copy->endpt_ = endpt_; |
229 | 0 | copy->alignment_ = alignment_; |
230 | 0 | copy->extended_ymax_ = extended_ymax_; |
231 | 0 | copy->extended_ymin_ = extended_ymin_; |
232 | 0 | copy->intersects_other_lines_ = intersects_other_lines_; |
233 | 0 | return copy; |
234 | 0 | } |
235 | | |
236 | | // Extend this vector to include the supplied blob if it doesn't |
237 | | // already have it. |
238 | 0 | void TabVector::ExtendToBox(BLOBNBOX *new_blob) { |
239 | 0 | TBOX new_box = new_blob->bounding_box(); |
240 | 0 | BLOBNBOX_C_IT it(&boxes_); |
241 | 0 | if (!it.empty()) { |
242 | 0 | BLOBNBOX *blob = it.data(); |
243 | 0 | TBOX box = blob->bounding_box(); |
244 | 0 | while (!it.at_last() && box.top() <= new_box.top()) { |
245 | 0 | if (blob == new_blob) { |
246 | 0 | return; // We have it already. |
247 | 0 | } |
248 | 0 | it.forward(); |
249 | 0 | blob = it.data(); |
250 | 0 | box = blob->bounding_box(); |
251 | 0 | } |
252 | 0 | if (box.top() >= new_box.top()) { |
253 | 0 | it.add_before_stay_put(new_blob); |
254 | 0 | needs_refit_ = true; |
255 | 0 | return; |
256 | 0 | } |
257 | 0 | } |
258 | 0 | needs_refit_ = true; |
259 | 0 | it.add_after_stay_put(new_blob); |
260 | 0 | } |
261 | | |
262 | | // Set the ycoord of the start and move the xcoord to match. |
263 | 0 | void TabVector::SetYStart(int start_y) { |
264 | 0 | startpt_.set_x(XAtY(start_y)); |
265 | 0 | startpt_.set_y(start_y); |
266 | 0 | } |
267 | | // Set the ycoord of the end and move the xcoord to match. |
268 | 0 | void TabVector::SetYEnd(int end_y) { |
269 | 0 | endpt_.set_x(XAtY(end_y)); |
270 | 0 | endpt_.set_y(end_y); |
271 | 0 | } |
272 | | |
273 | | // Rotate the ends by the given vector. Auto flip start and end if needed. |
274 | 0 | void TabVector::Rotate(const FCOORD &rotation) { |
275 | 0 | startpt_.rotate(rotation); |
276 | 0 | endpt_.rotate(rotation); |
277 | 0 | int dx = endpt_.x() - startpt_.x(); |
278 | 0 | int dy = endpt_.y() - startpt_.y(); |
279 | 0 | if ((dy < 0 && abs(dy) > abs(dx)) || (dx < 0 && abs(dx) > abs(dy))) { |
280 | | // Need to flip start/end. |
281 | 0 | ICOORD tmp = startpt_; |
282 | 0 | startpt_ = endpt_; |
283 | 0 | endpt_ = tmp; |
284 | 0 | } |
285 | 0 | } |
286 | | |
287 | | // Setup the initial constraints, being the limits of |
288 | | // the vector and the extended ends. |
289 | 0 | void TabVector::SetupConstraints() { |
290 | 0 | TabConstraint::CreateConstraint(this, false); |
291 | 0 | TabConstraint::CreateConstraint(this, true); |
292 | 0 | } |
293 | | |
294 | | // Setup the constraints between the partners of this TabVector. |
295 | 0 | void TabVector::SetupPartnerConstraints() { |
296 | | // With the first and last partner, we want a common bottom and top, |
297 | | // respectively, and for each change of partner, we want a common |
298 | | // top of first with bottom of next. |
299 | 0 | TabVector_C_IT it(&partners_); |
300 | 0 | TabVector *prev_partner = nullptr; |
301 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
302 | 0 | TabVector *partner = it.data(); |
303 | 0 | if (partner->top_constraints_ == nullptr || partner->bottom_constraints_ == nullptr) { |
304 | 0 | partner->Print("Impossible: has no constraints"); |
305 | 0 | Print("This vector has it as a partner"); |
306 | 0 | continue; |
307 | 0 | } |
308 | 0 | if (prev_partner == nullptr) { |
309 | | // This is the first partner, so common bottom. |
310 | 0 | if (TabConstraint::CompatibleConstraints(bottom_constraints_, partner->bottom_constraints_)) { |
311 | 0 | TabConstraint::MergeConstraints(bottom_constraints_, partner->bottom_constraints_); |
312 | 0 | } |
313 | 0 | } else { |
314 | | // We need prev top to be common with partner bottom. |
315 | 0 | if (TabConstraint::CompatibleConstraints(prev_partner->top_constraints_, |
316 | 0 | partner->bottom_constraints_)) { |
317 | 0 | TabConstraint::MergeConstraints(prev_partner->top_constraints_, |
318 | 0 | partner->bottom_constraints_); |
319 | 0 | } |
320 | 0 | } |
321 | 0 | prev_partner = partner; |
322 | 0 | if (it.at_last()) { |
323 | | // This is the last partner, so common top. |
324 | 0 | if (TabConstraint::CompatibleConstraints(top_constraints_, partner->top_constraints_)) { |
325 | 0 | TabConstraint::MergeConstraints(top_constraints_, partner->top_constraints_); |
326 | 0 | } |
327 | 0 | } |
328 | 0 | } |
329 | 0 | } |
330 | | |
331 | | // Setup the constraints between this and its partner. |
332 | 0 | void TabVector::SetupPartnerConstraints(TabVector *partner) { |
333 | 0 | if (TabConstraint::CompatibleConstraints(bottom_constraints_, partner->bottom_constraints_)) { |
334 | 0 | TabConstraint::MergeConstraints(bottom_constraints_, partner->bottom_constraints_); |
335 | 0 | } |
336 | 0 | if (TabConstraint::CompatibleConstraints(top_constraints_, partner->top_constraints_)) { |
337 | 0 | TabConstraint::MergeConstraints(top_constraints_, partner->top_constraints_); |
338 | 0 | } |
339 | 0 | } |
340 | | |
341 | | // Use the constraints to modify the top and bottom. |
342 | 0 | void TabVector::ApplyConstraints() { |
343 | 0 | if (top_constraints_ != nullptr) { |
344 | 0 | TabConstraint::ApplyConstraints(top_constraints_); |
345 | 0 | } |
346 | 0 | if (bottom_constraints_ != nullptr) { |
347 | 0 | TabConstraint::ApplyConstraints(bottom_constraints_); |
348 | 0 | } |
349 | 0 | } |
350 | | |
351 | | // Merge close tab vectors of the same side that overlap. |
352 | | void TabVector::MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors, |
353 | 0 | BlobGrid *grid) { |
354 | 0 | TabVector_IT it1(vectors); |
355 | 0 | for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) { |
356 | 0 | TabVector *v1 = it1.data(); |
357 | 0 | TabVector_IT it2(it1); |
358 | 0 | for (it2.forward(); !it2.at_first(); it2.forward()) { |
359 | 0 | TabVector *v2 = it2.data(); |
360 | 0 | if (v2->SimilarTo(vertical, *v1, grid)) { |
361 | | // Merge into the forward one, in case the combined vector now |
362 | | // overlaps one in between. |
363 | 0 | if (textord_debug_tabfind) { |
364 | 0 | v2->Print("Merging"); |
365 | 0 | v1->Print("by deleting"); |
366 | 0 | } |
367 | 0 | v2->MergeWith(vertical, it1.extract()); |
368 | 0 | if (textord_debug_tabfind) { |
369 | 0 | v2->Print("Producing"); |
370 | 0 | } |
371 | 0 | ICOORD merged_vector = v2->endpt(); |
372 | 0 | merged_vector -= v2->startpt(); |
373 | 0 | if (textord_debug_tabfind && abs(merged_vector.x()) > 100) { |
374 | 0 | v2->Print("Garbage result of merge?"); |
375 | 0 | } |
376 | 0 | break; |
377 | 0 | } |
378 | 0 | } |
379 | 0 | } |
380 | 0 | } |
381 | | |
382 | | // Return true if this vector is the same side, overlaps, and close |
383 | | // enough to the other to be merged. |
384 | 0 | bool TabVector::SimilarTo(const ICOORD &vertical, const TabVector &other, BlobGrid *grid) const { |
385 | 0 | if ((IsRightTab() && other.IsRightTab()) || (IsLeftTab() && other.IsLeftTab())) { |
386 | | // If they don't overlap, at least in extensions, then there is no chance. |
387 | 0 | if (ExtendedOverlap(other.extended_ymax_, other.extended_ymin_) < 0) { |
388 | 0 | return false; |
389 | 0 | } |
390 | | // A fast approximation to the scale factor of the sort_key_. |
391 | 0 | int v_scale = abs(vertical.y()); |
392 | 0 | if (v_scale == 0) { |
393 | 0 | v_scale = 1; |
394 | 0 | } |
395 | | // If they are close enough, then OK. |
396 | 0 | if (sort_key_ + kSimilarVectorDist * v_scale >= other.sort_key_ && |
397 | 0 | sort_key_ - kSimilarVectorDist * v_scale <= other.sort_key_) { |
398 | 0 | return true; |
399 | 0 | } |
400 | | // Ragged tabs get a bigger threshold. |
401 | 0 | if (!IsRagged() || !other.IsRagged() || |
402 | 0 | sort_key_ + kSimilarRaggedDist * v_scale < other.sort_key_ || |
403 | 0 | sort_key_ - kSimilarRaggedDist * v_scale > other.sort_key_) { |
404 | 0 | return false; |
405 | 0 | } |
406 | 0 | if (grid == nullptr) { |
407 | | // There is nothing else to test! |
408 | 0 | return true; |
409 | 0 | } |
410 | | // If there is nothing in the rectangle between the vector that is going to |
411 | | // move, and the place it is moving to, then they can be merged. |
412 | | // Setup a vertical search for any blob. |
413 | 0 | const TabVector *mover = (IsRightTab() && sort_key_ < other.sort_key_) ? this : &other; |
414 | 0 | int top_y = mover->endpt_.y(); |
415 | 0 | int bottom_y = mover->startpt_.y(); |
416 | 0 | int left = std::min(mover->XAtY(top_y), mover->XAtY(bottom_y)); |
417 | 0 | int right = std::max(mover->XAtY(top_y), mover->XAtY(bottom_y)); |
418 | 0 | int shift = abs(sort_key_ - other.sort_key_) / v_scale; |
419 | 0 | if (IsRightTab()) { |
420 | 0 | right += shift; |
421 | 0 | } else { |
422 | 0 | left -= shift; |
423 | 0 | } |
424 | |
|
425 | 0 | GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(grid); |
426 | 0 | vsearch.StartVerticalSearch(left, right, top_y); |
427 | 0 | BLOBNBOX *blob; |
428 | 0 | while ((blob = vsearch.NextVerticalSearch(true)) != nullptr) { |
429 | 0 | const TBOX &box = blob->bounding_box(); |
430 | 0 | if (box.top() > bottom_y) { |
431 | 0 | return true; // Nothing found. |
432 | 0 | } |
433 | 0 | if (box.bottom() < top_y) { |
434 | 0 | continue; // Doesn't overlap. |
435 | 0 | } |
436 | 0 | int left_at_box = XAtY(box.bottom()); |
437 | 0 | int right_at_box = left_at_box; |
438 | 0 | if (IsRightTab()) { |
439 | 0 | right_at_box += shift; |
440 | 0 | } else { |
441 | 0 | left_at_box -= shift; |
442 | 0 | } |
443 | 0 | if (std::min(right_at_box, static_cast<int>(box.right())) > |
444 | 0 | std::max(left_at_box, static_cast<int>(box.left()))) { |
445 | 0 | return false; |
446 | 0 | } |
447 | 0 | } |
448 | 0 | return true; // Nothing found. |
449 | 0 | } |
450 | 0 | return false; |
451 | 0 | } |
452 | | |
453 | | // Eat the other TabVector into this and delete it. |
454 | 0 | void TabVector::MergeWith(const ICOORD &vertical, TabVector *other) { |
455 | 0 | extended_ymin_ = std::min(extended_ymin_, other->extended_ymin_); |
456 | 0 | extended_ymax_ = std::max(extended_ymax_, other->extended_ymax_); |
457 | 0 | if (other->IsRagged()) { |
458 | 0 | alignment_ = other->alignment_; |
459 | 0 | } |
460 | | // Merge sort the two lists of boxes. |
461 | 0 | BLOBNBOX_C_IT it1(&boxes_); |
462 | 0 | BLOBNBOX_C_IT it2(&other->boxes_); |
463 | 0 | while (!it2.empty()) { |
464 | 0 | BLOBNBOX *bbox2 = it2.extract(); |
465 | 0 | it2.forward(); |
466 | 0 | TBOX box2 = bbox2->bounding_box(); |
467 | 0 | BLOBNBOX *bbox1 = it1.data(); |
468 | 0 | TBOX box1 = bbox1->bounding_box(); |
469 | 0 | while (box1.bottom() < box2.bottom() && !it1.at_last()) { |
470 | 0 | it1.forward(); |
471 | 0 | bbox1 = it1.data(); |
472 | 0 | box1 = bbox1->bounding_box(); |
473 | 0 | } |
474 | 0 | if (box1.bottom() < box2.bottom()) { |
475 | 0 | it1.add_to_end(bbox2); |
476 | 0 | } else if (bbox1 != bbox2) { |
477 | 0 | it1.add_before_stay_put(bbox2); |
478 | 0 | } |
479 | 0 | } |
480 | 0 | Fit(vertical, true); |
481 | 0 | other->Delete(this); |
482 | 0 | } |
483 | | |
484 | | // Add a new element to the list of partner TabVectors. |
485 | | // Partners must be added in order of increasing y coordinate of the text line |
486 | | // that makes them partners. |
487 | | // Groups of identical partners are merged into one. |
488 | 0 | void TabVector::AddPartner(TabVector *partner) { |
489 | 0 | if (IsSeparator() || partner->IsSeparator()) { |
490 | 0 | return; |
491 | 0 | } |
492 | 0 | TabVector_C_IT it(&partners_); |
493 | 0 | if (!it.empty()) { |
494 | 0 | it.move_to_last(); |
495 | 0 | if (it.data() == partner) { |
496 | 0 | return; |
497 | 0 | } |
498 | 0 | } |
499 | 0 | it.add_after_then_move(partner); |
500 | 0 | } |
501 | | |
502 | | // Return true if other is a partner of this. |
503 | 0 | bool TabVector::IsAPartner(const TabVector *other) { |
504 | 0 | TabVector_C_IT it(&partners_); |
505 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
506 | 0 | if (it.data() == other) { |
507 | 0 | return true; |
508 | 0 | } |
509 | 0 | } |
510 | 0 | return false; |
511 | 0 | } |
512 | | |
513 | | // These names must be synced with the TabAlignment enum in tabvector.h. |
514 | | static const char *const kAlignmentNames[] = {"Left Aligned", "Left Ragged", "Center", |
515 | | "Right Aligned", "Right Ragged", "Separator"}; |
516 | | |
517 | | // Print basic information about this tab vector. |
518 | 0 | void TabVector::Print(const char *prefix) { |
519 | 0 | tprintf( |
520 | 0 | "%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d," |
521 | 0 | " partners=%d\n", |
522 | 0 | prefix, kAlignmentNames[alignment_], startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y(), |
523 | 0 | mean_width_, percent_score_, sort_key_, boxes_.length(), partners_.length()); |
524 | 0 | } |
525 | | |
526 | | // Print basic information about this tab vector and every box in it. |
527 | 0 | void TabVector::Debug(const char *prefix) { |
528 | 0 | Print(prefix); |
529 | 0 | BLOBNBOX_C_IT it(&boxes_); |
530 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
531 | 0 | BLOBNBOX *bbox = it.data(); |
532 | 0 | const TBOX &box = bbox->bounding_box(); |
533 | 0 | tprintf("Box at (%d,%d)->(%d,%d)\n", box.left(), box.bottom(), box.right(), box.top()); |
534 | 0 | } |
535 | 0 | } |
536 | | |
537 | | #ifndef GRAPHICS_DISABLED |
538 | | |
539 | | // Draw this tabvector in place in the given window. |
540 | | void TabVector::Display(ScrollView *tab_win) { |
541 | | if (textord_debug_printable) { |
542 | | tab_win->Pen(ScrollView::BLUE); |
543 | | } else if (alignment_ == TA_LEFT_ALIGNED) { |
544 | | tab_win->Pen(ScrollView::LIME_GREEN); |
545 | | } else if (alignment_ == TA_LEFT_RAGGED) { |
546 | | tab_win->Pen(ScrollView::DARK_GREEN); |
547 | | } else if (alignment_ == TA_RIGHT_ALIGNED) { |
548 | | tab_win->Pen(ScrollView::PINK); |
549 | | } else if (alignment_ == TA_RIGHT_RAGGED) { |
550 | | tab_win->Pen(ScrollView::CORAL); |
551 | | } else { |
552 | | tab_win->Pen(ScrollView::WHITE); |
553 | | } |
554 | | tab_win->Line(startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y()); |
555 | | tab_win->Pen(ScrollView::GREY); |
556 | | tab_win->Line(startpt_.x(), startpt_.y(), startpt_.x(), extended_ymin_); |
557 | | tab_win->Line(endpt_.x(), extended_ymax_, endpt_.x(), endpt_.y()); |
558 | | auto score_string = std::to_string(percent_score_); |
559 | | tab_win->TextAttributes("Times", 50, false, false, false); |
560 | | tab_win->Text(startpt_.x(), startpt_.y(), score_string.c_str()); |
561 | | } |
562 | | |
563 | | #endif |
564 | | |
565 | | // Refit the line and/or re-evaluate the vector if the dirty flags are set. |
566 | 0 | void TabVector::FitAndEvaluateIfNeeded(const ICOORD &vertical, TabFind *finder) { |
567 | 0 | if (needs_refit_) { |
568 | 0 | Fit(vertical, true); |
569 | 0 | } |
570 | 0 | if (needs_evaluation_) { |
571 | 0 | Evaluate(vertical, finder); |
572 | 0 | } |
573 | 0 | } |
574 | | |
575 | | // Evaluate the vector in terms of coverage of its length by good-looking |
576 | | // box edges. A good looking box is one where its nearest neighbour on the |
577 | | // inside is nearer than half the distance its nearest neighbour on the |
578 | | // outside of the putative column. Bad boxes are removed from the line. |
579 | | // A second pass then further filters boxes by requiring that the gutter |
580 | | // width be a minimum fraction of the mean gutter along the line. |
581 | 0 | void TabVector::Evaluate(const ICOORD &vertical, TabFind *finder) { |
582 | 0 | bool debug = false; |
583 | 0 | needs_evaluation_ = false; |
584 | 0 | int length = endpt_.y() - startpt_.y(); |
585 | 0 | if (length == 0 || boxes_.empty()) { |
586 | 0 | percent_score_ = 0; |
587 | 0 | Print("Zero length in evaluate"); |
588 | 0 | return; |
589 | 0 | } |
590 | | // Compute the mean box height. |
591 | 0 | BLOBNBOX_C_IT it(&boxes_); |
592 | 0 | int mean_height = 0; |
593 | 0 | int height_count = 0; |
594 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
595 | 0 | BLOBNBOX *bbox = it.data(); |
596 | 0 | const TBOX &box = bbox->bounding_box(); |
597 | 0 | int height = box.height(); |
598 | 0 | mean_height += height; |
599 | 0 | ++height_count; |
600 | 0 | } |
601 | 0 | if (height_count > 0) { |
602 | 0 | mean_height /= height_count; |
603 | 0 | } |
604 | 0 | int max_gutter = kGutterMultiple * mean_height; |
605 | 0 | if (IsRagged()) { |
606 | | // Ragged edges face a tougher test in that the gap must always be within |
607 | | // the height of the blob. |
608 | 0 | max_gutter = kGutterToNeighbourRatio * mean_height; |
609 | 0 | } |
610 | |
|
611 | 0 | STATS gutters(0, max_gutter); |
612 | | // Evaluate the boxes for their goodness, calculating the coverage as we go. |
613 | | // Remove boxes that are not good and shorten the list to the first and |
614 | | // last good boxes. |
615 | 0 | int num_deleted_boxes = 0; |
616 | 0 | bool text_on_image = false; |
617 | 0 | int good_length = 0; |
618 | 0 | const TBOX *prev_good_box = nullptr; |
619 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
620 | 0 | BLOBNBOX *bbox = it.data(); |
621 | 0 | const TBOX &box = bbox->bounding_box(); |
622 | 0 | int mid_y = (box.top() + box.bottom()) / 2; |
623 | 0 | if (TabFind::WithinTestRegion(2, XAtY(box.bottom()), box.bottom())) { |
624 | 0 | if (!debug) { |
625 | 0 | tprintf("After already deleting %d boxes, ", num_deleted_boxes); |
626 | 0 | Print("Starting evaluation"); |
627 | 0 | } |
628 | 0 | debug = true; |
629 | 0 | } |
630 | | // A good box is one where the nearest neighbour on the inside is closer |
631 | | // than half the distance to the nearest neighbour on the outside |
632 | | // (of the putative column). |
633 | 0 | bool left = IsLeftTab(); |
634 | 0 | int tab_x = XAtY(mid_y); |
635 | 0 | int gutter_width; |
636 | 0 | int neighbour_gap; |
637 | 0 | finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left, bbox, &gutter_width, |
638 | 0 | &neighbour_gap); |
639 | 0 | if (debug) { |
640 | 0 | tprintf("Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n", box.left(), box.bottom(), |
641 | 0 | box.right(), box.top(), gutter_width, neighbour_gap); |
642 | 0 | } |
643 | | // Now we can make the test. |
644 | 0 | if (neighbour_gap * kGutterToNeighbourRatio <= gutter_width) { |
645 | | // A good box contributes its height to the good_length. |
646 | 0 | good_length += box.top() - box.bottom(); |
647 | 0 | gutters.add(gutter_width, 1); |
648 | | // Two good boxes together contribute the gap between them |
649 | | // to the good_length as well, as long as the gap is not |
650 | | // too big. |
651 | 0 | if (prev_good_box != nullptr) { |
652 | 0 | int vertical_gap = box.bottom() - prev_good_box->top(); |
653 | 0 | double size1 = sqrt(static_cast<double>(prev_good_box->area())); |
654 | 0 | double size2 = sqrt(static_cast<double>(box.area())); |
655 | 0 | if (vertical_gap < kMaxFillinMultiple * std::min(size1, size2)) { |
656 | 0 | good_length += vertical_gap; |
657 | 0 | } |
658 | 0 | if (debug) { |
659 | 0 | tprintf("Box and prev good, gap=%d, target %g, goodlength=%d\n", vertical_gap, |
660 | 0 | kMaxFillinMultiple * std::min(size1, size2), good_length); |
661 | 0 | } |
662 | 0 | } else { |
663 | | // Adjust the start to the first good box. |
664 | 0 | SetYStart(box.bottom()); |
665 | 0 | } |
666 | 0 | prev_good_box = &box; |
667 | 0 | if (bbox->flow() == BTFT_TEXT_ON_IMAGE) { |
668 | 0 | text_on_image = true; |
669 | 0 | } |
670 | 0 | } else { |
671 | | // Get rid of boxes that are not good. |
672 | 0 | if (debug) { |
673 | 0 | tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, ndist %d\n", box.left(), box.bottom(), |
674 | 0 | box.right(), box.top(), gutter_width, neighbour_gap); |
675 | 0 | } |
676 | 0 | it.extract(); |
677 | 0 | ++num_deleted_boxes; |
678 | 0 | } |
679 | 0 | } |
680 | 0 | if (debug) { |
681 | 0 | Print("Evaluating:"); |
682 | 0 | } |
683 | | // If there are any good boxes, do it again, except this time get rid of |
684 | | // boxes that have a gutter that is a small fraction of the mean gutter. |
685 | | // This filters out ends that run into a coincidental gap in the text. |
686 | 0 | int search_top = endpt_.y(); |
687 | 0 | int search_bottom = startpt_.y(); |
688 | 0 | int median_gutter = IntCastRounded(gutters.median()); |
689 | 0 | if (gutters.get_total() > 0) { |
690 | 0 | prev_good_box = nullptr; |
691 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
692 | 0 | BLOBNBOX *bbox = it.data(); |
693 | 0 | const TBOX &box = bbox->bounding_box(); |
694 | 0 | int mid_y = (box.top() + box.bottom()) / 2; |
695 | | // A good box is one where the gutter width is at least some constant |
696 | | // fraction of the mean gutter width. |
697 | 0 | bool left = IsLeftTab(); |
698 | 0 | int tab_x = XAtY(mid_y); |
699 | 0 | int max_gutter = kGutterMultiple * mean_height; |
700 | 0 | if (IsRagged()) { |
701 | | // Ragged edges face a tougher test in that the gap must always be |
702 | | // within the height of the blob. |
703 | 0 | max_gutter = kGutterToNeighbourRatio * mean_height; |
704 | 0 | } |
705 | 0 | int gutter_width; |
706 | 0 | int neighbour_gap; |
707 | 0 | finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left, bbox, &gutter_width, |
708 | 0 | &neighbour_gap); |
709 | | // Now we can make the test. |
710 | 0 | if (gutter_width >= median_gutter * kMinGutterFraction) { |
711 | 0 | if (prev_good_box == nullptr) { |
712 | | // Adjust the start to the first good box. |
713 | 0 | SetYStart(box.bottom()); |
714 | 0 | search_bottom = box.top(); |
715 | 0 | } |
716 | 0 | prev_good_box = &box; |
717 | 0 | search_top = box.bottom(); |
718 | 0 | } else { |
719 | | // Get rid of boxes that are not good. |
720 | 0 | if (debug) { |
721 | 0 | tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, mean gutter %d\n", box.left(), |
722 | 0 | box.bottom(), box.right(), box.top(), gutter_width, median_gutter); |
723 | 0 | } |
724 | 0 | it.extract(); |
725 | 0 | ++num_deleted_boxes; |
726 | 0 | } |
727 | 0 | } |
728 | 0 | } |
729 | | // If there has been a good box, adjust the end. |
730 | 0 | if (prev_good_box != nullptr) { |
731 | 0 | SetYEnd(prev_good_box->top()); |
732 | | // Compute the percentage of the vector that is occupied by good boxes. |
733 | 0 | int length = endpt_.y() - startpt_.y(); |
734 | 0 | percent_score_ = 100 * good_length / length; |
735 | 0 | if (num_deleted_boxes > 0) { |
736 | 0 | needs_refit_ = true; |
737 | 0 | FitAndEvaluateIfNeeded(vertical, finder); |
738 | 0 | if (boxes_.empty()) { |
739 | 0 | return; |
740 | 0 | } |
741 | 0 | } |
742 | | // Test the gutter over the whole vector, instead of just at the boxes. |
743 | 0 | int required_shift; |
744 | 0 | if (search_bottom > search_top) { |
745 | 0 | search_bottom = startpt_.y(); |
746 | 0 | search_top = endpt_.y(); |
747 | 0 | } |
748 | 0 | double min_gutter_width = kLineCountReciprocal / boxes_.length(); |
749 | 0 | min_gutter_width += IsRagged() ? kMinRaggedGutter : kMinAlignedGutter; |
750 | 0 | min_gutter_width *= mean_height; |
751 | 0 | int max_gutter_width = IntCastRounded(min_gutter_width) + 1; |
752 | 0 | if (median_gutter > max_gutter_width) { |
753 | 0 | max_gutter_width = median_gutter; |
754 | 0 | } |
755 | 0 | int gutter_width = finder->GutterWidth(search_bottom, search_top, *this, text_on_image, |
756 | 0 | max_gutter_width, &required_shift); |
757 | 0 | if (gutter_width < min_gutter_width) { |
758 | 0 | if (debug) { |
759 | 0 | tprintf("Rejecting bad tab Vector with %d gutter vs %g min\n", gutter_width, |
760 | 0 | min_gutter_width); |
761 | 0 | } |
762 | 0 | boxes_.shallow_clear(); |
763 | 0 | percent_score_ = 0; |
764 | 0 | } else if (debug) { |
765 | 0 | tprintf("Final gutter %d, vs limit of %g, required shift = %d\n", gutter_width, |
766 | 0 | min_gutter_width, required_shift); |
767 | 0 | } |
768 | 0 | } else { |
769 | | // There are no good boxes left, so score is 0. |
770 | 0 | percent_score_ = 0; |
771 | 0 | } |
772 | | |
773 | 0 | if (debug) { |
774 | 0 | Print("Evaluation complete:"); |
775 | 0 | } |
776 | 0 | } |
777 | | |
778 | | // (Re)Fit a line to the stored points. Returns false if the line |
779 | | // is degenerate. Although the TabVector code mostly doesn't care about the |
780 | | // direction of lines, XAtY would give silly results for a horizontal line. |
781 | | // The class is mostly aimed at use for vertical lines representing |
782 | | // horizontal tab stops. |
783 | 0 | bool TabVector::Fit(ICOORD vertical, bool force_parallel) { |
784 | 0 | needs_refit_ = false; |
785 | 0 | if (boxes_.empty()) { |
786 | | // Don't refit something with no boxes, as that only happens |
787 | | // in Evaluate, and we don't want to end up with a zero vector. |
788 | 0 | if (!force_parallel) { |
789 | 0 | return false; |
790 | 0 | } |
791 | | // If we are forcing parallel, then we just need to set the sort_key_. |
792 | 0 | ICOORD midpt = startpt_; |
793 | 0 | midpt += endpt_; |
794 | 0 | midpt /= 2; |
795 | 0 | sort_key_ = SortKey(vertical, midpt.x(), midpt.y()); |
796 | 0 | return startpt_.y() != endpt_.y(); |
797 | 0 | } |
798 | 0 | if (!force_parallel && !IsRagged()) { |
799 | | // Use a fitted line as the vertical. |
800 | 0 | DetLineFit linepoints; |
801 | 0 | BLOBNBOX_C_IT it(&boxes_); |
802 | | // Fit a line to all the boxes in the list. |
803 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
804 | 0 | BLOBNBOX *bbox = it.data(); |
805 | 0 | const TBOX &box = bbox->bounding_box(); |
806 | 0 | int x1 = IsRightTab() ? box.right() : box.left(); |
807 | 0 | ICOORD boxpt(x1, box.bottom()); |
808 | 0 | linepoints.Add(boxpt); |
809 | 0 | if (it.at_last()) { |
810 | 0 | ICOORD top_pt(x1, box.top()); |
811 | 0 | linepoints.Add(top_pt); |
812 | 0 | } |
813 | 0 | } |
814 | 0 | linepoints.Fit(&startpt_, &endpt_); |
815 | 0 | if (startpt_.y() != endpt_.y()) { |
816 | 0 | vertical = endpt_; |
817 | 0 | vertical -= startpt_; |
818 | 0 | } |
819 | 0 | } |
820 | 0 | int start_y = startpt_.y(); |
821 | 0 | int end_y = endpt_.y(); |
822 | 0 | sort_key_ = IsLeftTab() ? INT32_MAX : -INT32_MAX; |
823 | 0 | BLOBNBOX_C_IT it(&boxes_); |
824 | | // Choose a line parallel to the vertical such that all boxes are on the |
825 | | // correct side of it. |
826 | 0 | mean_width_ = 0; |
827 | 0 | int width_count = 0; |
828 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
829 | 0 | BLOBNBOX *bbox = it.data(); |
830 | 0 | const TBOX &box = bbox->bounding_box(); |
831 | 0 | mean_width_ += box.width(); |
832 | 0 | ++width_count; |
833 | 0 | int x1 = IsRightTab() ? box.right() : box.left(); |
834 | | // Test both the bottom and the top, as one will be more extreme, depending |
835 | | // on the direction of skew. |
836 | 0 | int bottom_y = box.bottom(); |
837 | 0 | int top_y = box.top(); |
838 | 0 | int key = SortKey(vertical, x1, bottom_y); |
839 | 0 | if (IsLeftTab() == (key < sort_key_)) { |
840 | 0 | sort_key_ = key; |
841 | 0 | startpt_ = ICOORD(x1, bottom_y); |
842 | 0 | } |
843 | 0 | key = SortKey(vertical, x1, top_y); |
844 | 0 | if (IsLeftTab() == (key < sort_key_)) { |
845 | 0 | sort_key_ = key; |
846 | 0 | startpt_ = ICOORD(x1, top_y); |
847 | 0 | } |
848 | 0 | if (it.at_first()) { |
849 | 0 | start_y = bottom_y; |
850 | 0 | } |
851 | 0 | if (it.at_last()) { |
852 | 0 | end_y = top_y; |
853 | 0 | } |
854 | 0 | } |
855 | 0 | if (width_count > 0) { |
856 | 0 | mean_width_ = (mean_width_ + width_count - 1) / width_count; |
857 | 0 | } |
858 | 0 | endpt_ = startpt_ + vertical; |
859 | 0 | needs_evaluation_ = true; |
860 | 0 | if (start_y != end_y) { |
861 | | // Set the ends of the vector to fully include the first and last blobs. |
862 | 0 | startpt_.set_x(XAtY(vertical, sort_key_, start_y)); |
863 | 0 | startpt_.set_y(start_y); |
864 | 0 | endpt_.set_x(XAtY(vertical, sort_key_, end_y)); |
865 | 0 | endpt_.set_y(end_y); |
866 | 0 | return true; |
867 | 0 | } |
868 | 0 | return false; |
869 | 0 | } |
870 | | |
871 | | // Returns the singleton partner if there is one, or nullptr otherwise. |
872 | 0 | TabVector *TabVector::GetSinglePartner() { |
873 | 0 | if (!partners_.singleton()) { |
874 | 0 | return nullptr; |
875 | 0 | } |
876 | 0 | TabVector_C_IT partner_it(&partners_); |
877 | 0 | TabVector *partner = partner_it.data(); |
878 | 0 | return partner; |
879 | 0 | } |
880 | | |
881 | | // Return the partner of this TabVector if the vector qualifies as |
882 | | // being a vertical text line, otherwise nullptr. |
883 | 0 | TabVector *TabVector::VerticalTextlinePartner() { |
884 | 0 | if (!partners_.singleton()) { |
885 | 0 | return nullptr; |
886 | 0 | } |
887 | 0 | TabVector_C_IT partner_it(&partners_); |
888 | 0 | TabVector *partner = partner_it.data(); |
889 | 0 | BLOBNBOX_C_IT box_it1(&boxes_); |
890 | 0 | BLOBNBOX_C_IT box_it2(&partner->boxes_); |
891 | | // Count how many boxes are also in the other list. |
892 | | // At the same time, gather the mean width and median vertical gap. |
893 | 0 | if (textord_debug_tabfind > 1) { |
894 | 0 | Print("Testing for vertical text"); |
895 | 0 | partner->Print(" partner"); |
896 | 0 | } |
897 | 0 | int num_matched = 0; |
898 | 0 | int num_unmatched = 0; |
899 | 0 | int total_widths = 0; |
900 | 0 | int width = startpt().x() - partner->startpt().x(); |
901 | 0 | if (width < 0) { |
902 | 0 | width = -width; |
903 | 0 | } |
904 | 0 | STATS gaps(0, width * 2 - 1); |
905 | 0 | BLOBNBOX *prev_bbox = nullptr; |
906 | 0 | box_it2.mark_cycle_pt(); |
907 | 0 | for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) { |
908 | 0 | BLOBNBOX *bbox = box_it1.data(); |
909 | 0 | TBOX box = bbox->bounding_box(); |
910 | 0 | if (prev_bbox != nullptr) { |
911 | 0 | gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1); |
912 | 0 | } |
913 | 0 | while (!box_it2.cycled_list() && box_it2.data() != bbox && |
914 | 0 | box_it2.data()->bounding_box().bottom() < box.bottom()) { |
915 | 0 | box_it2.forward(); |
916 | 0 | } |
917 | 0 | if (!box_it2.cycled_list() && box_it2.data() == bbox && bbox->region_type() >= BRT_UNKNOWN && |
918 | 0 | (prev_bbox == nullptr || prev_bbox->region_type() >= BRT_UNKNOWN)) { |
919 | 0 | ++num_matched; |
920 | 0 | } else { |
921 | 0 | ++num_unmatched; |
922 | 0 | } |
923 | 0 | total_widths += box.width(); |
924 | 0 | prev_bbox = bbox; |
925 | 0 | } |
926 | 0 | if (num_unmatched + num_matched == 0) { |
927 | 0 | return nullptr; |
928 | 0 | } |
929 | 0 | double avg_width = total_widths * 1.0 / (num_unmatched + num_matched); |
930 | 0 | double max_gap = textord_tabvector_vertical_gap_fraction * avg_width; |
931 | 0 | int min_box_match = |
932 | 0 | static_cast<int>((num_matched + num_unmatched) * textord_tabvector_vertical_box_ratio); |
933 | 0 | bool is_vertical = |
934 | 0 | (gaps.get_total() > 0 && num_matched >= min_box_match && gaps.median() <= max_gap); |
935 | 0 | if (textord_debug_tabfind > 1) { |
936 | 0 | tprintf( |
937 | 0 | "gaps=%d, matched=%d, unmatched=%d, min_match=%d " |
938 | 0 | "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n", |
939 | 0 | gaps.get_total(), num_matched, num_unmatched, min_box_match, gaps.median(), avg_width, |
940 | 0 | max_gap, is_vertical ? "Yes" : "No"); |
941 | 0 | } |
942 | 0 | return (is_vertical) ? partner : nullptr; |
943 | 0 | } |
944 | | |
945 | | // The constructor is private. |
946 | | TabVector::TabVector(int extended_ymin, int extended_ymax, TabAlignment alignment, |
947 | | BLOBNBOX_CLIST *boxes) |
948 | 0 | : extended_ymin_(extended_ymin) |
949 | 0 | , extended_ymax_(extended_ymax) |
950 | 0 | , sort_key_(0) |
951 | 0 | , percent_score_(0) |
952 | 0 | , mean_width_(0) |
953 | 0 | , needs_refit_(true) |
954 | 0 | , needs_evaluation_(true) |
955 | 0 | , alignment_(alignment) |
956 | 0 | , top_constraints_(nullptr) |
957 | 0 | , bottom_constraints_(nullptr) { |
958 | 0 | BLOBNBOX_C_IT it(&boxes_); |
959 | 0 | it.add_list_after(boxes); |
960 | 0 | } |
961 | | |
962 | | // Delete this, but first, repoint all the partners to point to |
963 | | // replacement. If replacement is nullptr, then partner relationships |
964 | | // are removed. |
965 | 0 | void TabVector::Delete(TabVector *replacement) { |
966 | 0 | TabVector_C_IT it(&partners_); |
967 | 0 | for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) { |
968 | 0 | TabVector *partner = it.data(); |
969 | 0 | TabVector_C_IT p_it(&partner->partners_); |
970 | | // If partner already has replacement in its list, then make |
971 | | // replacement null, and just remove this TabVector when we find it. |
972 | 0 | TabVector *partner_replacement = replacement; |
973 | 0 | for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) { |
974 | 0 | TabVector *p_partner = p_it.data(); |
975 | 0 | if (p_partner == partner_replacement) { |
976 | 0 | partner_replacement = nullptr; |
977 | 0 | break; |
978 | 0 | } |
979 | 0 | } |
980 | | // Remove all references to this, and replace with replacement if not |
981 | | // nullptr. |
982 | 0 | for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) { |
983 | 0 | TabVector *p_partner = p_it.data(); |
984 | 0 | if (p_partner == this) { |
985 | 0 | p_it.extract(); |
986 | 0 | if (partner_replacement != nullptr) { |
987 | 0 | p_it.add_before_stay_put(partner_replacement); |
988 | 0 | } |
989 | 0 | } |
990 | 0 | } |
991 | 0 | if (partner_replacement != nullptr) { |
992 | 0 | partner_replacement->AddPartner(partner); |
993 | 0 | } |
994 | 0 | } |
995 | 0 | delete this; |
996 | 0 | } |
997 | | |
998 | | } // namespace tesseract. |