Coverage Report

Created: 2026-06-13 06:51

next uncovered line (L), next uncovered region (R), next uncovered branch (B)
/src/tesseract/src/textord/tabvector.cpp
Line
Count
Source
1
///////////////////////////////////////////////////////////////////////
2
// File:        tabvector.cpp
3
// Description: Class to hold a near-vertical vector representing a tab-stop.
4
// Author:      Ray Smith
5
//
6
// (C) Copyright 2008, Google Inc.
7
// Licensed under the Apache License, Version 2.0 (the "License");
8
// you may not use this file except in compliance with the License.
9
// You may obtain a copy of the License at
10
// http://www.apache.org/licenses/LICENSE-2.0
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS,
13
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
// See the License for the specific language governing permissions and
15
// limitations under the License.
16
//
17
///////////////////////////////////////////////////////////////////////
18
19
#ifdef HAVE_CONFIG_H
20
#  include "config_auto.h"
21
#endif
22
23
#include "blobbox.h"
24
#include "colfind.h"
25
#include "colpartitionset.h"
26
#include "detlinefit.h"
27
#include "helpers.h" // for IntCastRounded
28
#include "statistc.h"
29
#include "tabvector.h"
30
31
#include <algorithm>
32
33
namespace tesseract {
34
35
// Multiple of height used as a gutter for evaluation search.
36
const int kGutterMultiple = 4;
37
// Multiple of neighbour gap that we expect the gutter gap to be at minimum.
38
const int kGutterToNeighbourRatio = 3;
39
// Pixel distance for tab vectors to be considered the same.
40
const int kSimilarVectorDist = 10;
41
// Pixel distance for ragged tab vectors to be considered the same if there
42
// is nothing in the overlap box
43
const int kSimilarRaggedDist = 50;
44
// Max multiple of height to allow filling in between blobs when evaluating.
45
const int kMaxFillinMultiple = 11;
46
// Min fraction of mean gutter size to allow a gutter on a good tab blob.
47
const double kMinGutterFraction = 0.5;
48
// Multiple of 1/n lines as a minimum gutter in evaluation.
49
const double kLineCountReciprocal = 4.0;
50
// Constant add-on for minimum gutter for aligned tabs.
51
const double kMinAlignedGutter = 0.25;
52
// Constant add-on for minimum gutter for ragged tabs.
53
const double kMinRaggedGutter = 1.5;
54
55
double_VAR(textord_tabvector_vertical_gap_fraction, 0.5,
56
           "max fraction of mean blob width allowed for vertical gaps in "
57
           "vertical text");
58
59
double_VAR(textord_tabvector_vertical_box_ratio, 0.5,
60
           "Fraction of box matches required to declare a line vertical");
61
62
// Create a constraint for the top or bottom of this TabVector.
63
0
void TabConstraint::CreateConstraint(TabVector *vector, bool is_top) {
64
0
  auto *constraint = new TabConstraint(vector, is_top);
65
0
  auto *constraints = new TabConstraint_LIST;
66
0
  TabConstraint_IT it(constraints);
67
0
  it.add_to_end(constraint);
68
0
  if (is_top) {
69
0
    vector->set_top_constraints(constraints);
70
0
  } else {
71
0
    vector->set_bottom_constraints(constraints);
72
0
  }
73
0
}
74
75
// Test to see if the constraints are compatible enough to merge.
76
0
bool TabConstraint::CompatibleConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2) {
77
0
  if (list1 == list2) {
78
0
    return false;
79
0
  }
80
0
  int y_min = -INT32_MAX;
81
0
  int y_max = INT32_MAX;
82
0
  if (textord_debug_tabfind > 3) {
83
0
    tprintf("Testing constraint compatibility\n");
84
0
  }
85
0
  GetConstraints(list1, &y_min, &y_max);
86
0
  GetConstraints(list2, &y_min, &y_max);
87
0
  if (textord_debug_tabfind > 3) {
88
0
    tprintf("Resulting range = [%d,%d]\n", y_min, y_max);
89
0
  }
90
0
  return y_max >= y_min;
91
0
}
92
93
// Merge the lists of constraints and update the TabVector pointers.
94
// The second list is deleted.
95
0
void TabConstraint::MergeConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2) {
96
0
  if (list1 == list2) {
97
0
    return;
98
0
  }
99
0
  TabConstraint_IT it(list2);
100
0
  if (textord_debug_tabfind > 3) {
101
0
    tprintf("Merging constraints\n");
102
0
  }
103
  // The vectors of all constraints on list2 are now going to be on list1.
104
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
105
0
    TabConstraint *constraint = it.data();
106
0
    if (textord_debug_tabfind > 3) {
107
0
      constraint->vector_->Print("Merge");
108
0
    }
109
0
    if (constraint->is_top_) {
110
0
      constraint->vector_->set_top_constraints(list1);
111
0
    } else {
112
0
      constraint->vector_->set_bottom_constraints(list1);
113
0
    }
114
0
  }
115
0
  it = list1;
116
0
  it.add_list_before(list2);
117
0
  delete list2;
118
0
}
119
120
// Set all the tops and bottoms as appropriate to a mean of the
121
// constrained range. Delete all the constraints and list.
122
0
void TabConstraint::ApplyConstraints(TabConstraint_LIST *constraints) {
123
0
  int y_min = -INT32_MAX;
124
0
  int y_max = INT32_MAX;
125
0
  GetConstraints(constraints, &y_min, &y_max);
126
0
  int y = (y_min + y_max) / 2;
127
0
  TabConstraint_IT it(constraints);
128
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
129
0
    TabConstraint *constraint = it.data();
130
0
    TabVector *v = constraint->vector_;
131
0
    if (constraint->is_top_) {
132
0
      v->SetYEnd(y);
133
0
      v->set_top_constraints(nullptr);
134
0
    } else {
135
0
      v->SetYStart(y);
136
0
      v->set_bottom_constraints(nullptr);
137
0
    }
138
0
  }
139
0
  delete constraints;
140
0
}
141
142
0
TabConstraint::TabConstraint(TabVector *vector, bool is_top) : vector_(vector), is_top_(is_top) {
143
0
  if (is_top) {
144
0
    y_min_ = vector->endpt().y();
145
0
    y_max_ = vector->extended_ymax();
146
0
  } else {
147
0
    y_max_ = vector->startpt().y();
148
0
    y_min_ = vector->extended_ymin();
149
0
  }
150
0
}
151
152
// Get the max of the mins and the min of the maxes.
153
0
void TabConstraint::GetConstraints(TabConstraint_LIST *constraints, int *y_min, int *y_max) {
154
0
  TabConstraint_IT it(constraints);
155
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
156
0
    TabConstraint *constraint = it.data();
157
0
    if (textord_debug_tabfind > 3) {
158
0
      tprintf("Constraint is [%d,%d]", constraint->y_min_, constraint->y_max_);
159
0
      constraint->vector_->Print(" for");
160
0
    }
161
0
    *y_min = std::max(*y_min, constraint->y_min_);
162
0
    *y_max = std::min(*y_max, constraint->y_max_);
163
0
  }
164
0
}
165
166
// The constructor is private. See the bottom of the file...
167
168
// Public factory to build a TabVector from a list of boxes.
169
// The TabVector will be of the given alignment type.
170
// The input vertical vector is used in fitting, and the output
171
// vertical_x, vertical_y have the resulting line vector added to them
172
// if the alignment is not ragged.
173
// The extended_start_y and extended_end_y are the maximum possible
174
// extension to the line segment that can be used to align with others.
175
// The input CLIST of BLOBNBOX good_points is consumed and taken over.
176
TabVector *TabVector::FitVector(TabAlignment alignment, ICOORD vertical, int extended_start_y,
177
                                int extended_end_y, BLOBNBOX_CLIST *good_points, int *vertical_x,
178
0
                                int *vertical_y) {
179
0
  auto *vector = new TabVector(extended_start_y, extended_end_y, alignment, good_points);
180
0
  if (!vector->Fit(vertical, false)) {
181
0
    delete vector;
182
0
    return nullptr;
183
0
  }
184
0
  if (!vector->IsRagged()) {
185
0
    vertical = vector->endpt_ - vector->startpt_;
186
0
    int weight = vector->BoxCount();
187
0
    *vertical_x += vertical.x() * weight;
188
0
    *vertical_y += vertical.y() * weight;
189
0
  }
190
0
  return vector;
191
0
}
192
193
// Build a ragged TabVector by copying another's direction, shifting it
194
// to match the given blob, and making its initial extent the height
195
// of the blob, but its extended bounds from the bounds of the original.
196
TabVector::TabVector(const TabVector &src, TabAlignment alignment, const ICOORD &vertical_skew,
197
                     BLOBNBOX *blob)
198
0
    : extended_ymin_(src.extended_ymin_)
199
0
    , extended_ymax_(src.extended_ymax_)
200
0
    , needs_refit_(true)
201
0
    , needs_evaluation_(true)
202
0
    , alignment_(alignment) {
203
0
  BLOBNBOX_C_IT it(&boxes_);
204
0
  it.add_to_end(blob);
205
0
  TBOX box = blob->bounding_box();
206
0
  if (IsLeftTab()) {
207
0
    startpt_ = box.botleft();
208
0
    endpt_ = box.topleft();
209
0
  } else {
210
0
    startpt_ = box.botright();
211
0
    endpt_ = box.topright();
212
0
  }
213
0
  sort_key_ =
214
0
      SortKey(vertical_skew, (startpt_.x() + endpt_.x()) / 2, (startpt_.y() + endpt_.y()) / 2);
215
0
  if (textord_debug_tabfind > 3) {
216
0
    Print("Constructed a new tab vector:");
217
0
  }
218
0
}
219
220
// Copies basic attributes of a tab vector for simple operations.
221
// Copies things such startpt, endpt, range.
222
// Does not copy things such as partners, boxes, or constraints.
223
// This is useful if you only need vector information for processing, such
224
// as in the table detection code.
225
0
TabVector *TabVector::ShallowCopy() const {
226
0
  auto *copy = new TabVector();
227
0
  copy->startpt_ = startpt_;
228
0
  copy->endpt_ = endpt_;
229
0
  copy->alignment_ = alignment_;
230
0
  copy->extended_ymax_ = extended_ymax_;
231
0
  copy->extended_ymin_ = extended_ymin_;
232
0
  copy->intersects_other_lines_ = intersects_other_lines_;
233
0
  return copy;
234
0
}
235
236
// Extend this vector to include the supplied blob if it doesn't
237
// already have it.
238
0
void TabVector::ExtendToBox(BLOBNBOX *new_blob) {
239
0
  TBOX new_box = new_blob->bounding_box();
240
0
  BLOBNBOX_C_IT it(&boxes_);
241
0
  if (!it.empty()) {
242
0
    BLOBNBOX *blob = it.data();
243
0
    TBOX box = blob->bounding_box();
244
0
    while (!it.at_last() && box.top() <= new_box.top()) {
245
0
      if (blob == new_blob) {
246
0
        return; // We have it already.
247
0
      }
248
0
      it.forward();
249
0
      blob = it.data();
250
0
      box = blob->bounding_box();
251
0
    }
252
0
    if (box.top() >= new_box.top()) {
253
0
      it.add_before_stay_put(new_blob);
254
0
      needs_refit_ = true;
255
0
      return;
256
0
    }
257
0
  }
258
0
  needs_refit_ = true;
259
0
  it.add_after_stay_put(new_blob);
260
0
}
261
262
// Set the ycoord of the start and move the xcoord to match.
263
0
void TabVector::SetYStart(int start_y) {
264
0
  startpt_.set_x(XAtY(start_y));
265
0
  startpt_.set_y(start_y);
266
0
}
267
// Set the ycoord of the end and move the xcoord to match.
268
0
void TabVector::SetYEnd(int end_y) {
269
0
  endpt_.set_x(XAtY(end_y));
270
0
  endpt_.set_y(end_y);
271
0
}
272
273
// Rotate the ends by the given vector. Auto flip start and end if needed.
274
0
void TabVector::Rotate(const FCOORD &rotation) {
275
0
  startpt_.rotate(rotation);
276
0
  endpt_.rotate(rotation);
277
0
  int dx = endpt_.x() - startpt_.x();
278
0
  int dy = endpt_.y() - startpt_.y();
279
0
  if ((dy < 0 && abs(dy) > abs(dx)) || (dx < 0 && abs(dx) > abs(dy))) {
280
    // Need to flip start/end.
281
0
    ICOORD tmp = startpt_;
282
0
    startpt_ = endpt_;
283
0
    endpt_ = tmp;
284
0
  }
285
0
}
286
287
// Setup the initial constraints, being the limits of
288
// the vector and the extended ends.
289
0
void TabVector::SetupConstraints() {
290
0
  TabConstraint::CreateConstraint(this, false);
291
0
  TabConstraint::CreateConstraint(this, true);
292
0
}
293
294
// Setup the constraints between the partners of this TabVector.
295
0
void TabVector::SetupPartnerConstraints() {
296
  // With the first and last partner, we want a common bottom and top,
297
  // respectively, and for each change of partner, we want a common
298
  // top of first with bottom of next.
299
0
  TabVector_C_IT it(&partners_);
300
0
  TabVector *prev_partner = nullptr;
301
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
302
0
    TabVector *partner = it.data();
303
0
    if (partner->top_constraints_ == nullptr || partner->bottom_constraints_ == nullptr) {
304
0
      partner->Print("Impossible: has no constraints");
305
0
      Print("This vector has it as a partner");
306
0
      continue;
307
0
    }
308
0
    if (prev_partner == nullptr) {
309
      // This is the first partner, so common bottom.
310
0
      if (TabConstraint::CompatibleConstraints(bottom_constraints_, partner->bottom_constraints_)) {
311
0
        TabConstraint::MergeConstraints(bottom_constraints_, partner->bottom_constraints_);
312
0
      }
313
0
    } else {
314
      // We need prev top to be common with partner bottom.
315
0
      if (TabConstraint::CompatibleConstraints(prev_partner->top_constraints_,
316
0
                                               partner->bottom_constraints_)) {
317
0
        TabConstraint::MergeConstraints(prev_partner->top_constraints_,
318
0
                                        partner->bottom_constraints_);
319
0
      }
320
0
    }
321
0
    prev_partner = partner;
322
0
    if (it.at_last()) {
323
      // This is the last partner, so common top.
324
0
      if (TabConstraint::CompatibleConstraints(top_constraints_, partner->top_constraints_)) {
325
0
        TabConstraint::MergeConstraints(top_constraints_, partner->top_constraints_);
326
0
      }
327
0
    }
328
0
  }
329
0
}
330
331
// Setup the constraints between this and its partner.
332
0
void TabVector::SetupPartnerConstraints(TabVector *partner) {
333
0
  if (TabConstraint::CompatibleConstraints(bottom_constraints_, partner->bottom_constraints_)) {
334
0
    TabConstraint::MergeConstraints(bottom_constraints_, partner->bottom_constraints_);
335
0
  }
336
0
  if (TabConstraint::CompatibleConstraints(top_constraints_, partner->top_constraints_)) {
337
0
    TabConstraint::MergeConstraints(top_constraints_, partner->top_constraints_);
338
0
  }
339
0
}
340
341
// Use the constraints to modify the top and bottom.
342
0
void TabVector::ApplyConstraints() {
343
0
  if (top_constraints_ != nullptr) {
344
0
    TabConstraint::ApplyConstraints(top_constraints_);
345
0
  }
346
0
  if (bottom_constraints_ != nullptr) {
347
0
    TabConstraint::ApplyConstraints(bottom_constraints_);
348
0
  }
349
0
}
350
351
// Merge close tab vectors of the same side that overlap.
352
void TabVector::MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors,
353
0
                                       BlobGrid *grid) {
354
0
  TabVector_IT it1(vectors);
355
0
  for (it1.mark_cycle_pt(); !it1.cycled_list(); it1.forward()) {
356
0
    TabVector *v1 = it1.data();
357
0
    TabVector_IT it2(it1);
358
0
    for (it2.forward(); !it2.at_first(); it2.forward()) {
359
0
      TabVector *v2 = it2.data();
360
0
      if (v2->SimilarTo(vertical, *v1, grid)) {
361
        // Merge into the forward one, in case the combined vector now
362
        // overlaps one in between.
363
0
        if (textord_debug_tabfind) {
364
0
          v2->Print("Merging");
365
0
          v1->Print("by deleting");
366
0
        }
367
0
        v2->MergeWith(vertical, it1.extract());
368
0
        if (textord_debug_tabfind) {
369
0
          v2->Print("Producing");
370
0
        }
371
0
        ICOORD merged_vector = v2->endpt();
372
0
        merged_vector -= v2->startpt();
373
0
        if (textord_debug_tabfind && abs(merged_vector.x()) > 100) {
374
0
          v2->Print("Garbage result of merge?");
375
0
        }
376
0
        break;
377
0
      }
378
0
    }
379
0
  }
380
0
}
381
382
// Return true if this vector is the same side, overlaps, and close
383
// enough to the other to be merged.
384
0
bool TabVector::SimilarTo(const ICOORD &vertical, const TabVector &other, BlobGrid *grid) const {
385
0
  if ((IsRightTab() && other.IsRightTab()) || (IsLeftTab() && other.IsLeftTab())) {
386
    // If they don't overlap, at least in extensions, then there is no chance.
387
0
    if (ExtendedOverlap(other.extended_ymax_, other.extended_ymin_) < 0) {
388
0
      return false;
389
0
    }
390
    // A fast approximation to the scale factor of the sort_key_.
391
0
    int v_scale = abs(vertical.y());
392
0
    if (v_scale == 0) {
393
0
      v_scale = 1;
394
0
    }
395
    // If they are close enough, then OK.
396
0
    if (sort_key_ + kSimilarVectorDist * v_scale >= other.sort_key_ &&
397
0
        sort_key_ - kSimilarVectorDist * v_scale <= other.sort_key_) {
398
0
      return true;
399
0
    }
400
    // Ragged tabs get a bigger threshold.
401
0
    if (!IsRagged() || !other.IsRagged() ||
402
0
        sort_key_ + kSimilarRaggedDist * v_scale < other.sort_key_ ||
403
0
        sort_key_ - kSimilarRaggedDist * v_scale > other.sort_key_) {
404
0
      return false;
405
0
    }
406
0
    if (grid == nullptr) {
407
      // There is nothing else to test!
408
0
      return true;
409
0
    }
410
    // If there is nothing in the rectangle between the vector that is going to
411
    // move, and the place it is moving to, then they can be merged.
412
    // Setup a vertical search for any blob.
413
0
    const TabVector *mover = (IsRightTab() && sort_key_ < other.sort_key_) ? this : &other;
414
0
    int top_y = mover->endpt_.y();
415
0
    int bottom_y = mover->startpt_.y();
416
0
    int left = std::min(mover->XAtY(top_y), mover->XAtY(bottom_y));
417
0
    int right = std::max(mover->XAtY(top_y), mover->XAtY(bottom_y));
418
0
    int shift = abs(sort_key_ - other.sort_key_) / v_scale;
419
0
    if (IsRightTab()) {
420
0
      right += shift;
421
0
    } else {
422
0
      left -= shift;
423
0
    }
424
425
0
    GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> vsearch(grid);
426
0
    vsearch.StartVerticalSearch(left, right, top_y);
427
0
    BLOBNBOX *blob;
428
0
    while ((blob = vsearch.NextVerticalSearch(true)) != nullptr) {
429
0
      const TBOX &box = blob->bounding_box();
430
0
      if (box.top() > bottom_y) {
431
0
        return true; // Nothing found.
432
0
      }
433
0
      if (box.bottom() < top_y) {
434
0
        continue; // Doesn't overlap.
435
0
      }
436
0
      int left_at_box = XAtY(box.bottom());
437
0
      int right_at_box = left_at_box;
438
0
      if (IsRightTab()) {
439
0
        right_at_box += shift;
440
0
      } else {
441
0
        left_at_box -= shift;
442
0
      }
443
0
      if (std::min(right_at_box, static_cast<int>(box.right())) >
444
0
          std::max(left_at_box, static_cast<int>(box.left()))) {
445
0
        return false;
446
0
      }
447
0
    }
448
0
    return true; // Nothing found.
449
0
  }
450
0
  return false;
451
0
}
452
453
// Eat the other TabVector into this and delete it.
454
0
void TabVector::MergeWith(const ICOORD &vertical, TabVector *other) {
455
0
  extended_ymin_ = std::min(extended_ymin_, other->extended_ymin_);
456
0
  extended_ymax_ = std::max(extended_ymax_, other->extended_ymax_);
457
0
  if (other->IsRagged()) {
458
0
    alignment_ = other->alignment_;
459
0
  }
460
  // Merge sort the two lists of boxes.
461
0
  BLOBNBOX_C_IT it1(&boxes_);
462
0
  BLOBNBOX_C_IT it2(&other->boxes_);
463
0
  while (!it2.empty()) {
464
0
    BLOBNBOX *bbox2 = it2.extract();
465
0
    it2.forward();
466
0
    TBOX box2 = bbox2->bounding_box();
467
0
    BLOBNBOX *bbox1 = it1.data();
468
0
    TBOX box1 = bbox1->bounding_box();
469
0
    while (box1.bottom() < box2.bottom() && !it1.at_last()) {
470
0
      it1.forward();
471
0
      bbox1 = it1.data();
472
0
      box1 = bbox1->bounding_box();
473
0
    }
474
0
    if (box1.bottom() < box2.bottom()) {
475
0
      it1.add_to_end(bbox2);
476
0
    } else if (bbox1 != bbox2) {
477
0
      it1.add_before_stay_put(bbox2);
478
0
    }
479
0
  }
480
0
  Fit(vertical, true);
481
0
  other->Delete(this);
482
0
}
483
484
// Add a new element to the list of partner TabVectors.
485
// Partners must be added in order of increasing y coordinate of the text line
486
// that makes them partners.
487
// Groups of identical partners are merged into one.
488
0
void TabVector::AddPartner(TabVector *partner) {
489
0
  if (IsSeparator() || partner->IsSeparator()) {
490
0
    return;
491
0
  }
492
0
  TabVector_C_IT it(&partners_);
493
0
  if (!it.empty()) {
494
0
    it.move_to_last();
495
0
    if (it.data() == partner) {
496
0
      return;
497
0
    }
498
0
  }
499
0
  it.add_after_then_move(partner);
500
0
}
501
502
// Return true if other is a partner of this.
503
0
bool TabVector::IsAPartner(const TabVector *other) {
504
0
  TabVector_C_IT it(&partners_);
505
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
506
0
    if (it.data() == other) {
507
0
      return true;
508
0
    }
509
0
  }
510
0
  return false;
511
0
}
512
513
// These names must be synced with the TabAlignment enum in tabvector.h.
514
static const char *const kAlignmentNames[] = {"Left Aligned",  "Left Ragged",  "Center",
515
                                              "Right Aligned", "Right Ragged", "Separator"};
516
517
// Print basic information about this tab vector.
518
0
void TabVector::Print(const char *prefix) {
519
0
  tprintf(
520
0
      "%s %s (%d,%d)->(%d,%d) w=%d s=%d, sort key=%d, boxes=%d,"
521
0
      " partners=%d\n",
522
0
      prefix, kAlignmentNames[alignment_], startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y(),
523
0
      mean_width_, percent_score_, sort_key_, boxes_.length(), partners_.length());
524
0
}
525
526
// Print basic information about this tab vector and every box in it.
527
0
void TabVector::Debug(const char *prefix) {
528
0
  Print(prefix);
529
0
  BLOBNBOX_C_IT it(&boxes_);
530
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
531
0
    BLOBNBOX *bbox = it.data();
532
0
    const TBOX &box = bbox->bounding_box();
533
0
    tprintf("Box at (%d,%d)->(%d,%d)\n", box.left(), box.bottom(), box.right(), box.top());
534
0
  }
535
0
}
536
537
#ifndef GRAPHICS_DISABLED
538
539
// Draw this tabvector in place in the given window.
540
void TabVector::Display(ScrollView *tab_win) {
541
  if (textord_debug_printable) {
542
    tab_win->Pen(ScrollView::BLUE);
543
  } else if (alignment_ == TA_LEFT_ALIGNED) {
544
    tab_win->Pen(ScrollView::LIME_GREEN);
545
  } else if (alignment_ == TA_LEFT_RAGGED) {
546
    tab_win->Pen(ScrollView::DARK_GREEN);
547
  } else if (alignment_ == TA_RIGHT_ALIGNED) {
548
    tab_win->Pen(ScrollView::PINK);
549
  } else if (alignment_ == TA_RIGHT_RAGGED) {
550
    tab_win->Pen(ScrollView::CORAL);
551
  } else {
552
    tab_win->Pen(ScrollView::WHITE);
553
  }
554
  tab_win->Line(startpt_.x(), startpt_.y(), endpt_.x(), endpt_.y());
555
  tab_win->Pen(ScrollView::GREY);
556
  tab_win->Line(startpt_.x(), startpt_.y(), startpt_.x(), extended_ymin_);
557
  tab_win->Line(endpt_.x(), extended_ymax_, endpt_.x(), endpt_.y());
558
  auto score_string = std::to_string(percent_score_);
559
  tab_win->TextAttributes("Times", 50, false, false, false);
560
  tab_win->Text(startpt_.x(), startpt_.y(), score_string.c_str());
561
}
562
563
#endif
564
565
// Refit the line and/or re-evaluate the vector if the dirty flags are set.
566
0
void TabVector::FitAndEvaluateIfNeeded(const ICOORD &vertical, TabFind *finder) {
567
0
  if (needs_refit_) {
568
0
    Fit(vertical, true);
569
0
  }
570
0
  if (needs_evaluation_) {
571
0
    Evaluate(vertical, finder);
572
0
  }
573
0
}
574
575
// Evaluate the vector in terms of coverage of its length by good-looking
576
// box edges. A good looking box is one where its nearest neighbour on the
577
// inside is nearer than half the distance its nearest neighbour on the
578
// outside of the putative column. Bad boxes are removed from the line.
579
// A second pass then further filters boxes by requiring that the gutter
580
// width be a minimum fraction of the mean gutter along the line.
581
0
void TabVector::Evaluate(const ICOORD &vertical, TabFind *finder) {
582
0
  bool debug = false;
583
0
  needs_evaluation_ = false;
584
0
  int length = endpt_.y() - startpt_.y();
585
0
  if (length == 0 || boxes_.empty()) {
586
0
    percent_score_ = 0;
587
0
    Print("Zero length in evaluate");
588
0
    return;
589
0
  }
590
  // Compute the mean box height.
591
0
  BLOBNBOX_C_IT it(&boxes_);
592
0
  int mean_height = 0;
593
0
  int height_count = 0;
594
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
595
0
    BLOBNBOX *bbox = it.data();
596
0
    const TBOX &box = bbox->bounding_box();
597
0
    int height = box.height();
598
0
    mean_height += height;
599
0
    ++height_count;
600
0
  }
601
0
  if (height_count > 0) {
602
0
    mean_height /= height_count;
603
0
  }
604
0
  int max_gutter = kGutterMultiple * mean_height;
605
0
  if (IsRagged()) {
606
    // Ragged edges face a tougher test in that the gap must always be within
607
    // the height of the blob.
608
0
    max_gutter = kGutterToNeighbourRatio * mean_height;
609
0
  }
610
611
0
  STATS gutters(0, max_gutter);
612
  // Evaluate the boxes for their goodness, calculating the coverage as we go.
613
  // Remove boxes that are not good and shorten the list to the first and
614
  // last good boxes.
615
0
  int num_deleted_boxes = 0;
616
0
  bool text_on_image = false;
617
0
  int good_length = 0;
618
0
  const TBOX *prev_good_box = nullptr;
619
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
620
0
    BLOBNBOX *bbox = it.data();
621
0
    const TBOX &box = bbox->bounding_box();
622
0
    int mid_y = (box.top() + box.bottom()) / 2;
623
0
    if (TabFind::WithinTestRegion(2, XAtY(box.bottom()), box.bottom())) {
624
0
      if (!debug) {
625
0
        tprintf("After already deleting %d boxes, ", num_deleted_boxes);
626
0
        Print("Starting evaluation");
627
0
      }
628
0
      debug = true;
629
0
    }
630
    // A good box is one where the nearest neighbour on the inside is closer
631
    // than half the distance to the nearest neighbour on the outside
632
    // (of the putative column).
633
0
    bool left = IsLeftTab();
634
0
    int tab_x = XAtY(mid_y);
635
0
    int gutter_width;
636
0
    int neighbour_gap;
637
0
    finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left, bbox, &gutter_width,
638
0
                                       &neighbour_gap);
639
0
    if (debug) {
640
0
      tprintf("Box (%d,%d)->(%d,%d) has gutter %d, ndist %d\n", box.left(), box.bottom(),
641
0
              box.right(), box.top(), gutter_width, neighbour_gap);
642
0
    }
643
    // Now we can make the test.
644
0
    if (neighbour_gap * kGutterToNeighbourRatio <= gutter_width) {
645
      // A good box contributes its height to the good_length.
646
0
      good_length += box.top() - box.bottom();
647
0
      gutters.add(gutter_width, 1);
648
      // Two good boxes together contribute the gap between them
649
      // to the good_length as well, as long as the gap is not
650
      // too big.
651
0
      if (prev_good_box != nullptr) {
652
0
        int vertical_gap = box.bottom() - prev_good_box->top();
653
0
        double size1 = sqrt(static_cast<double>(prev_good_box->area()));
654
0
        double size2 = sqrt(static_cast<double>(box.area()));
655
0
        if (vertical_gap < kMaxFillinMultiple * std::min(size1, size2)) {
656
0
          good_length += vertical_gap;
657
0
        }
658
0
        if (debug) {
659
0
          tprintf("Box and prev good, gap=%d, target %g, goodlength=%d\n", vertical_gap,
660
0
                  kMaxFillinMultiple * std::min(size1, size2), good_length);
661
0
        }
662
0
      } else {
663
        // Adjust the start to the first good box.
664
0
        SetYStart(box.bottom());
665
0
      }
666
0
      prev_good_box = &box;
667
0
      if (bbox->flow() == BTFT_TEXT_ON_IMAGE) {
668
0
        text_on_image = true;
669
0
      }
670
0
    } else {
671
      // Get rid of boxes that are not good.
672
0
      if (debug) {
673
0
        tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, ndist %d\n", box.left(), box.bottom(),
674
0
                box.right(), box.top(), gutter_width, neighbour_gap);
675
0
      }
676
0
      it.extract();
677
0
      ++num_deleted_boxes;
678
0
    }
679
0
  }
680
0
  if (debug) {
681
0
    Print("Evaluating:");
682
0
  }
683
  // If there are any good boxes, do it again, except this time get rid of
684
  // boxes that have a gutter that is a small fraction of the mean gutter.
685
  // This filters out ends that run into a coincidental gap in the text.
686
0
  int search_top = endpt_.y();
687
0
  int search_bottom = startpt_.y();
688
0
  int median_gutter = IntCastRounded(gutters.median());
689
0
  if (gutters.get_total() > 0) {
690
0
    prev_good_box = nullptr;
691
0
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
692
0
      BLOBNBOX *bbox = it.data();
693
0
      const TBOX &box = bbox->bounding_box();
694
0
      int mid_y = (box.top() + box.bottom()) / 2;
695
      // A good box is one where the gutter width is at least some constant
696
      // fraction of the mean gutter width.
697
0
      bool left = IsLeftTab();
698
0
      int tab_x = XAtY(mid_y);
699
0
      int max_gutter = kGutterMultiple * mean_height;
700
0
      if (IsRagged()) {
701
        // Ragged edges face a tougher test in that the gap must always be
702
        // within the height of the blob.
703
0
        max_gutter = kGutterToNeighbourRatio * mean_height;
704
0
      }
705
0
      int gutter_width;
706
0
      int neighbour_gap;
707
0
      finder->GutterWidthAndNeighbourGap(tab_x, mean_height, max_gutter, left, bbox, &gutter_width,
708
0
                                         &neighbour_gap);
709
      // Now we can make the test.
710
0
      if (gutter_width >= median_gutter * kMinGutterFraction) {
711
0
        if (prev_good_box == nullptr) {
712
          // Adjust the start to the first good box.
713
0
          SetYStart(box.bottom());
714
0
          search_bottom = box.top();
715
0
        }
716
0
        prev_good_box = &box;
717
0
        search_top = box.bottom();
718
0
      } else {
719
        // Get rid of boxes that are not good.
720
0
        if (debug) {
721
0
          tprintf("Bad Box (%d,%d)->(%d,%d) with gutter %d, mean gutter %d\n", box.left(),
722
0
                  box.bottom(), box.right(), box.top(), gutter_width, median_gutter);
723
0
        }
724
0
        it.extract();
725
0
        ++num_deleted_boxes;
726
0
      }
727
0
    }
728
0
  }
729
  // If there has been a good box, adjust the end.
730
0
  if (prev_good_box != nullptr) {
731
0
    SetYEnd(prev_good_box->top());
732
    // Compute the percentage of the vector that is occupied by good boxes.
733
0
    int length = endpt_.y() - startpt_.y();
734
0
    percent_score_ = 100 * good_length / length;
735
0
    if (num_deleted_boxes > 0) {
736
0
      needs_refit_ = true;
737
0
      FitAndEvaluateIfNeeded(vertical, finder);
738
0
      if (boxes_.empty()) {
739
0
        return;
740
0
      }
741
0
    }
742
    // Test the gutter over the whole vector, instead of just at the boxes.
743
0
    int required_shift;
744
0
    if (search_bottom > search_top) {
745
0
      search_bottom = startpt_.y();
746
0
      search_top = endpt_.y();
747
0
    }
748
0
    double min_gutter_width = kLineCountReciprocal / boxes_.length();
749
0
    min_gutter_width += IsRagged() ? kMinRaggedGutter : kMinAlignedGutter;
750
0
    min_gutter_width *= mean_height;
751
0
    int max_gutter_width = IntCastRounded(min_gutter_width) + 1;
752
0
    if (median_gutter > max_gutter_width) {
753
0
      max_gutter_width = median_gutter;
754
0
    }
755
0
    int gutter_width = finder->GutterWidth(search_bottom, search_top, *this, text_on_image,
756
0
                                           max_gutter_width, &required_shift);
757
0
    if (gutter_width < min_gutter_width) {
758
0
      if (debug) {
759
0
        tprintf("Rejecting bad tab Vector with %d gutter vs %g min\n", gutter_width,
760
0
                min_gutter_width);
761
0
      }
762
0
      boxes_.shallow_clear();
763
0
      percent_score_ = 0;
764
0
    } else if (debug) {
765
0
      tprintf("Final gutter %d, vs limit of %g, required shift = %d\n", gutter_width,
766
0
              min_gutter_width, required_shift);
767
0
    }
768
0
  } else {
769
    // There are no good boxes left, so score is 0.
770
0
    percent_score_ = 0;
771
0
  }
772
773
0
  if (debug) {
774
0
    Print("Evaluation complete:");
775
0
  }
776
0
}
777
778
// (Re)Fit a line to the stored points. Returns false if the line
779
// is degenerate. Although the TabVector code mostly doesn't care about the
780
// direction of lines, XAtY would give silly results for a horizontal line.
781
// The class is mostly aimed at use for vertical lines representing
782
// horizontal tab stops.
783
0
bool TabVector::Fit(ICOORD vertical, bool force_parallel) {
784
0
  needs_refit_ = false;
785
0
  if (boxes_.empty()) {
786
    // Don't refit something with no boxes, as that only happens
787
    // in Evaluate, and we don't want to end up with a zero vector.
788
0
    if (!force_parallel) {
789
0
      return false;
790
0
    }
791
    // If we are forcing parallel, then we just need to set the sort_key_.
792
0
    ICOORD midpt = startpt_;
793
0
    midpt += endpt_;
794
0
    midpt /= 2;
795
0
    sort_key_ = SortKey(vertical, midpt.x(), midpt.y());
796
0
    return startpt_.y() != endpt_.y();
797
0
  }
798
0
  if (!force_parallel && !IsRagged()) {
799
    // Use a fitted line as the vertical.
800
0
    DetLineFit linepoints;
801
0
    BLOBNBOX_C_IT it(&boxes_);
802
    // Fit a line to all the boxes in the list.
803
0
    for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
804
0
      BLOBNBOX *bbox = it.data();
805
0
      const TBOX &box = bbox->bounding_box();
806
0
      int x1 = IsRightTab() ? box.right() : box.left();
807
0
      ICOORD boxpt(x1, box.bottom());
808
0
      linepoints.Add(boxpt);
809
0
      if (it.at_last()) {
810
0
        ICOORD top_pt(x1, box.top());
811
0
        linepoints.Add(top_pt);
812
0
      }
813
0
    }
814
0
    linepoints.Fit(&startpt_, &endpt_);
815
0
    if (startpt_.y() != endpt_.y()) {
816
0
      vertical = endpt_;
817
0
      vertical -= startpt_;
818
0
    }
819
0
  }
820
0
  int start_y = startpt_.y();
821
0
  int end_y = endpt_.y();
822
0
  sort_key_ = IsLeftTab() ? INT32_MAX : -INT32_MAX;
823
0
  BLOBNBOX_C_IT it(&boxes_);
824
  // Choose a line parallel to the vertical such that all boxes are on the
825
  // correct side of it.
826
0
  mean_width_ = 0;
827
0
  int width_count = 0;
828
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
829
0
    BLOBNBOX *bbox = it.data();
830
0
    const TBOX &box = bbox->bounding_box();
831
0
    mean_width_ += box.width();
832
0
    ++width_count;
833
0
    int x1 = IsRightTab() ? box.right() : box.left();
834
    // Test both the bottom and the top, as one will be more extreme, depending
835
    // on the direction of skew.
836
0
    int bottom_y = box.bottom();
837
0
    int top_y = box.top();
838
0
    int key = SortKey(vertical, x1, bottom_y);
839
0
    if (IsLeftTab() == (key < sort_key_)) {
840
0
      sort_key_ = key;
841
0
      startpt_ = ICOORD(x1, bottom_y);
842
0
    }
843
0
    key = SortKey(vertical, x1, top_y);
844
0
    if (IsLeftTab() == (key < sort_key_)) {
845
0
      sort_key_ = key;
846
0
      startpt_ = ICOORD(x1, top_y);
847
0
    }
848
0
    if (it.at_first()) {
849
0
      start_y = bottom_y;
850
0
    }
851
0
    if (it.at_last()) {
852
0
      end_y = top_y;
853
0
    }
854
0
  }
855
0
  if (width_count > 0) {
856
0
    mean_width_ = (mean_width_ + width_count - 1) / width_count;
857
0
  }
858
0
  endpt_ = startpt_ + vertical;
859
0
  needs_evaluation_ = true;
860
0
  if (start_y != end_y) {
861
    // Set the ends of the vector to fully include the first and last blobs.
862
0
    startpt_.set_x(XAtY(vertical, sort_key_, start_y));
863
0
    startpt_.set_y(start_y);
864
0
    endpt_.set_x(XAtY(vertical, sort_key_, end_y));
865
0
    endpt_.set_y(end_y);
866
0
    return true;
867
0
  }
868
0
  return false;
869
0
}
870
871
// Returns the singleton partner if there is one, or nullptr otherwise.
872
0
TabVector *TabVector::GetSinglePartner() {
873
0
  if (!partners_.singleton()) {
874
0
    return nullptr;
875
0
  }
876
0
  TabVector_C_IT partner_it(&partners_);
877
0
  TabVector *partner = partner_it.data();
878
0
  return partner;
879
0
}
880
881
// Return the partner of this TabVector if the vector qualifies as
882
// being a vertical text line, otherwise nullptr.
883
0
TabVector *TabVector::VerticalTextlinePartner() {
884
0
  if (!partners_.singleton()) {
885
0
    return nullptr;
886
0
  }
887
0
  TabVector_C_IT partner_it(&partners_);
888
0
  TabVector *partner = partner_it.data();
889
0
  BLOBNBOX_C_IT box_it1(&boxes_);
890
0
  BLOBNBOX_C_IT box_it2(&partner->boxes_);
891
  // Count how many boxes are also in the other list.
892
  // At the same time, gather the mean width and median vertical gap.
893
0
  if (textord_debug_tabfind > 1) {
894
0
    Print("Testing for vertical text");
895
0
    partner->Print("           partner");
896
0
  }
897
0
  int num_matched = 0;
898
0
  int num_unmatched = 0;
899
0
  int total_widths = 0;
900
0
  int width = startpt().x() - partner->startpt().x();
901
0
  if (width < 0) {
902
0
    width = -width;
903
0
  }
904
0
  STATS gaps(0, width * 2 - 1);
905
0
  BLOBNBOX *prev_bbox = nullptr;
906
0
  box_it2.mark_cycle_pt();
907
0
  for (box_it1.mark_cycle_pt(); !box_it1.cycled_list(); box_it1.forward()) {
908
0
    BLOBNBOX *bbox = box_it1.data();
909
0
    TBOX box = bbox->bounding_box();
910
0
    if (prev_bbox != nullptr) {
911
0
      gaps.add(box.bottom() - prev_bbox->bounding_box().top(), 1);
912
0
    }
913
0
    while (!box_it2.cycled_list() && box_it2.data() != bbox &&
914
0
           box_it2.data()->bounding_box().bottom() < box.bottom()) {
915
0
      box_it2.forward();
916
0
    }
917
0
    if (!box_it2.cycled_list() && box_it2.data() == bbox && bbox->region_type() >= BRT_UNKNOWN &&
918
0
        (prev_bbox == nullptr || prev_bbox->region_type() >= BRT_UNKNOWN)) {
919
0
      ++num_matched;
920
0
    } else {
921
0
      ++num_unmatched;
922
0
    }
923
0
    total_widths += box.width();
924
0
    prev_bbox = bbox;
925
0
  }
926
0
  if (num_unmatched + num_matched == 0) {
927
0
    return nullptr;
928
0
  }
929
0
  double avg_width = total_widths * 1.0 / (num_unmatched + num_matched);
930
0
  double max_gap = textord_tabvector_vertical_gap_fraction * avg_width;
931
0
  int min_box_match =
932
0
      static_cast<int>((num_matched + num_unmatched) * textord_tabvector_vertical_box_ratio);
933
0
  bool is_vertical =
934
0
      (gaps.get_total() > 0 && num_matched >= min_box_match && gaps.median() <= max_gap);
935
0
  if (textord_debug_tabfind > 1) {
936
0
    tprintf(
937
0
        "gaps=%d, matched=%d, unmatched=%d, min_match=%d "
938
0
        "median gap=%.2f, width=%.2f max_gap=%.2f Vertical=%s\n",
939
0
        gaps.get_total(), num_matched, num_unmatched, min_box_match, gaps.median(), avg_width,
940
0
        max_gap, is_vertical ? "Yes" : "No");
941
0
  }
942
0
  return (is_vertical) ? partner : nullptr;
943
0
}
944
945
// The constructor is private.
946
TabVector::TabVector(int extended_ymin, int extended_ymax, TabAlignment alignment,
947
                     BLOBNBOX_CLIST *boxes)
948
0
    : extended_ymin_(extended_ymin)
949
0
    , extended_ymax_(extended_ymax)
950
0
    , sort_key_(0)
951
0
    , percent_score_(0)
952
0
    , mean_width_(0)
953
0
    , needs_refit_(true)
954
0
    , needs_evaluation_(true)
955
0
    , alignment_(alignment)
956
0
    , top_constraints_(nullptr)
957
0
    , bottom_constraints_(nullptr) {
958
0
  BLOBNBOX_C_IT it(&boxes_);
959
0
  it.add_list_after(boxes);
960
0
}
961
962
// Delete this, but first, repoint all the partners to point to
963
// replacement. If replacement is nullptr, then partner relationships
964
// are removed.
965
0
void TabVector::Delete(TabVector *replacement) {
966
0
  TabVector_C_IT it(&partners_);
967
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
968
0
    TabVector *partner = it.data();
969
0
    TabVector_C_IT p_it(&partner->partners_);
970
    // If partner already has replacement in its list, then make
971
    // replacement null, and just remove this TabVector when we find it.
972
0
    TabVector *partner_replacement = replacement;
973
0
    for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
974
0
      TabVector *p_partner = p_it.data();
975
0
      if (p_partner == partner_replacement) {
976
0
        partner_replacement = nullptr;
977
0
        break;
978
0
      }
979
0
    }
980
    // Remove all references to this, and replace with replacement if not
981
    // nullptr.
982
0
    for (p_it.mark_cycle_pt(); !p_it.cycled_list(); p_it.forward()) {
983
0
      TabVector *p_partner = p_it.data();
984
0
      if (p_partner == this) {
985
0
        p_it.extract();
986
0
        if (partner_replacement != nullptr) {
987
0
          p_it.add_before_stay_put(partner_replacement);
988
0
        }
989
0
      }
990
0
    }
991
0
    if (partner_replacement != nullptr) {
992
0
      partner_replacement->AddPartner(partner);
993
0
    }
994
0
  }
995
0
  delete this;
996
0
}
997
998
} // namespace tesseract.