Coverage Report

Created: 2025-06-13 07:15

/src/tesseract/src/textord/tabfind.cpp
Line
Count
Source (jump to first uncovered line)
1
///////////////////////////////////////////////////////////////////////
2
// File:        tabfind.cpp
3
// Description: Subclass of BBGrid to find vertically aligned blobs.
4
// Author:      Ray Smith
5
//
6
// (C) Copyright 2008, Google Inc.
7
// Licensed under the Apache License, Version 2.0 (the "License");
8
// you may not use this file except in compliance with the License.
9
// You may obtain a copy of the License at
10
// http://www.apache.org/licenses/LICENSE-2.0
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS,
13
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
// See the License for the specific language governing permissions and
15
// limitations under the License.
16
//
17
///////////////////////////////////////////////////////////////////////
18
19
#ifdef HAVE_CONFIG_H
20
#  include "config_auto.h"
21
#endif
22
23
#include "alignedblob.h"
24
#include "colpartitiongrid.h"
25
#include "detlinefit.h"
26
#include "host.h" // for NearlyEqual
27
#include "linefind.h"
28
#include "tabfind.h"
29
30
#include <algorithm>
31
32
namespace tesseract {
33
34
// Multiple of box size to search for initial gaps.
35
const int kTabRadiusFactor = 5;
36
// Min and Max multiple of height to search vertically when extrapolating.
37
const int kMinVerticalSearch = 3;
38
const int kMaxVerticalSearch = 12;
39
const int kMaxRaggedSearch = 25;
40
// Minimum number of lines in a column width to make it interesting.
41
const int kMinLinesInColumn = 10;
42
// Minimum width of a column to be interesting.
43
const int kMinColumnWidth = 200;
44
// Minimum fraction of total column lines for a column to be interesting.
45
const double kMinFractionalLinesInColumn = 0.125;
46
// Fraction of height used as alignment tolerance for aligned tabs.
47
const double kAlignedFraction = 0.03125;
48
// Maximum gutter width (in absolute inch) that we care about
49
const double kMaxGutterWidthAbsolute = 2.00;
50
// Multiplier of gridsize for min gutter width of TT_MAYBE_RAGGED blobs.
51
const int kRaggedGutterMultiple = 5;
52
// Min aspect ratio of tall objects to be considered a separator line.
53
// (These will be ignored in searching the gutter for obstructions.)
54
const double kLineFragmentAspectRatio = 10.0;
55
// Min number of points to accept after evaluation.
56
const int kMinEvaluatedTabs = 3;
57
// Up to 30 degrees is allowed for rotations of diacritic blobs.
58
// Keep this value slightly larger than kCosSmallAngle in blobbox.cpp
59
// so that the assert there never fails.
60
const double kCosMaxSkewAngle = 0.866025;
61
62
static BOOL_VAR(textord_tabfind_show_initialtabs, false, "Show tab candidates");
63
static BOOL_VAR(textord_tabfind_show_finaltabs, false, "Show tab vectors");
64
65
TabFind::TabFind(int gridsize, const ICOORD &bleft, const ICOORD &tright, TabVector_LIST *vlines,
66
                 int vertical_x, int vertical_y, int resolution)
67
0
    : AlignedBlob(gridsize, bleft, tright)
68
0
    , resolution_(resolution)
69
0
    , image_origin_(0, tright.y() - 1)
70
0
    , v_it_(&vectors_)
71
0
    , width_cb_(nullptr) {
72
0
  v_it_.add_list_after(vlines);
73
0
  SetVerticalSkewAndParallelize(vertical_x, vertical_y);
74
0
  using namespace std::placeholders; // for _1
75
0
  width_cb_ = std::bind(&TabFind::CommonWidth, this, _1);
76
0
}
77
78
0
TabFind::~TabFind() = default;
79
80
///////////////// PUBLIC functions (mostly used by TabVector). //////////////
81
82
// Insert a list of blobs into the given grid (not necessarily this).
83
// If take_ownership is true, then the blobs are removed from the source list.
84
// See InsertBlob for the other arguments.
85
// It would seem to make more sense to swap this and grid, but this way
86
// around allows grid to not be derived from TabFind, eg a ColPartitionGrid,
87
// while the grid that provides the tab stops(this) has to be derived from
88
// TabFind.
89
void TabFind::InsertBlobsToGrid(bool h_spread, bool v_spread, BLOBNBOX_LIST *blobs,
90
0
                                BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> *grid) {
91
0
  BLOBNBOX_IT blob_it(blobs);
92
0
  int b_count = 0;
93
0
  int reject_count = 0;
94
0
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
95
0
    BLOBNBOX *blob = blob_it.data();
96
    //    if (InsertBlob(true, true, blob, grid)) {
97
0
    if (InsertBlob(h_spread, v_spread, blob, grid)) {
98
0
      ++b_count;
99
0
    } else {
100
0
      ++reject_count;
101
0
    }
102
0
  }
103
0
  if (textord_debug_tabfind) {
104
0
    tprintf("Inserted %d blobs into grid, %d rejected.\n", b_count, reject_count);
105
0
  }
106
0
}
107
108
// Insert a single blob into the given grid (not necessarily this).
109
// If h_spread, then all cells covered horizontally by the box are
110
// used, otherwise, just the bottom-left. Similarly for v_spread.
111
// A side effect is that the left and right rule edges of the blob are
112
// set according to the tab vectors in this (not grid).
113
bool TabFind::InsertBlob(bool h_spread, bool v_spread, BLOBNBOX *blob,
114
0
                         BBGrid<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> *grid) {
115
0
  TBOX box = blob->bounding_box();
116
0
  blob->set_left_rule(LeftEdgeForBox(box, false, false));
117
0
  blob->set_right_rule(RightEdgeForBox(box, false, false));
118
0
  blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
119
0
  blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
120
0
  if (blob->joined_to_prev()) {
121
0
    return false;
122
0
  }
123
0
  grid->InsertBBox(h_spread, v_spread, blob);
124
0
  return true;
125
0
}
126
127
// Calls SetBlobRuleEdges for all the blobs in the given block.
128
0
void TabFind::SetBlockRuleEdges(TO_BLOCK *block) {
129
0
  SetBlobRuleEdges(&block->blobs);
130
0
  SetBlobRuleEdges(&block->small_blobs);
131
0
  SetBlobRuleEdges(&block->noise_blobs);
132
0
  SetBlobRuleEdges(&block->large_blobs);
133
0
}
134
135
// Sets the left and right rule and crossing_rules for the blobs in the given
136
// list by fiding the next outermost tabvectors for each blob.
137
0
void TabFind::SetBlobRuleEdges(BLOBNBOX_LIST *blobs) {
138
0
  BLOBNBOX_IT blob_it(blobs);
139
0
  for (blob_it.mark_cycle_pt(); !blob_it.cycled_list(); blob_it.forward()) {
140
0
    BLOBNBOX *blob = blob_it.data();
141
0
    TBOX box = blob->bounding_box();
142
0
    blob->set_left_rule(LeftEdgeForBox(box, false, false));
143
0
    blob->set_right_rule(RightEdgeForBox(box, false, false));
144
0
    blob->set_left_crossing_rule(LeftEdgeForBox(box, true, false));
145
0
    blob->set_right_crossing_rule(RightEdgeForBox(box, true, false));
146
0
  }
147
0
}
148
149
// Returns the gutter width of the given TabVector between the given y limits.
150
// Also returns x-shift to be added to the vector to clear any intersecting
151
// blobs. The shift is deducted from the returned gutter.
152
// If ignore_unmergeables is true, then blobs of UnMergeableType are
153
// ignored as if they don't exist. (Used for text on image.)
154
// max_gutter_width is used as the maximum width worth searching for in case
155
// there is nothing near the TabVector.
156
int TabFind::GutterWidth(int bottom_y, int top_y, const TabVector &v, bool ignore_unmergeables,
157
0
                         int max_gutter_width, int *required_shift) {
158
0
  bool right_to_left = v.IsLeftTab();
159
0
  int bottom_x = v.XAtY(bottom_y);
160
0
  int top_x = v.XAtY(top_y);
161
0
  int start_x = right_to_left ? std::max(top_x, bottom_x) : std::min(top_x, bottom_x);
162
0
  BlobGridSearch sidesearch(this);
163
0
  sidesearch.StartSideSearch(start_x, bottom_y, top_y);
164
0
  int min_gap = max_gutter_width;
165
0
  *required_shift = 0;
166
0
  BLOBNBOX *blob = nullptr;
167
0
  while ((blob = sidesearch.NextSideSearch(right_to_left)) != nullptr) {
168
0
    const TBOX &box = blob->bounding_box();
169
0
    if (box.bottom() >= top_y || box.top() <= bottom_y) {
170
0
      continue; // Doesn't overlap enough.
171
0
    }
172
0
    if (box.height() >= gridsize() * 2 && box.height() > box.width() * kLineFragmentAspectRatio) {
173
      // Skip likely separator line residue.
174
0
      continue;
175
0
    }
176
0
    if (ignore_unmergeables && BLOBNBOX::UnMergeableType(blob->region_type())) {
177
0
      continue; // Skip non-text if required.
178
0
    }
179
0
    int mid_y = (box.bottom() + box.top()) / 2;
180
    // We use the x at the mid-y so that the required_shift guarantees
181
    // to clear all the blobs on the tab-stop. If we use the min/max
182
    // of x at top/bottom of the blob, then exactness would be required,
183
    // which is not a good thing.
184
0
    int tab_x = v.XAtY(mid_y);
185
0
    int gap;
186
0
    if (right_to_left) {
187
0
      gap = tab_x - box.right();
188
0
      if (gap < 0 && box.left() - tab_x < *required_shift) {
189
0
        *required_shift = box.left() - tab_x;
190
0
      }
191
0
    } else {
192
0
      gap = box.left() - tab_x;
193
0
      if (gap < 0 && box.right() - tab_x > *required_shift) {
194
0
        *required_shift = box.right() - tab_x;
195
0
      }
196
0
    }
197
0
    if (gap > 0 && gap < min_gap) {
198
0
      min_gap = gap;
199
0
    }
200
0
  }
201
  // Result may be negative, in which case,  this is a really bad tabstop.
202
0
  return min_gap - abs(*required_shift);
203
0
}
204
205
// Find the gutter width and distance to inner neighbour for the given blob.
206
void TabFind::GutterWidthAndNeighbourGap(int tab_x, int mean_height, int max_gutter, bool left,
207
0
                                         BLOBNBOX *bbox, int *gutter_width, int *neighbour_gap) {
208
0
  const TBOX &box = bbox->bounding_box();
209
  // The gutter and internal sides of the box.
210
0
  int gutter_x = left ? box.left() : box.right();
211
0
  int internal_x = left ? box.right() : box.left();
212
  // On ragged edges, the gutter side of the box is away from the tabstop.
213
0
  int tab_gap = left ? gutter_x - tab_x : tab_x - gutter_x;
214
0
  *gutter_width = max_gutter;
215
  // If the box is away from the tabstop, we need to increase
216
  // the allowed gutter width.
217
0
  if (tab_gap > 0) {
218
0
    *gutter_width += tab_gap;
219
0
  }
220
0
  bool debug = WithinTestRegion(2, box.left(), box.bottom());
221
0
  if (debug) {
222
0
    tprintf("Looking in gutter\n");
223
0
  }
224
  // Find the nearest blob on the outside of the column.
225
0
  BLOBNBOX *gutter_bbox = AdjacentBlob(bbox, left, bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
226
0
                                       *gutter_width, box.top(), box.bottom());
227
0
  if (gutter_bbox != nullptr) {
228
0
    const TBOX &gutter_box = gutter_bbox->bounding_box();
229
0
    *gutter_width = left ? tab_x - gutter_box.right() : gutter_box.left() - tab_x;
230
0
  }
231
0
  if (*gutter_width >= max_gutter) {
232
    // If there is no box because a tab was in the way, get the tab coord.
233
0
    TBOX gutter_box(box);
234
0
    if (left) {
235
0
      gutter_box.set_left(tab_x - max_gutter - 1);
236
0
      gutter_box.set_right(tab_x - max_gutter);
237
0
      int tab_gutter = RightEdgeForBox(gutter_box, true, false);
238
0
      if (tab_gutter < tab_x - 1) {
239
0
        *gutter_width = tab_x - tab_gutter;
240
0
      }
241
0
    } else {
242
0
      gutter_box.set_left(tab_x + max_gutter);
243
0
      gutter_box.set_right(tab_x + max_gutter + 1);
244
0
      int tab_gutter = LeftEdgeForBox(gutter_box, true, false);
245
0
      if (tab_gutter > tab_x + 1) {
246
0
        *gutter_width = tab_gutter - tab_x;
247
0
      }
248
0
    }
249
0
  }
250
0
  if (*gutter_width > max_gutter) {
251
0
    *gutter_width = max_gutter;
252
0
  }
253
  // Now look for a neighbour on the inside.
254
0
  if (debug) {
255
0
    tprintf("Looking for neighbour\n");
256
0
  }
257
0
  BLOBNBOX *neighbour = AdjacentBlob(bbox, !left, bbox->flow() == BTFT_TEXT_ON_IMAGE, 0.0,
258
0
                                     *gutter_width, box.top(), box.bottom());
259
0
  int neighbour_edge = left ? RightEdgeForBox(box, true, false) : LeftEdgeForBox(box, true, false);
260
0
  if (neighbour != nullptr) {
261
0
    const TBOX &n_box = neighbour->bounding_box();
262
0
    if (debug) {
263
0
      tprintf("Found neighbour:");
264
0
      n_box.print();
265
0
    }
266
0
    if (left && n_box.left() < neighbour_edge) {
267
0
      neighbour_edge = n_box.left();
268
0
    } else if (!left && n_box.right() > neighbour_edge) {
269
0
      neighbour_edge = n_box.right();
270
0
    }
271
0
  }
272
0
  *neighbour_gap = left ? neighbour_edge - internal_x : internal_x - neighbour_edge;
273
0
}
274
275
// Return the x-coord that corresponds to the right edge for the given
276
// box. If there is a rule line to the right that vertically overlaps it,
277
// then return the x-coord of the rule line, otherwise return the right
278
// edge of the page. For details see RightTabForBox below.
279
0
int TabFind::RightEdgeForBox(const TBOX &box, bool crossing, bool extended) {
280
0
  TabVector *v = RightTabForBox(box, crossing, extended);
281
0
  return v == nullptr ? tright_.x() : v->XAtY((box.top() + box.bottom()) / 2);
282
0
}
283
// As RightEdgeForBox, but finds the left Edge instead.
284
0
int TabFind::LeftEdgeForBox(const TBOX &box, bool crossing, bool extended) {
285
0
  TabVector *v = LeftTabForBox(box, crossing, extended);
286
0
  return v == nullptr ? bleft_.x() : v->XAtY((box.top() + box.bottom()) / 2);
287
0
}
288
289
// This comment documents how this function works.
290
// For its purpose and arguments, see the comment in tabfind.h.
291
// TabVectors are stored sorted by perpendicular distance of middle from
292
// the global mean vertical vector. Since the individual vectors can have
293
// differing directions, their XAtY for a given y is not necessarily in the
294
// right order. Therefore the search has to be run with a margin.
295
// The middle of a vector that passes through (x,y) cannot be higher than
296
// halfway from y to the top, or lower than halfway from y to the bottom
297
// of the coordinate range; therefore, the search margin is the range of
298
// sort keys between these halfway points. Any vector with a sort key greater
299
// than the upper margin must be to the right of x at y, and likewise any
300
// vector with a sort key less than the lower margin must pass to the left
301
// of x at y.
302
0
TabVector *TabFind::RightTabForBox(const TBOX &box, bool crossing, bool extended) {
303
0
  if (v_it_.empty()) {
304
0
    return nullptr;
305
0
  }
306
0
  int top_y = box.top();
307
0
  int bottom_y = box.bottom();
308
0
  int mid_y = (top_y + bottom_y) / 2;
309
0
  int right = crossing ? (box.left() + box.right()) / 2 : box.right();
310
0
  int min_key, max_key;
311
0
  SetupTabSearch(right, mid_y, &min_key, &max_key);
312
  // Position the iterator at the first TabVector with sort_key >= min_key.
313
0
  while (!v_it_.at_first() && v_it_.data()->sort_key() >= min_key) {
314
0
    v_it_.backward();
315
0
  }
316
0
  while (!v_it_.at_last() && v_it_.data()->sort_key() < min_key) {
317
0
    v_it_.forward();
318
0
  }
319
  // Find the leftmost tab vector that overlaps and has XAtY(mid_y) >= right.
320
0
  TabVector *best_v = nullptr;
321
0
  int best_x = -1;
322
0
  int key_limit = -1;
323
0
  do {
324
0
    TabVector *v = v_it_.data();
325
0
    int x = v->XAtY(mid_y);
326
0
    if (x >= right && (v->VOverlap(top_y, bottom_y) > 0 ||
327
0
                       (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
328
0
      if (best_v == nullptr || x < best_x) {
329
0
        best_v = v;
330
0
        best_x = x;
331
        // We can guarantee that no better vector can be found if the
332
        // sort key exceeds that of the best by max_key - min_key.
333
0
        key_limit = v->sort_key() + max_key - min_key;
334
0
      }
335
0
    }
336
    // Break when the search is done to avoid wrapping the iterator and
337
    // thereby potentially slowing the next search.
338
0
    if (v_it_.at_last() || (best_v != nullptr && v->sort_key() > key_limit)) {
339
0
      break; // Prevent restarting list for next call.
340
0
    }
341
0
    v_it_.forward();
342
0
  } while (!v_it_.at_first());
343
0
  return best_v;
344
0
}
345
346
// As RightTabForBox, but finds the left TabVector instead.
347
0
TabVector *TabFind::LeftTabForBox(const TBOX &box, bool crossing, bool extended) {
348
0
  if (v_it_.empty()) {
349
0
    return nullptr;
350
0
  }
351
0
  int top_y = box.top();
352
0
  int bottom_y = box.bottom();
353
0
  int mid_y = (top_y + bottom_y) / 2;
354
0
  int left = crossing ? (box.left() + box.right()) / 2 : box.left();
355
0
  int min_key, max_key;
356
0
  SetupTabSearch(left, mid_y, &min_key, &max_key);
357
  // Position the iterator at the last TabVector with sort_key <= max_key.
358
0
  while (!v_it_.at_last() && v_it_.data()->sort_key() <= max_key) {
359
0
    v_it_.forward();
360
0
  }
361
0
  while (!v_it_.at_first() && v_it_.data()->sort_key() > max_key) {
362
0
    v_it_.backward();
363
0
  }
364
  // Find the rightmost tab vector that overlaps and has XAtY(mid_y) <= left.
365
0
  TabVector *best_v = nullptr;
366
0
  int best_x = -1;
367
0
  int key_limit = -1;
368
0
  do {
369
0
    TabVector *v = v_it_.data();
370
0
    int x = v->XAtY(mid_y);
371
0
    if (x <= left && (v->VOverlap(top_y, bottom_y) > 0 ||
372
0
                      (extended && v->ExtendedOverlap(top_y, bottom_y) > 0))) {
373
0
      if (best_v == nullptr || x > best_x) {
374
0
        best_v = v;
375
0
        best_x = x;
376
        // We can guarantee that no better vector can be found if the
377
        // sort key is less than that of the best by max_key - min_key.
378
0
        key_limit = v->sort_key() - (max_key - min_key);
379
0
      }
380
0
    }
381
    // Break when the search is done to avoid wrapping the iterator and
382
    // thereby potentially slowing the next search.
383
0
    if (v_it_.at_first() || (best_v != nullptr && v->sort_key() < key_limit)) {
384
0
      break; // Prevent restarting list for next call.
385
0
    }
386
0
    v_it_.backward();
387
0
  } while (!v_it_.at_last());
388
0
  return best_v;
389
0
}
390
391
// Return true if the given width is close to one of the common
392
// widths in column_widths_.
393
0
bool TabFind::CommonWidth(int width) {
394
0
  width /= kColumnWidthFactor;
395
0
  ICOORDELT_IT it(&column_widths_);
396
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
397
0
    ICOORDELT *w = it.data();
398
0
    if (w->x() - 1 <= width && width <= w->y() + 1) {
399
0
      return true;
400
0
    }
401
0
  }
402
0
  return false;
403
0
}
404
405
// Return true if the sizes are more than a
406
// factor of 2 different.
407
0
bool TabFind::DifferentSizes(int size1, int size2) {
408
0
  return size1 > size2 * 2 || size2 > size1 * 2;
409
0
}
410
411
// Return true if the sizes are more than a
412
// factor of 5 different.
413
0
bool TabFind::VeryDifferentSizes(int size1, int size2) {
414
0
  return size1 > size2 * 5 || size2 > size1 * 5;
415
0
}
416
417
///////////////// PROTECTED functions (used by ColumnFinder). //////////////
418
419
// Top-level function to find TabVectors in an input page block.
420
// Returns false if the detected skew angle is impossible.
421
// Applies the detected skew angle to deskew the tabs, blobs and part_grid.
422
bool TabFind::FindTabVectors(TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block,
423
                             int min_gutter_width, double tabfind_aligned_gap_fraction,
424
0
                             ColPartitionGrid *part_grid, FCOORD *deskew, FCOORD *reskew) {
425
0
  ScrollView *tab_win =
426
0
      FindInitialTabVectors(image_blobs, min_gutter_width, tabfind_aligned_gap_fraction, block);
427
0
  ComputeColumnWidths(tab_win, part_grid);
428
0
  TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this);
429
0
  SortVectors();
430
0
  CleanupTabs();
431
0
  if (!Deskew(hlines, image_blobs, block, deskew, reskew)) {
432
0
    return false; // Skew angle is too large.
433
0
  }
434
0
  part_grid->Deskew(*deskew);
435
0
  ApplyTabConstraints();
436
#ifndef GRAPHICS_DISABLED
437
  if (textord_tabfind_show_finaltabs) {
438
    tab_win = MakeWindow(640, 50, "FinalTabs");
439
    DisplayBoxes(tab_win);
440
    DisplayTabs("FinalTabs", tab_win);
441
    tab_win = DisplayTabVectors(tab_win);
442
  }
443
#endif // !GRAPHICS_DISABLED
444
0
  return true;
445
0
}
446
447
// Top-level function to not find TabVectors in an input page block,
448
// but setup for single column mode.
449
void TabFind::DontFindTabVectors(BLOBNBOX_LIST *image_blobs, TO_BLOCK *block, FCOORD *deskew,
450
0
                                 FCOORD *reskew) {
451
0
  InsertBlobsToGrid(false, false, image_blobs, this);
452
0
  InsertBlobsToGrid(true, false, &block->blobs, this);
453
0
  deskew->set_x(1.0f);
454
0
  deskew->set_y(0.0f);
455
0
  reskew->set_x(1.0f);
456
0
  reskew->set_y(0.0f);
457
0
}
458
459
// Cleans up the lists of blobs in the block ready for use by TabFind.
460
// Large blobs that look like text are moved to the main blobs list.
461
// Main blobs that are superseded by the image blobs are deleted.
462
0
void TabFind::TidyBlobs(TO_BLOCK *block) {
463
0
  BLOBNBOX_IT large_it = &block->large_blobs;
464
0
  BLOBNBOX_IT blob_it = &block->blobs;
465
0
  int b_count = 0;
466
0
  for (large_it.mark_cycle_pt(); !large_it.cycled_list(); large_it.forward()) {
467
0
    BLOBNBOX *large_blob = large_it.data();
468
0
    if (large_blob->owner() != nullptr) {
469
0
      blob_it.add_to_end(large_it.extract());
470
0
      ++b_count;
471
0
    }
472
0
  }
473
0
  if (textord_debug_tabfind) {
474
0
    tprintf("Moved %d large blobs to normal list\n", b_count);
475
#ifndef GRAPHICS_DISABLED
476
    ScrollView *rej_win = MakeWindow(500, 300, "Image blobs");
477
    block->plot_graded_blobs(rej_win);
478
    block->plot_noise_blobs(rej_win);
479
    rej_win->Update();
480
#endif // !GRAPHICS_DISABLED
481
0
  }
482
0
  block->DeleteUnownedNoise();
483
0
}
484
485
// Helper function to setup search limits for *TabForBox.
486
0
void TabFind::SetupTabSearch(int x, int y, int *min_key, int *max_key) {
487
0
  int key1 = TabVector::SortKey(vertical_skew_, x, (y + tright_.y()) / 2);
488
0
  int key2 = TabVector::SortKey(vertical_skew_, x, (y + bleft_.y()) / 2);
489
0
  *min_key = std::min(key1, key2);
490
0
  *max_key = std::max(key1, key2);
491
0
}
492
493
#ifndef GRAPHICS_DISABLED
494
495
ScrollView *TabFind::DisplayTabVectors(ScrollView *tab_win) {
496
  // For every vector, display it.
497
  TabVector_IT it(&vectors_);
498
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
499
    TabVector *vector = it.data();
500
    vector->Display(tab_win);
501
  }
502
  tab_win->Update();
503
  return tab_win;
504
}
505
506
#endif
507
508
// PRIVATE CODE.
509
//
510
// First part of FindTabVectors, which may be used twice if the text
511
// is mostly of vertical alignment.
512
ScrollView *TabFind::FindInitialTabVectors(BLOBNBOX_LIST *image_blobs, int min_gutter_width,
513
0
                                           double tabfind_aligned_gap_fraction, TO_BLOCK *block) {
514
#ifndef GRAPHICS_DISABLED
515
  if (textord_tabfind_show_initialtabs) {
516
    ScrollView *line_win = MakeWindow(0, 0, "VerticalLines");
517
    line_win = DisplayTabVectors(line_win);
518
  }
519
#endif
520
  // Prepare the grid.
521
0
  if (image_blobs != nullptr) {
522
0
    InsertBlobsToGrid(true, false, image_blobs, this);
523
0
  }
524
0
  InsertBlobsToGrid(true, false, &block->blobs, this);
525
0
  ScrollView *initial_win = FindTabBoxes(min_gutter_width, tabfind_aligned_gap_fraction);
526
0
  FindAllTabVectors(min_gutter_width);
527
528
0
  TabVector::MergeSimilarTabVectors(vertical_skew_, &vectors_, this);
529
0
  SortVectors();
530
0
  EvaluateTabs();
531
#ifndef GRAPHICS_DISABLED
532
  if (textord_tabfind_show_initialtabs && initial_win != nullptr) {
533
    initial_win = DisplayTabVectors(initial_win);
534
  }
535
#endif
536
0
  MarkVerticalText();
537
0
  return initial_win;
538
0
}
539
540
#ifndef GRAPHICS_DISABLED
541
542
// Helper displays all the boxes in the given vector on the given window.
543
static void DisplayBoxVector(const std::vector<BLOBNBOX *> &boxes, ScrollView *win) {
544
  for (auto boxe : boxes) {
545
    TBOX box = boxe->bounding_box();
546
    int left_x = box.left();
547
    int right_x = box.right();
548
    int top_y = box.top();
549
    int bottom_y = box.bottom();
550
    ScrollView::Color box_color = boxe->BoxColor();
551
    win->Pen(box_color);
552
    win->Rectangle(left_x, bottom_y, right_x, top_y);
553
  }
554
  win->Update();
555
}
556
557
#endif // !GRAPHICS_DISABLED
558
559
// For each box in the grid, decide whether it is a candidate tab-stop,
560
// and if so add it to the left/right tab boxes.
561
0
ScrollView *TabFind::FindTabBoxes(int min_gutter_width, double tabfind_aligned_gap_fraction) {
562
0
  left_tab_boxes_.clear();
563
0
  right_tab_boxes_.clear();
564
  // For every bbox in the grid, determine whether it uses a tab on an edge.
565
0
  BlobGridSearch gsearch(this);
566
0
  gsearch.StartFullSearch();
567
0
  BLOBNBOX *bbox;
568
0
  while ((bbox = gsearch.NextFullSearch()) != nullptr) {
569
0
    if (TestBoxForTabs(bbox, min_gutter_width, tabfind_aligned_gap_fraction)) {
570
      // If it is any kind of tab, insert it into the vectors.
571
0
      if (bbox->left_tab_type() != TT_NONE) {
572
0
        left_tab_boxes_.push_back(bbox);
573
0
      }
574
0
      if (bbox->right_tab_type() != TT_NONE) {
575
0
        right_tab_boxes_.push_back(bbox);
576
0
      }
577
0
    }
578
0
  }
579
  // Sort left tabs by left and right by right to see the outermost one first
580
  // on a ragged tab.
581
0
  std::sort(left_tab_boxes_.begin(), left_tab_boxes_.end(), StdSortByBoxLeft<BLOBNBOX>);
582
0
  std::sort(right_tab_boxes_.begin(), right_tab_boxes_.end(), StdSortRightToLeft<BLOBNBOX>);
583
0
  ScrollView *tab_win = nullptr;
584
#ifndef GRAPHICS_DISABLED
585
  if (textord_tabfind_show_initialtabs) {
586
    tab_win = MakeWindow(0, 100, "InitialTabs");
587
    tab_win->Pen(ScrollView::BLUE);
588
    tab_win->Brush(ScrollView::NONE);
589
    // Display the left and right tab boxes.
590
    DisplayBoxVector(left_tab_boxes_, tab_win);
591
    DisplayBoxVector(right_tab_boxes_, tab_win);
592
    tab_win = DisplayTabs("Tabs", tab_win);
593
  }
594
#endif // !GRAPHICS_DISABLED
595
0
  return tab_win;
596
0
}
597
598
bool TabFind::TestBoxForTabs(BLOBNBOX *bbox, int min_gutter_width,
599
0
                             double tabfind_aligned_gap_fraction) {
600
0
  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> radsearch(this);
601
0
  TBOX box = bbox->bounding_box();
602
  // If there are separator lines, get the column edges.
603
0
  int left_column_edge = bbox->left_rule();
604
0
  int right_column_edge = bbox->right_rule();
605
  // The edges of the bounding box of the blob being processed.
606
0
  int left_x = box.left();
607
0
  int right_x = box.right();
608
0
  int top_y = box.top();
609
0
  int bottom_y = box.bottom();
610
0
  int height = box.height();
611
0
  bool debug = WithinTestRegion(3, left_x, top_y);
612
0
  if (debug) {
613
0
    tprintf("Column edges for blob at (%d,%d)->(%d,%d) are [%d, %d]\n", left_x, top_y, right_x,
614
0
            bottom_y, left_column_edge, right_column_edge);
615
0
  }
616
  // Compute a search radius based on a multiple of the height.
617
0
  int radius = (height * kTabRadiusFactor + gridsize_ - 1) / gridsize_;
618
0
  radsearch.StartRadSearch((left_x + right_x) / 2, (top_y + bottom_y) / 2, radius);
619
  // In Vertical Page mode, once we have an estimate of the vertical line
620
  // spacing, the minimum amount of gutter space before a possible tab is
621
  // increased under the assumption that column partition is always larger
622
  // than line spacing.
623
0
  int min_spacing = static_cast<int>(height * tabfind_aligned_gap_fraction);
624
0
  if (min_gutter_width > min_spacing) {
625
0
    min_spacing = min_gutter_width;
626
0
  }
627
0
  int min_ragged_gutter = kRaggedGutterMultiple * gridsize();
628
0
  if (min_gutter_width > min_ragged_gutter) {
629
0
    min_ragged_gutter = min_gutter_width;
630
0
  }
631
0
  int target_right = left_x - min_spacing;
632
0
  int target_left = right_x + min_spacing;
633
  // We will be evaluating whether the left edge could be a left tab, and
634
  // whether the right edge could be a right tab.
635
  // A box can be a tab if its bool is_(left/right)_tab remains true, meaning
636
  // that no blobs have been found in the gutter during the radial search.
637
  // A box can also be a tab if there are objects in the gutter only above
638
  // or only below, and there are aligned objects on the opposite side, but
639
  // not too many unaligned objects. The maybe_(left/right)_tab_up counts
640
  // aligned objects above and negatively counts unaligned objects above,
641
  // and is set to -INT32_MAX if a gutter object is found above.
642
  // The other 3 maybe ints work similarly for the other sides.
643
  // These conditions are very strict, to minimize false positives, and really
644
  // only aligned tabs and outermost ragged tab blobs will qualify, so we
645
  // also have maybe_ragged_left/right with less stringent rules.
646
  // A blob that is maybe_ragged_left/right will be further qualified later,
647
  // using the min_ragged_gutter.
648
0
  bool is_left_tab = true;
649
0
  bool is_right_tab = true;
650
0
  bool maybe_ragged_left = true;
651
0
  bool maybe_ragged_right = true;
652
0
  int maybe_left_tab_up = 0;
653
0
  int maybe_right_tab_up = 0;
654
0
  int maybe_left_tab_down = 0;
655
0
  int maybe_right_tab_down = 0;
656
0
  if (bbox->leader_on_left()) {
657
0
    is_left_tab = false;
658
0
    maybe_ragged_left = false;
659
0
    maybe_left_tab_up = -INT32_MAX;
660
0
    maybe_left_tab_down = -INT32_MAX;
661
0
  }
662
0
  if (bbox->leader_on_right()) {
663
0
    is_right_tab = false;
664
0
    maybe_ragged_right = false;
665
0
    maybe_right_tab_up = -INT32_MAX;
666
0
    maybe_right_tab_down = -INT32_MAX;
667
0
  }
668
0
  int alignment_tolerance = static_cast<int>(resolution_ * kAlignedFraction);
669
0
  BLOBNBOX *neighbour = nullptr;
670
0
  while ((neighbour = radsearch.NextRadSearch()) != nullptr) {
671
0
    if (neighbour == bbox) {
672
0
      continue;
673
0
    }
674
0
    TBOX nbox = neighbour->bounding_box();
675
0
    int n_left = nbox.left();
676
0
    int n_right = nbox.right();
677
0
    if (debug) {
678
0
      tprintf("Neighbour at (%d,%d)->(%d,%d)\n", n_left, nbox.bottom(), n_right, nbox.top());
679
0
    }
680
    // If the neighbouring blob is the wrong side of a separator line, then it
681
    // "doesn't exist" as far as we are concerned.
682
0
    if (n_right > right_column_edge || n_left < left_column_edge ||
683
0
        left_x < neighbour->left_rule() || right_x > neighbour->right_rule()) {
684
0
      continue; // Separator line in the way.
685
0
    }
686
0
    int n_mid_x = (n_left + n_right) / 2;
687
0
    int n_mid_y = (nbox.top() + nbox.bottom()) / 2;
688
0
    if (n_mid_x <= left_x && n_right >= target_right) {
689
0
      if (debug) {
690
0
        tprintf("Not a left tab\n");
691
0
      }
692
0
      is_left_tab = false;
693
0
      if (n_mid_y < top_y) {
694
0
        maybe_left_tab_down = -INT32_MAX;
695
0
      }
696
0
      if (n_mid_y > bottom_y) {
697
0
        maybe_left_tab_up = -INT32_MAX;
698
0
      }
699
0
    } else if (NearlyEqual(left_x, n_left, alignment_tolerance)) {
700
0
      if (debug) {
701
0
        tprintf("Maybe a left tab\n");
702
0
      }
703
0
      if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX) {
704
0
        ++maybe_left_tab_up;
705
0
      }
706
0
      if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX) {
707
0
        ++maybe_left_tab_down;
708
0
      }
709
0
    } else if (n_left < left_x && n_right >= left_x) {
710
      // Overlaps but not aligned so negative points on a maybe.
711
0
      if (debug) {
712
0
        tprintf("Maybe Not a left tab\n");
713
0
      }
714
0
      if (n_mid_y > top_y && maybe_left_tab_up > -INT32_MAX) {
715
0
        --maybe_left_tab_up;
716
0
      }
717
0
      if (n_mid_y < bottom_y && maybe_left_tab_down > -INT32_MAX) {
718
0
        --maybe_left_tab_down;
719
0
      }
720
0
    }
721
0
    if (n_left < left_x && nbox.y_overlap(box) && n_right >= target_right) {
722
0
      maybe_ragged_left = false;
723
0
      if (debug) {
724
0
        tprintf("Not a ragged left\n");
725
0
      }
726
0
    }
727
0
    if (n_mid_x >= right_x && n_left <= target_left) {
728
0
      if (debug) {
729
0
        tprintf("Not a right tab\n");
730
0
      }
731
0
      is_right_tab = false;
732
0
      if (n_mid_y < top_y) {
733
0
        maybe_right_tab_down = -INT32_MAX;
734
0
      }
735
0
      if (n_mid_y > bottom_y) {
736
0
        maybe_right_tab_up = -INT32_MAX;
737
0
      }
738
0
    } else if (NearlyEqual(right_x, n_right, alignment_tolerance)) {
739
0
      if (debug) {
740
0
        tprintf("Maybe a right tab\n");
741
0
      }
742
0
      if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX) {
743
0
        ++maybe_right_tab_up;
744
0
      }
745
0
      if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX) {
746
0
        ++maybe_right_tab_down;
747
0
      }
748
0
    } else if (n_right > right_x && n_left <= right_x) {
749
      // Overlaps but not aligned so negative points on a maybe.
750
0
      if (debug) {
751
0
        tprintf("Maybe Not a right tab\n");
752
0
      }
753
0
      if (n_mid_y > top_y && maybe_right_tab_up > -INT32_MAX) {
754
0
        --maybe_right_tab_up;
755
0
      }
756
0
      if (n_mid_y < bottom_y && maybe_right_tab_down > -INT32_MAX) {
757
0
        --maybe_right_tab_down;
758
0
      }
759
0
    }
760
0
    if (n_right > right_x && nbox.y_overlap(box) && n_left <= target_left) {
761
0
      maybe_ragged_right = false;
762
0
      if (debug) {
763
0
        tprintf("Not a ragged right\n");
764
0
      }
765
0
    }
766
0
    if (maybe_left_tab_down == -INT32_MAX && maybe_left_tab_up == -INT32_MAX &&
767
0
        maybe_right_tab_down == -INT32_MAX && maybe_right_tab_up == -INT32_MAX) {
768
0
      break;
769
0
    }
770
0
  }
771
0
  if (is_left_tab || maybe_left_tab_up > 1 || maybe_left_tab_down > 1) {
772
0
    bbox->set_left_tab_type(TT_MAYBE_ALIGNED);
773
0
  } else if (maybe_ragged_left && ConfirmRaggedLeft(bbox, min_ragged_gutter)) {
774
0
    bbox->set_left_tab_type(TT_MAYBE_RAGGED);
775
0
  } else {
776
0
    bbox->set_left_tab_type(TT_NONE);
777
0
  }
778
0
  if (is_right_tab || maybe_right_tab_up > 1 || maybe_right_tab_down > 1) {
779
0
    bbox->set_right_tab_type(TT_MAYBE_ALIGNED);
780
0
  } else if (maybe_ragged_right && ConfirmRaggedRight(bbox, min_ragged_gutter)) {
781
0
    bbox->set_right_tab_type(TT_MAYBE_RAGGED);
782
0
  } else {
783
0
    bbox->set_right_tab_type(TT_NONE);
784
0
  }
785
0
  if (debug) {
786
0
    tprintf("Left result = %s, Right result=%s\n",
787
0
            bbox->left_tab_type() == TT_MAYBE_ALIGNED
788
0
                ? "Aligned"
789
0
                : (bbox->left_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"),
790
0
            bbox->right_tab_type() == TT_MAYBE_ALIGNED
791
0
                ? "Aligned"
792
0
                : (bbox->right_tab_type() == TT_MAYBE_RAGGED ? "Ragged" : "None"));
793
0
  }
794
0
  return bbox->left_tab_type() != TT_NONE || bbox->right_tab_type() != TT_NONE;
795
0
}
796
797
// Returns true if there is nothing in the rectangle of width min_gutter to
798
// the left of bbox.
799
0
bool TabFind::ConfirmRaggedLeft(BLOBNBOX *bbox, int min_gutter) {
800
0
  TBOX search_box(bbox->bounding_box());
801
0
  search_box.set_right(search_box.left());
802
0
  search_box.set_left(search_box.left() - min_gutter);
803
0
  return NothingYOverlapsInBox(search_box, bbox->bounding_box());
804
0
}
805
806
// Returns true if there is nothing in the rectangle of width min_gutter to
807
// the right of bbox.
808
0
bool TabFind::ConfirmRaggedRight(BLOBNBOX *bbox, int min_gutter) {
809
0
  TBOX search_box(bbox->bounding_box());
810
0
  search_box.set_left(search_box.right());
811
0
  search_box.set_right(search_box.right() + min_gutter);
812
0
  return NothingYOverlapsInBox(search_box, bbox->bounding_box());
813
0
}
814
815
// Returns true if there is nothing in the given search_box that vertically
816
// overlaps target_box other than target_box itself.
817
0
bool TabFind::NothingYOverlapsInBox(const TBOX &search_box, const TBOX &target_box) {
818
0
  BlobGridSearch rsearch(this);
819
0
  rsearch.StartRectSearch(search_box);
820
0
  BLOBNBOX *blob;
821
0
  while ((blob = rsearch.NextRectSearch()) != nullptr) {
822
0
    const TBOX &box = blob->bounding_box();
823
0
    if (box.y_overlap(target_box) && !(box == target_box)) {
824
0
      return false;
825
0
    }
826
0
  }
827
0
  return true;
828
0
}
829
830
0
void TabFind::FindAllTabVectors(int min_gutter_width) {
831
  // A list of vectors that will be created in estimating the skew.
832
0
  TabVector_LIST dummy_vectors;
833
  // An estimate of the vertical direction, revised as more lines are added.
834
0
  int vertical_x = 0;
835
0
  int vertical_y = 1;
836
  // Find an estimate of the vertical direction by finding some tab vectors.
837
  // Slowly up the search size until we get some vectors.
838
0
  for (int search_size = kMinVerticalSearch; search_size < kMaxVerticalSearch;
839
0
       search_size += kMinVerticalSearch) {
840
0
    int vector_count = FindTabVectors(search_size, TA_LEFT_ALIGNED, min_gutter_width,
841
0
                                      &dummy_vectors, &vertical_x, &vertical_y);
842
0
    vector_count += FindTabVectors(search_size, TA_RIGHT_ALIGNED, min_gutter_width, &dummy_vectors,
843
0
                                   &vertical_x, &vertical_y);
844
0
    if (vector_count > 0) {
845
0
      break;
846
0
    }
847
0
  }
848
  // Get rid of the test vectors and reset the types of the tabs.
849
0
  dummy_vectors.clear();
850
0
  for (auto bbox : left_tab_boxes_) {
851
0
    if (bbox->left_tab_type() == TT_CONFIRMED) {
852
0
      bbox->set_left_tab_type(TT_MAYBE_ALIGNED);
853
0
    }
854
0
  }
855
0
  for (auto bbox : right_tab_boxes_) {
856
0
    if (bbox->right_tab_type() == TT_CONFIRMED) {
857
0
      bbox->set_right_tab_type(TT_MAYBE_ALIGNED);
858
0
    }
859
0
  }
860
0
  if (textord_debug_tabfind) {
861
0
    tprintf("Beginning real tab search with vertical = %d,%d...\n", vertical_x, vertical_y);
862
0
  }
863
  // Now do the real thing ,but keep the vectors in the dummy_vectors list
864
  // until they are all done, so we don't get the tab vectors confused with
865
  // the rule line vectors.
866
0
  FindTabVectors(kMaxVerticalSearch, TA_LEFT_ALIGNED, min_gutter_width, &dummy_vectors, &vertical_x,
867
0
                 &vertical_y);
868
0
  FindTabVectors(kMaxVerticalSearch, TA_RIGHT_ALIGNED, min_gutter_width, &dummy_vectors,
869
0
                 &vertical_x, &vertical_y);
870
0
  FindTabVectors(kMaxRaggedSearch, TA_LEFT_RAGGED, min_gutter_width, &dummy_vectors, &vertical_x,
871
0
                 &vertical_y);
872
0
  FindTabVectors(kMaxRaggedSearch, TA_RIGHT_RAGGED, min_gutter_width, &dummy_vectors, &vertical_x,
873
0
                 &vertical_y);
874
  // Now add the vectors to the vectors_ list.
875
0
  TabVector_IT v_it(&vectors_);
876
0
  v_it.add_list_after(&dummy_vectors);
877
  // Now use the summed (mean) vertical vector as the direction for everything.
878
0
  SetVerticalSkewAndParallelize(vertical_x, vertical_y);
879
0
}
880
881
// Helper for FindAllTabVectors finds the vectors of a particular type.
882
int TabFind::FindTabVectors(int search_size_multiple, TabAlignment alignment, int min_gutter_width,
883
0
                            TabVector_LIST *vectors, int *vertical_x, int *vertical_y) {
884
0
  TabVector_IT vector_it(vectors);
885
0
  int vector_count = 0;
886
  // Search the right or left tab boxes, looking for tab vectors.
887
0
  bool right = alignment == TA_RIGHT_ALIGNED || alignment == TA_RIGHT_RAGGED;
888
0
  const std::vector<BLOBNBOX *> &boxes = right ? right_tab_boxes_ : left_tab_boxes_;
889
0
  for (auto bbox : boxes) {
890
0
    if ((!right && bbox->left_tab_type() == TT_MAYBE_ALIGNED) ||
891
0
        (right && bbox->right_tab_type() == TT_MAYBE_ALIGNED)) {
892
0
      TabVector *vector = FindTabVector(search_size_multiple, min_gutter_width, alignment, bbox,
893
0
                                        vertical_x, vertical_y);
894
0
      if (vector != nullptr) {
895
0
        ++vector_count;
896
0
        vector_it.add_to_end(vector);
897
0
      }
898
0
    }
899
0
  }
900
0
  return vector_count;
901
0
}
902
903
// Finds a vector corresponding to a tabstop running through the
904
// given box of the given alignment type.
905
// search_size_multiple is a multiple of height used to control
906
// the size of the search.
907
// vertical_x and y are updated with an estimate of the real
908
// vertical direction. (skew finding.)
909
// Returns nullptr if no decent tabstop can be found.
910
TabVector *TabFind::FindTabVector(int search_size_multiple, int min_gutter_width,
911
                                  TabAlignment alignment, BLOBNBOX *bbox, int *vertical_x,
912
0
                                  int *vertical_y) {
913
0
  int height = std::max(static_cast<int>(bbox->bounding_box().height()), gridsize());
914
0
  AlignedBlobParams align_params(*vertical_x, *vertical_y, height, search_size_multiple,
915
0
                                 min_gutter_width, resolution_, alignment);
916
  // FindVerticalAlignment is in the parent (AlignedBlob) class.
917
0
  return FindVerticalAlignment(align_params, bbox, vertical_x, vertical_y);
918
0
}
919
920
// Set the vertical_skew_ member from the given vector and refit
921
// all vectors parallel to the skew vector.
922
0
void TabFind::SetVerticalSkewAndParallelize(int vertical_x, int vertical_y) {
923
  // Fit the vertical vector into an ICOORD, which is 16 bit.
924
0
  vertical_skew_.set_with_shrink(vertical_x, vertical_y);
925
0
  if (textord_debug_tabfind) {
926
0
    tprintf("Vertical skew vector=(%d,%d)\n", vertical_skew_.x(), vertical_skew_.y());
927
0
  }
928
0
  v_it_.set_to_list(&vectors_);
929
0
  for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
930
0
    TabVector *v = v_it_.data();
931
0
    v->Fit(vertical_skew_, true);
932
0
  }
933
  // Now sort the vectors as their direction has potentially changed.
934
0
  SortVectors();
935
0
}
936
937
// Sort all the current vectors using the given vertical direction vector.
938
0
void TabFind::SortVectors() {
939
0
  vectors_.sort(TabVector::SortVectorsByKey);
940
0
  v_it_.set_to_list(&vectors_);
941
0
}
942
943
// Evaluate all the current tab vectors.
944
0
void TabFind::EvaluateTabs() {
945
0
  TabVector_IT rule_it(&vectors_);
946
0
  for (rule_it.mark_cycle_pt(); !rule_it.cycled_list(); rule_it.forward()) {
947
0
    TabVector *tab = rule_it.data();
948
0
    if (!tab->IsSeparator()) {
949
0
      tab->Evaluate(vertical_skew_, this);
950
0
      if (tab->BoxCount() < kMinEvaluatedTabs) {
951
0
        if (textord_debug_tabfind > 2) {
952
0
          tab->Print("Too few boxes");
953
0
        }
954
0
        delete rule_it.extract();
955
0
        v_it_.set_to_list(&vectors_);
956
0
      } else if (WithinTestRegion(3, tab->startpt().x(), tab->startpt().y())) {
957
0
        tab->Print("Evaluated tab");
958
0
      }
959
0
    }
960
0
  }
961
0
}
962
963
// Trace textlines from one side to the other of each tab vector, saving
964
// the most frequent column widths found in a list so that a given width
965
// can be tested for being a common width with a simple callback function.
966
0
void TabFind::ComputeColumnWidths(ScrollView *tab_win, ColPartitionGrid *part_grid) {
967
#ifndef GRAPHICS_DISABLED
968
  if (tab_win != nullptr) {
969
    tab_win->Pen(ScrollView::WHITE);
970
  }
971
#endif // !GRAPHICS_DISABLED
972
  // Accumulate column sections into a STATS
973
0
  int col_widths_size = (tright_.x() - bleft_.x()) / kColumnWidthFactor;
974
0
  STATS col_widths(0, col_widths_size);
975
0
  ApplyPartitionsToColumnWidths(part_grid, &col_widths);
976
#ifndef GRAPHICS_DISABLED
977
  if (tab_win != nullptr) {
978
    tab_win->Update();
979
  }
980
#endif // !GRAPHICS_DISABLED
981
0
  if (textord_debug_tabfind > 1) {
982
0
    col_widths.print();
983
0
  }
984
  // Now make a list of column widths.
985
0
  MakeColumnWidths(col_widths_size, &col_widths);
986
  // Turn the column width into a range.
987
0
  ApplyPartitionsToColumnWidths(part_grid, nullptr);
988
0
}
989
990
// Finds column width and:
991
//   if col_widths is not null (pass1):
992
//     pair-up tab vectors with existing ColPartitions and accumulate widths.
993
//   else (pass2):
994
//     find the largest real partition width for each recorded column width,
995
//     to be used as the minimum acceptable width.
996
0
void TabFind::ApplyPartitionsToColumnWidths(ColPartitionGrid *part_grid, STATS *col_widths) {
997
  // For every ColPartition in the part_grid, add partners to the tabvectors
998
  // and accumulate the column widths.
999
0
  ColPartitionGridSearch gsearch(part_grid);
1000
0
  gsearch.StartFullSearch();
1001
0
  ColPartition *part;
1002
0
  while ((part = gsearch.NextFullSearch()) != nullptr) {
1003
0
    BLOBNBOX_C_IT blob_it(part->boxes());
1004
0
    if (blob_it.empty()) {
1005
0
      continue;
1006
0
    }
1007
0
    BLOBNBOX *left_blob = blob_it.data();
1008
0
    blob_it.move_to_last();
1009
0
    BLOBNBOX *right_blob = blob_it.data();
1010
0
    TabVector *left_vector = LeftTabForBox(left_blob->bounding_box(), true, false);
1011
0
    if (left_vector == nullptr || left_vector->IsRightTab()) {
1012
0
      continue;
1013
0
    }
1014
0
    TabVector *right_vector = RightTabForBox(right_blob->bounding_box(), true, false);
1015
0
    if (right_vector == nullptr || right_vector->IsLeftTab()) {
1016
0
      continue;
1017
0
    }
1018
1019
0
    int line_left = left_vector->XAtY(left_blob->bounding_box().bottom());
1020
0
    int line_right = right_vector->XAtY(right_blob->bounding_box().bottom());
1021
    // Add to STATS of measurements if the width is significant.
1022
0
    int width = line_right - line_left;
1023
0
    if (col_widths != nullptr) {
1024
0
      AddPartnerVector(left_blob, right_blob, left_vector, right_vector);
1025
0
      if (width >= kMinColumnWidth) {
1026
0
        col_widths->add(width / kColumnWidthFactor, 1);
1027
0
      }
1028
0
    } else {
1029
0
      width /= kColumnWidthFactor;
1030
0
      ICOORDELT_IT it(&column_widths_);
1031
0
      for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1032
0
        ICOORDELT *w = it.data();
1033
0
        if (NearlyEqual<int>(width, w->y(), 1)) {
1034
0
          int true_width = part->bounding_box().width() / kColumnWidthFactor;
1035
0
          if (true_width <= w->y() && true_width > w->x()) {
1036
0
            w->set_x(true_width);
1037
0
          }
1038
0
          break;
1039
0
        }
1040
0
      }
1041
0
    }
1042
0
  }
1043
0
}
1044
1045
// Helper makes the list of common column widths in column_widths_ from the
1046
// input col_widths. Destroys the content of col_widths by repeatedly
1047
// finding the mode and erasing the peak.
1048
0
void TabFind::MakeColumnWidths(int col_widths_size, STATS *col_widths) {
1049
0
  ICOORDELT_IT w_it(&column_widths_);
1050
0
  int total_col_count = col_widths->get_total();
1051
0
  while (col_widths->get_total() > 0) {
1052
0
    int width = col_widths->mode();
1053
0
    int col_count = col_widths->pile_count(width);
1054
0
    col_widths->add(width, -col_count);
1055
    // Get the entire peak.
1056
0
    for (int left = width - 1; left > 0 && col_widths->pile_count(left) > 0; --left) {
1057
0
      int new_count = col_widths->pile_count(left);
1058
0
      col_count += new_count;
1059
0
      col_widths->add(left, -new_count);
1060
0
    }
1061
0
    for (int right = width + 1; right < col_widths_size && col_widths->pile_count(right) > 0;
1062
0
         ++right) {
1063
0
      int new_count = col_widths->pile_count(right);
1064
0
      col_count += new_count;
1065
0
      col_widths->add(right, -new_count);
1066
0
    }
1067
0
    if (col_count > kMinLinesInColumn &&
1068
0
        col_count > kMinFractionalLinesInColumn * total_col_count) {
1069
0
      auto *w = new ICOORDELT(0, width);
1070
0
      w_it.add_after_then_move(w);
1071
0
      if (textord_debug_tabfind) {
1072
0
        tprintf("Column of width %d has %d = %.2f%% lines\n", width * kColumnWidthFactor, col_count,
1073
0
                100.0 * col_count / total_col_count);
1074
0
      }
1075
0
    }
1076
0
  }
1077
0
}
1078
1079
// Mark blobs as being in a vertical text line where that is the case.
1080
// Returns true if the majority of the image is vertical text lines.
1081
0
void TabFind::MarkVerticalText() {
1082
0
  if (textord_debug_tabfind) {
1083
0
    tprintf("Checking for vertical lines\n");
1084
0
  }
1085
0
  BlobGridSearch gsearch(this);
1086
0
  gsearch.StartFullSearch();
1087
0
  BLOBNBOX *blob = nullptr;
1088
0
  while ((blob = gsearch.NextFullSearch()) != nullptr) {
1089
0
    if (blob->region_type() < BRT_UNKNOWN) {
1090
0
      continue;
1091
0
    }
1092
0
    if (blob->UniquelyVertical()) {
1093
0
      blob->set_region_type(BRT_VERT_TEXT);
1094
0
    }
1095
0
  }
1096
0
}
1097
1098
0
int TabFind::FindMedianGutterWidth(TabVector_LIST *lines) {
1099
0
  TabVector_IT it(lines);
1100
0
  int prev_right = -1;
1101
0
  int max_gap = static_cast<int>(kMaxGutterWidthAbsolute * resolution_);
1102
0
  STATS gaps(0, max_gap - 1);
1103
0
  STATS heights(0, max_gap - 1);
1104
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1105
0
    TabVector *v = it.data();
1106
0
    TabVector *partner = v->GetSinglePartner();
1107
0
    if (!v->IsLeftTab() || v->IsSeparator() || !partner) {
1108
0
      continue;
1109
0
    }
1110
0
    heights.add(partner->startpt().x() - v->startpt().x(), 1);
1111
0
    if (prev_right > 0 && v->startpt().x() > prev_right) {
1112
0
      gaps.add(v->startpt().x() - prev_right, 1);
1113
0
    }
1114
0
    prev_right = partner->startpt().x();
1115
0
  }
1116
0
  if (textord_debug_tabfind) {
1117
0
    tprintf("TabGutter total %d  median_gap %.2f  median_hgt %.2f\n", gaps.get_total(),
1118
0
            gaps.median(), heights.median());
1119
0
  }
1120
0
  if (gaps.get_total() < kMinLinesInColumn) {
1121
0
    return 0;
1122
0
  }
1123
0
  return static_cast<int>(gaps.median());
1124
0
}
1125
1126
// Find the next adjacent (looking to the left or right) blob on this text
1127
// line, with the constraint that it must vertically significantly overlap
1128
// the [top_y, bottom_y] range.
1129
// If ignore_images is true, then blobs with aligned_text() < 0 are treated
1130
// as if they do not exist.
1131
BLOBNBOX *TabFind::AdjacentBlob(const BLOBNBOX *bbox, bool look_left, bool ignore_images,
1132
                                double min_overlap_fraction, int gap_limit, int top_y,
1133
0
                                int bottom_y) {
1134
0
  GridSearch<BLOBNBOX, BLOBNBOX_CLIST, BLOBNBOX_C_IT> sidesearch(this);
1135
0
  const TBOX &box = bbox->bounding_box();
1136
0
  int left = box.left();
1137
0
  int right = box.right();
1138
0
  int mid_x = (left + right) / 2;
1139
0
  sidesearch.StartSideSearch(mid_x, bottom_y, top_y);
1140
0
  int best_gap = 0;
1141
0
  bool debug = WithinTestRegion(3, left, bottom_y);
1142
0
  BLOBNBOX *result = nullptr;
1143
0
  BLOBNBOX *neighbour = nullptr;
1144
0
  while ((neighbour = sidesearch.NextSideSearch(look_left)) != nullptr) {
1145
0
    if (debug) {
1146
0
      tprintf("Adjacent blob: considering box:");
1147
0
      neighbour->bounding_box().print();
1148
0
    }
1149
0
    if (neighbour == bbox || (ignore_images && neighbour->region_type() < BRT_UNKNOWN)) {
1150
0
      continue;
1151
0
    }
1152
0
    const TBOX &nbox = neighbour->bounding_box();
1153
0
    int n_top_y = nbox.top();
1154
0
    int n_bottom_y = nbox.bottom();
1155
0
    int v_overlap = std::min(n_top_y, top_y) - std::max(n_bottom_y, bottom_y);
1156
0
    int height = top_y - bottom_y;
1157
0
    int n_height = n_top_y - n_bottom_y;
1158
0
    if (v_overlap > min_overlap_fraction * std::min(height, n_height) &&
1159
0
        (min_overlap_fraction == 0.0 || !DifferentSizes(height, n_height))) {
1160
0
      int n_left = nbox.left();
1161
0
      int n_right = nbox.right();
1162
0
      int h_gap = std::max(n_left, left) - std::min(n_right, right);
1163
0
      int n_mid_x = (n_left + n_right) / 2;
1164
0
      if (look_left == (n_mid_x < mid_x) && n_mid_x != mid_x) {
1165
0
        if (h_gap > gap_limit) {
1166
          // Hit a big gap before next tab so don't return anything.
1167
0
          if (debug) {
1168
0
            tprintf("Giving up due to big gap = %d vs %d\n", h_gap, gap_limit);
1169
0
          }
1170
0
          return result;
1171
0
        }
1172
0
        if (h_gap > 0 && (look_left ? neighbour->right_tab_type() : neighbour->left_tab_type()) >=
1173
0
                             TT_CONFIRMED) {
1174
          // Hit a tab facing the wrong way. Stop in case we are crossing
1175
          // the column boundary.
1176
0
          if (debug) {
1177
0
            tprintf("Collision with like tab of type %d at %d,%d\n",
1178
0
                    look_left ? neighbour->right_tab_type() : neighbour->left_tab_type(), n_left,
1179
0
                    nbox.bottom());
1180
0
          }
1181
0
          return result;
1182
0
        }
1183
        // This is a good fit to the line. Continue with this
1184
        // neighbour as the bbox if the best gap.
1185
0
        if (result == nullptr || h_gap < best_gap) {
1186
0
          if (debug) {
1187
0
            tprintf("Good result\n");
1188
0
          }
1189
0
          result = neighbour;
1190
0
          best_gap = h_gap;
1191
0
        } else {
1192
          // The new one is worse, so we probably already have the best result.
1193
0
          return result;
1194
0
        }
1195
0
      } else if (debug) {
1196
0
        tprintf("Wrong way\n");
1197
0
      }
1198
0
    } else if (debug) {
1199
0
      tprintf("Insufficient overlap\n");
1200
0
    }
1201
0
  }
1202
0
  if (WithinTestRegion(3, left, box.top())) {
1203
0
    tprintf("Giving up due to end of search\n");
1204
0
  }
1205
0
  return result; // Hit the edge and found nothing.
1206
0
}
1207
1208
// Add a bi-directional partner relationship between the left
1209
// and the right. If one (or both) of the vectors is a separator,
1210
// extend a nearby extendable vector or create a new one of the
1211
// correct type, using the given left or right blob as a guide.
1212
void TabFind::AddPartnerVector(BLOBNBOX *left_blob, BLOBNBOX *right_blob, TabVector *left,
1213
0
                               TabVector *right) {
1214
0
  const TBOX &left_box = left_blob->bounding_box();
1215
0
  const TBOX &right_box = right_blob->bounding_box();
1216
0
  if (left->IsSeparator()) {
1217
    // Try to find a nearby left edge to extend.
1218
0
    TabVector *v = LeftTabForBox(left_box, true, true);
1219
0
    if (v != nullptr && v != left && v->IsLeftTab() &&
1220
0
        v->XAtY(left_box.top()) > left->XAtY(left_box.top())) {
1221
0
      left = v; // Found a good replacement.
1222
0
      left->ExtendToBox(left_blob);
1223
0
    } else {
1224
      // Fake a vector.
1225
0
      left = new TabVector(*left, TA_LEFT_RAGGED, vertical_skew_, left_blob);
1226
0
      vectors_.add_sorted(TabVector::SortVectorsByKey, left);
1227
0
      v_it_.move_to_first();
1228
0
    }
1229
0
  }
1230
0
  if (right->IsSeparator()) {
1231
    // Try to find a nearby left edge to extend.
1232
0
    if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1233
0
      tprintf("Box edge (%d,%d-%d)", right_box.right(), right_box.bottom(), right_box.top());
1234
0
      right->Print(" looking for improvement for");
1235
0
    }
1236
0
    TabVector *v = RightTabForBox(right_box, true, true);
1237
0
    if (v != nullptr && v != right && v->IsRightTab() &&
1238
0
        v->XAtY(right_box.top()) < right->XAtY(right_box.top())) {
1239
0
      right = v; // Found a good replacement.
1240
0
      right->ExtendToBox(right_blob);
1241
0
      if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1242
0
        right->Print("Extended vector");
1243
0
      }
1244
0
    } else {
1245
      // Fake a vector.
1246
0
      right = new TabVector(*right, TA_RIGHT_RAGGED, vertical_skew_, right_blob);
1247
0
      vectors_.add_sorted(TabVector::SortVectorsByKey, right);
1248
0
      v_it_.move_to_first();
1249
0
      if (WithinTestRegion(3, right_box.right(), right_box.bottom())) {
1250
0
        right->Print("Created new vector");
1251
0
      }
1252
0
    }
1253
0
  }
1254
0
  left->AddPartner(right);
1255
0
  right->AddPartner(left);
1256
0
}
1257
1258
// Remove separators and unused tabs from the main vectors_ list
1259
// to the dead_vectors_ list.
1260
0
void TabFind::CleanupTabs() {
1261
  // TODO(rays) Before getting rid of separators and unused vectors, it
1262
  // would be useful to try moving ragged vectors outwards to see if this
1263
  // allows useful extension. Could be combined with checking ends of partners.
1264
0
  TabVector_IT it(&vectors_);
1265
0
  TabVector_IT dead_it(&dead_vectors_);
1266
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1267
0
    TabVector *v = it.data();
1268
0
    if (v->IsSeparator() || v->Partnerless()) {
1269
0
      dead_it.add_after_then_move(it.extract());
1270
0
      v_it_.set_to_list(&vectors_);
1271
0
    } else {
1272
0
      v->FitAndEvaluateIfNeeded(vertical_skew_, this);
1273
0
    }
1274
0
  }
1275
0
}
1276
1277
// Apply the given rotation to the given list of blobs.
1278
0
void TabFind::RotateBlobList(const FCOORD &rotation, BLOBNBOX_LIST *blobs) {
1279
0
  BLOBNBOX_IT it(blobs);
1280
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1281
0
    it.data()->rotate_box(rotation);
1282
0
  }
1283
0
}
1284
1285
// Recreate the grid with deskewed BLOBNBOXes.
1286
// Returns false if the detected skew angle is impossible.
1287
bool TabFind::Deskew(TabVector_LIST *hlines, BLOBNBOX_LIST *image_blobs, TO_BLOCK *block,
1288
0
                     FCOORD *deskew, FCOORD *reskew) {
1289
0
  ComputeDeskewVectors(deskew, reskew);
1290
0
  if (deskew->x() < kCosMaxSkewAngle) {
1291
0
    return false;
1292
0
  }
1293
0
  RotateBlobList(*deskew, image_blobs);
1294
0
  RotateBlobList(*deskew, &block->blobs);
1295
0
  RotateBlobList(*deskew, &block->small_blobs);
1296
0
  RotateBlobList(*deskew, &block->noise_blobs);
1297
1298
  // Rotate the horizontal vectors. The vertical vectors don't need
1299
  // rotating as they can just be refitted.
1300
0
  TabVector_IT h_it(hlines);
1301
0
  for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1302
0
    TabVector *h = h_it.data();
1303
0
    h->Rotate(*deskew);
1304
0
  }
1305
0
  TabVector_IT d_it(&dead_vectors_);
1306
0
  for (d_it.mark_cycle_pt(); !d_it.cycled_list(); d_it.forward()) {
1307
0
    TabVector *d = d_it.data();
1308
0
    d->Rotate(*deskew);
1309
0
  }
1310
0
  SetVerticalSkewAndParallelize(0, 1);
1311
  // Rebuild the grid to the new size.
1312
0
  TBOX grid_box(bleft_, tright_);
1313
0
  grid_box.rotate_large(*deskew);
1314
0
  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1315
0
  InsertBlobsToGrid(false, false, image_blobs, this);
1316
0
  InsertBlobsToGrid(true, false, &block->blobs, this);
1317
0
  return true;
1318
0
}
1319
1320
// Flip the vertical and horizontal lines and rotate the grid ready
1321
// for working on the rotated image.
1322
// This also makes parameter adjustments for FindInitialTabVectors().
1323
void TabFind::ResetForVerticalText(const FCOORD &rotate, const FCOORD &rerotate,
1324
0
                                   TabVector_LIST *horizontal_lines, int *min_gutter_width) {
1325
  // Rotate the horizontal and vertical vectors and swap them over.
1326
  // Only the separators are kept and rotated; other tabs are used
1327
  // to estimate the gutter width then thrown away.
1328
0
  TabVector_LIST ex_verticals;
1329
0
  TabVector_IT ex_v_it(&ex_verticals);
1330
0
  TabVector_LIST vlines;
1331
0
  TabVector_IT v_it(&vlines);
1332
0
  while (!v_it_.empty()) {
1333
0
    TabVector *v = v_it_.extract();
1334
0
    if (v->IsSeparator()) {
1335
0
      v->Rotate(rotate);
1336
0
      ex_v_it.add_after_then_move(v);
1337
0
    } else {
1338
0
      v_it.add_after_then_move(v);
1339
0
    }
1340
0
    v_it_.forward();
1341
0
  }
1342
1343
  // Adjust the min gutter width for better tabbox selection
1344
  // in 2nd call to FindInitialTabVectors().
1345
0
  int median_gutter = FindMedianGutterWidth(&vlines);
1346
0
  if (median_gutter > *min_gutter_width) {
1347
0
    *min_gutter_width = median_gutter;
1348
0
  }
1349
1350
0
  TabVector_IT h_it(horizontal_lines);
1351
0
  for (h_it.mark_cycle_pt(); !h_it.cycled_list(); h_it.forward()) {
1352
0
    TabVector *h = h_it.data();
1353
0
    h->Rotate(rotate);
1354
0
  }
1355
0
  v_it_.add_list_after(horizontal_lines);
1356
0
  v_it_.move_to_first();
1357
0
  h_it.set_to_list(horizontal_lines);
1358
0
  h_it.add_list_after(&ex_verticals);
1359
1360
  // Rebuild the grid to the new size.
1361
0
  TBOX grid_box(bleft(), tright());
1362
0
  grid_box.rotate_large(rotate);
1363
0
  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1364
0
}
1365
1366
// Clear the grid and get rid of the tab vectors, but not separators,
1367
// ready to start again.
1368
0
void TabFind::Reset() {
1369
0
  v_it_.move_to_first();
1370
0
  for (v_it_.mark_cycle_pt(); !v_it_.cycled_list(); v_it_.forward()) {
1371
0
    if (!v_it_.data()->IsSeparator()) {
1372
0
      delete v_it_.extract();
1373
0
    }
1374
0
  }
1375
0
  Clear();
1376
0
}
1377
1378
// Reflect the separator tab vectors and the grids in the y-axis.
1379
// Can only be called after Reset!
1380
0
void TabFind::ReflectInYAxis() {
1381
0
  TabVector_LIST temp_list;
1382
0
  TabVector_IT temp_it(&temp_list);
1383
0
  v_it_.move_to_first();
1384
  // The TabVector list only contains vertical lines, but they need to be
1385
  // reflected and the list needs to be reversed, so they are still in
1386
  // sort_key order.
1387
0
  while (!v_it_.empty()) {
1388
0
    TabVector *v = v_it_.extract();
1389
0
    v_it_.forward();
1390
0
    v->ReflectInYAxis();
1391
0
    temp_it.add_before_then_move(v);
1392
0
  }
1393
0
  v_it_.add_list_after(&temp_list);
1394
0
  v_it_.move_to_first();
1395
  // Reset this grid with reflected bounding boxes.
1396
0
  TBOX grid_box(bleft(), tright());
1397
0
  int tmp = grid_box.left();
1398
0
  grid_box.set_left(-grid_box.right());
1399
0
  grid_box.set_right(-tmp);
1400
0
  Init(gridsize(), grid_box.botleft(), grid_box.topright());
1401
0
}
1402
1403
// Compute the rotation required to deskew, and its inverse rotation.
1404
0
void TabFind::ComputeDeskewVectors(FCOORD *deskew, FCOORD *reskew) {
1405
0
  double length = vertical_skew_ % vertical_skew_;
1406
0
  length = sqrt(length);
1407
0
  deskew->set_x(static_cast<float>(vertical_skew_.y() / length));
1408
0
  deskew->set_y(static_cast<float>(vertical_skew_.x() / length));
1409
0
  reskew->set_x(deskew->x());
1410
0
  reskew->set_y(-deskew->y());
1411
0
}
1412
1413
// Compute and apply constraints to the end positions of TabVectors so
1414
// that where possible partners end at the same y coordinate.
1415
0
void TabFind::ApplyTabConstraints() {
1416
0
  TabVector_IT it(&vectors_);
1417
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1418
0
    TabVector *v = it.data();
1419
0
    v->SetupConstraints();
1420
0
  }
1421
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1422
0
    TabVector *v = it.data();
1423
    // With the first and last partner, we want a common bottom and top,
1424
    // respectively, and for each change of partner, we want a common
1425
    // top of first with bottom of next.
1426
0
    v->SetupPartnerConstraints();
1427
0
  }
1428
  // TODO(rays) The back-to-back pairs should really be done like the
1429
  // front-to-front pairs, but there is no convenient way of producing the
1430
  // list of partners like there is with the front-to-front.
1431
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1432
0
    TabVector *v = it.data();
1433
0
    if (!v->IsRightTab()) {
1434
0
      continue;
1435
0
    }
1436
    // For each back-to-back pair of vectors, try for common top and bottom.
1437
0
    TabVector_IT partner_it(it);
1438
0
    for (partner_it.forward(); !partner_it.at_first(); partner_it.forward()) {
1439
0
      TabVector *partner = partner_it.data();
1440
0
      if (!partner->IsLeftTab() || !v->VOverlap(*partner)) {
1441
0
        continue;
1442
0
      }
1443
0
      v->SetupPartnerConstraints(partner);
1444
0
    }
1445
0
  }
1446
  // Now actually apply the constraints to get common start/end points.
1447
0
  for (it.mark_cycle_pt(); !it.cycled_list(); it.forward()) {
1448
0
    TabVector *v = it.data();
1449
0
    if (!v->IsSeparator()) {
1450
0
      v->ApplyConstraints();
1451
0
    }
1452
0
  }
1453
  // TODO(rays) Where constraint application fails, it would be good to try
1454
  // checking the ends to see if they really should be moved.
1455
0
}
1456
1457
} // namespace tesseract.