Coverage Report

Created: 2025-07-23 07:12

/src/tesseract/src/textord/tabvector.h
Line
Count
Source (jump to first uncovered line)
1
///////////////////////////////////////////////////////////////////////
2
// File:        tabvector.h
3
// Description: Class to hold a near-vertical vector representing a tab-stop.
4
// Author:      Ray Smith
5
//
6
// (C) Copyright 2008, Google Inc.
7
// Licensed under the Apache License, Version 2.0 (the "License");
8
// you may not use this file except in compliance with the License.
9
// You may obtain a copy of the License at
10
// http://www.apache.org/licenses/LICENSE-2.0
11
// Unless required by applicable law or agreed to in writing, software
12
// distributed under the License is distributed on an "AS IS" BASIS,
13
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
// See the License for the specific language governing permissions and
15
// limitations under the License.
16
//
17
///////////////////////////////////////////////////////////////////////
18
19
#ifndef TESSERACT_TEXTORD_TABVECTOR_H_
20
#define TESSERACT_TEXTORD_TABVECTOR_H_
21
22
#include "bbgrid.h"
23
#include "blobgrid.h"
24
#include "clst.h"
25
#include "elst.h"
26
#include "elst2.h"
27
#include "rect.h"
28
29
#include <algorithm>
30
31
class BLOBNBOX;
32
class ScrollView;
33
34
namespace tesseract {
35
36
extern double_VAR_H(textord_tabvector_vertical_gap_fraction);
37
extern double_VAR_H(textord_tabvector_vertical_box_ratio);
38
39
// The alignment type that a tab vector represents.
40
// Keep this enum synced with kAlignmentNames in tabvector.cpp.
41
enum TabAlignment {
42
  TA_LEFT_ALIGNED,
43
  TA_LEFT_RAGGED,
44
  TA_CENTER_JUSTIFIED,
45
  TA_RIGHT_ALIGNED,
46
  TA_RIGHT_RAGGED,
47
  TA_SEPARATOR,
48
  TA_COUNT
49
};
50
51
// Forward declarations. The classes use their own list types, so we
52
// need to make the list types first.
53
class TabFind;
54
class TabVector;
55
class TabConstraint;
56
57
ELIST2IZEH(TabVector)
58
CLISTIZEH(TabVector)
59
ELISTIZEH(TabConstraint)
60
61
// TabConstraint is a totally self-contained class to maintain
62
// a list of [min,max] constraints, each referring to a TabVector.
63
// The constraints are manipulated through static methods that act
64
// on a list of constraints. The list itself is cooperatively owned
65
// by the TabVectors of the constraints on the list and managed
66
// by implicit reference counting via the elements of the list.
67
class TabConstraint : public ELIST<TabConstraint>::LINK {
68
public:
69
  // This empty constructor is here only so that the class can be ELISTIZED.
70
  // TODO(rays) change deep_copy in elst.h line 955 to take a callback copier
71
  // and eliminate CLASSNAME##_copier.
72
  TabConstraint() = default;
73
74
  // Create a constraint for the top or bottom of this TabVector.
75
  static void CreateConstraint(TabVector *vector, bool is_top);
76
77
  // Test to see if the constraints are compatible enough to merge.
78
  static bool CompatibleConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2);
79
80
  // Merge the lists of constraints and update the TabVector pointers.
81
  // The second list is deleted.
82
  static void MergeConstraints(TabConstraint_LIST *list1, TabConstraint_LIST *list2);
83
84
  // Set all the tops and bottoms as appropriate to a mean of the
85
  // constrained range. Delete all the constraints and list.
86
  static void ApplyConstraints(TabConstraint_LIST *constraints);
87
88
private:
89
  TabConstraint(TabVector *vector, bool is_top);
90
91
  // Get the max of the mins and the min of the maxes.
92
  static void GetConstraints(TabConstraint_LIST *constraints, int *y_min, int *y_max);
93
94
  // The TabVector this constraint applies to.
95
  TabVector *vector_;
96
  // If true then we refer to the top of the vector_.
97
  bool is_top_;
98
  // The allowed range of this vector_.
99
  int y_min_;
100
  int y_max_;
101
};
102
103
// Class to hold information about a single vector
104
// that represents a tab stop or a rule line.
105
class TabVector : public ELIST2<TabVector>::LINK {
106
public:
107
  // TODO(rays) fix this in elst.h line 1076, where it should use the
108
  // copy constructor instead of operator=.
109
0
  TabVector() = default;
110
0
  ~TabVector() = default;
111
112
  // Public factory to build a TabVector from a list of boxes.
113
  // The TabVector will be of the given alignment type.
114
  // The input vertical vector is used in fitting, and the output
115
  // vertical_x, vertical_y have the resulting line vector added to them
116
  // if the alignment is not ragged.
117
  // The extended_start_y and extended_end_y are the maximum possible
118
  // extension to the line segment that can be used to align with others.
119
  // The input CLIST of BLOBNBOX good_points is consumed and taken over.
120
  static TabVector *FitVector(TabAlignment alignment, ICOORD vertical, int extended_start_y,
121
                              int extended_end_y, BLOBNBOX_CLIST *good_points, int *vertical_x,
122
                              int *vertical_y);
123
124
  // Build a ragged TabVector by copying another's direction, shifting it
125
  // to match the given blob, and making its initial extent the height
126
  // of the blob, but its extended bounds from the bounds of the original.
127
  TabVector(const TabVector &src, TabAlignment alignment, const ICOORD &vertical_skew,
128
            BLOBNBOX *blob);
129
130
  // Copies basic attributes of a tab vector for simple operations.
131
  // Copies things such startpt, endpt, range, width.
132
  // Does not copy things such as partners, boxes, or constraints.
133
  // This is useful if you only need vector information for processing, such
134
  // as in the table detection code.
135
  TabVector *ShallowCopy() const;
136
137
  // Simple accessors.
138
0
  const ICOORD &startpt() const {
139
0
    return startpt_;
140
0
  }
141
0
  const ICOORD &endpt() const {
142
0
    return endpt_;
143
0
  }
144
0
  int extended_ymax() const {
145
0
    return extended_ymax_;
146
0
  }
147
0
  int extended_ymin() const {
148
0
    return extended_ymin_;
149
0
  }
150
0
  int sort_key() const {
151
0
    return sort_key_;
152
0
  }
153
0
  int mean_width() const {
154
0
    return mean_width_;
155
0
  }
156
0
  void set_top_constraints(TabConstraint_LIST *constraints) {
157
0
    top_constraints_ = constraints;
158
0
  }
159
0
  void set_bottom_constraints(TabConstraint_LIST *constraints) {
160
0
    bottom_constraints_ = constraints;
161
0
  }
162
0
  TabVector_CLIST *partners() {
163
0
    return &partners_;
164
0
  }
165
0
  void set_startpt(const ICOORD &start) {
166
0
    startpt_ = start;
167
0
  }
168
0
  void set_endpt(const ICOORD &end) {
169
0
    endpt_ = end;
170
0
  }
171
0
  bool intersects_other_lines() const {
172
0
    return intersects_other_lines_;
173
0
  }
174
0
  void set_intersects_other_lines(bool value) {
175
0
    intersects_other_lines_ = value;
176
0
  }
177
178
  // Inline quasi-accessors that require some computation.
179
180
  // Compute the x coordinate at the given y coordinate.
181
0
  int XAtY(int y) const {
182
0
    int height = endpt_.y() - startpt_.y();
183
0
    if (height != 0) {
184
0
      return (y - startpt_.y()) * (endpt_.x() - startpt_.x()) / height + startpt_.x();
185
0
    } else {
186
0
      return startpt_.x();
187
0
    }
188
0
  }
189
190
  // Compute the vertical overlap with the other TabVector.
191
0
  int VOverlap(const TabVector &other) const {
192
0
    return std::min(other.endpt_.y(), endpt_.y()) - std::max(other.startpt_.y(), startpt_.y());
193
0
  }
194
  // Compute the vertical overlap with the given y bounds.
195
0
  int VOverlap(int top_y, int bottom_y) const {
196
0
    return std::min(top_y, static_cast<int>(endpt_.y())) -
197
0
           std::max(bottom_y, static_cast<int>(startpt_.y()));
198
0
  }
199
  // Compute the extended vertical overlap with the given y bounds.
200
0
  int ExtendedOverlap(int top_y, int bottom_y) const {
201
0
    return std::min(top_y, extended_ymax_) - std::max(bottom_y, extended_ymin_);
202
0
  }
203
204
  // Return true if this is a left tab stop, either aligned, or ragged.
205
0
  bool IsLeftTab() const {
206
0
    return alignment_ == TA_LEFT_ALIGNED || alignment_ == TA_LEFT_RAGGED;
207
0
  }
208
  // Return true if this is a right tab stop, either aligned, or ragged.
209
0
  bool IsRightTab() const {
210
0
    return alignment_ == TA_RIGHT_ALIGNED || alignment_ == TA_RIGHT_RAGGED;
211
0
  }
212
  // Return true if this is a separator.
213
0
  bool IsSeparator() const {
214
0
    return alignment_ == TA_SEPARATOR;
215
0
  }
216
  // Return true if this is a center aligned tab stop.
217
0
  bool IsCenterTab() const {
218
0
    return alignment_ == TA_CENTER_JUSTIFIED;
219
0
  }
220
  // Return true if this is a ragged tab top, either left or right.
221
0
  bool IsRagged() const {
222
0
    return alignment_ == TA_LEFT_RAGGED || alignment_ == TA_RIGHT_RAGGED;
223
0
  }
224
225
  // Return true if this vector is to the left of the other in terms
226
  // of sort_key_.
227
0
  bool IsLeftOf(const TabVector &other) const {
228
0
    return sort_key_ < other.sort_key_;
229
0
  }
230
231
  // Return true if the vector has no partners.
232
0
  bool Partnerless() {
233
0
    return partners_.empty();
234
0
  }
235
236
  // Return the number of tab boxes in this vector.
237
0
  int BoxCount() {
238
0
    return boxes_.length();
239
0
  }
240
241
  // Lock the vector from refits by clearing the boxes_ list.
242
0
  void Freeze() {
243
0
    boxes_.shallow_clear();
244
0
  }
245
246
  // Flip x and y on the ends so a vector can be created from flipped input.
247
0
  void XYFlip() {
248
0
    int x = startpt_.y();
249
0
    startpt_.set_y(startpt_.x());
250
0
    startpt_.set_x(x);
251
0
    x = endpt_.y();
252
0
    endpt_.set_y(endpt_.x());
253
0
    endpt_.set_x(x);
254
0
  }
255
256
  // Reflect the tab vector in the y-axis.
257
0
  void ReflectInYAxis() {
258
0
    startpt_.set_x(-startpt_.x());
259
0
    endpt_.set_x(-endpt_.x());
260
0
    sort_key_ = -sort_key_;
261
0
    if (alignment_ == TA_LEFT_ALIGNED) {
262
0
      alignment_ = TA_RIGHT_ALIGNED;
263
0
    } else if (alignment_ == TA_RIGHT_ALIGNED) {
264
0
      alignment_ = TA_LEFT_ALIGNED;
265
0
    }
266
0
    if (alignment_ == TA_LEFT_RAGGED) {
267
0
      alignment_ = TA_RIGHT_RAGGED;
268
0
    } else if (alignment_ == TA_RIGHT_RAGGED) {
269
0
      alignment_ = TA_LEFT_RAGGED;
270
0
    }
271
0
  }
272
273
  // Separate function to compute the sort key for a given coordinate pair.
274
0
  static int SortKey(const ICOORD &vertical, int x, int y) {
275
0
    ICOORD pt(x, y);
276
0
    return pt * vertical;
277
0
  }
278
279
  // Return the x at the given y for the given sort key.
280
0
  static int XAtY(const ICOORD &vertical, int sort_key, int y) {
281
0
    if (vertical.y() != 0) {
282
0
      return (vertical.x() * y + sort_key) / vertical.y();
283
0
    } else {
284
0
      return sort_key;
285
0
    }
286
0
  }
287
288
  // Sort function for E2LIST::sort to sort by sort_key_.
289
0
  static int SortVectorsByKey(const TabVector *tv1, const TabVector *tv2) {
290
0
    return tv1->sort_key_ - tv2->sort_key_;
291
0
  }
292
293
  // More complex members.
294
295
  // Extend this vector to include the supplied blob if it doesn't
296
  // already have it.
297
  void ExtendToBox(BLOBNBOX *blob);
298
299
  // Set the ycoord of the start and move the xcoord to match.
300
  void SetYStart(int start_y);
301
  // Set the ycoord of the end and move the xcoord to match.
302
  void SetYEnd(int end_y);
303
304
  // Rotate the ends by the given vector.
305
  void Rotate(const FCOORD &rotation);
306
307
  // Setup the initial constraints, being the limits of
308
  // the vector and the extended ends.
309
  void SetupConstraints();
310
311
  // Setup the constraints between the partners of this TabVector.
312
  void SetupPartnerConstraints();
313
314
  // Setup the constraints between this and its partner.
315
  void SetupPartnerConstraints(TabVector *partner);
316
317
  // Use the constraints to modify the top and bottom.
318
  void ApplyConstraints();
319
320
  // Merge close tab vectors of the same side that overlap.
321
  static void MergeSimilarTabVectors(const ICOORD &vertical, TabVector_LIST *vectors,
322
                                     BlobGrid *grid);
323
324
  // Return true if this vector is the same side, overlaps, and close
325
  // enough to the other to be merged.
326
  bool SimilarTo(const ICOORD &vertical, const TabVector &other, BlobGrid *grid) const;
327
328
  // Eat the other TabVector into this and delete it.
329
  void MergeWith(const ICOORD &vertical, TabVector *other);
330
331
  // Add a new element to the list of partner TabVectors.
332
  // Partners must be added in order of increasing y coordinate of the text line
333
  // that makes them partners.
334
  // Groups of identical partners are merged into one.
335
  void AddPartner(TabVector *partner);
336
337
  // Return true if other is a partner of this.
338
  bool IsAPartner(const TabVector *other);
339
340
  // Print basic information about this tab vector.
341
  void Print(const char *prefix);
342
343
  // Print basic information about this tab vector and every box in it.
344
  void Debug(const char *prefix);
345
346
  // Draw this tabvector in place in the given window.
347
  void Display(ScrollView *tab_win);
348
349
  // Refit the line and/or re-evaluate the vector if the dirty flags are set.
350
  void FitAndEvaluateIfNeeded(const ICOORD &vertical, TabFind *finder);
351
352
  // Evaluate the vector in terms of coverage of its length by good-looking
353
  // box edges. A good looking box is one where its nearest neighbour on the
354
  // inside is nearer than half the distance its nearest neighbour on the
355
  // outside of the putative column. Bad boxes are removed from the line.
356
  // A second pass then further filters boxes by requiring that the gutter
357
  // width be a minimum fraction of the mean gutter along the line.
358
  void Evaluate(const ICOORD &vertical, TabFind *finder);
359
360
  // (Re)Fit a line to the stored points. Returns false if the line
361
  // is degenerate. Although the TabVector code mostly doesn't care about the
362
  // direction of lines, XAtY would give silly results for a horizontal line.
363
  // The class is mostly aimed at use for vertical lines representing
364
  // horizontal tab stops.
365
  bool Fit(ICOORD vertical, bool force_parallel);
366
367
  // Return the partner of this TabVector if the vector qualifies as
368
  // being a vertical text line, otherwise nullptr.
369
  TabVector *VerticalTextlinePartner();
370
371
  // Return the matching tabvector if there is exactly one partner, or
372
  // nullptr otherwise.  This can be used after matching is done, eg. by
373
  // VerticalTextlinePartner(), without checking if the line is vertical.
374
  TabVector *GetSinglePartner();
375
376
private:
377
  // Constructor is private as the static factory is the external way
378
  // to build a TabVector.
379
  TabVector(int extended_ymin, int extended_ymax, TabAlignment alignment, BLOBNBOX_CLIST *boxes);
380
381
  // Delete this, but first, repoint all the partners to point to
382
  // replacement. If replacement is nullptr, then partner relationships
383
  // are removed.
384
  void Delete(TabVector *replacement);
385
386
private:
387
  // The bottom of the tab line.
388
  ICOORD startpt_;
389
  // The top of the tab line.
390
  ICOORD endpt_;
391
  // The lowest y that the vector might extend to.
392
  int extended_ymin_ = 0;
393
  // The highest y that the vector might extend to.
394
  int extended_ymax_ = 0;
395
  // Perpendicular distance of vector from a given vertical for sorting.
396
  int sort_key_ = 0;
397
  // Result of Evaluate 0-100. Coverage of line with good boxes.
398
  int percent_score_ = 0;
399
  // The mean width of the blobs. Meaningful only for separator lines.
400
  int mean_width_ = 0;
401
  // True if the boxes_ list has been modified, so a refit is needed.
402
  bool needs_refit_ = false;
403
  // True if a fit has been done, so re-evaluation is needed.
404
  bool needs_evaluation_ = false;
405
  // True if a separator line intersects at least 2 other lines.
406
  bool intersects_other_lines_ = false;
407
  // The type of this TabVector.
408
  TabAlignment alignment_ = TA_LEFT_ALIGNED;
409
  // The list of boxes whose edges are aligned at this TabVector.
410
  BLOBNBOX_CLIST boxes_;
411
  // List of TabVectors that have a connection with this via a text line.
412
  TabVector_CLIST partners_;
413
  // Constraints used to resolve the exact location of the top and bottom
414
  // of the tab line.
415
  TabConstraint_LIST *top_constraints_ = nullptr;
416
  TabConstraint_LIST *bottom_constraints_ = nullptr;
417
};
418
419
} // namespace tesseract.
420
421
#endif // TESSERACT_TEXTORD_TABVECTOR_H_